PostgreSQL Source Code  git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_iovec.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
bool check_max_slot_wal_keep_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
void CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

uint32 bootstrap_data_checksum_version
 
int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 111 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 605 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 582 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 586 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 150 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 599 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 593 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 561 of file xlog.c.

562 {
WalInsertClass
Definition: xlog.c:562
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:564
@ WALINSERT_NORMAL
Definition: xlog.c:563
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:565

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1944 of file xlog.c.

1945 {
1947  int nextidx;
1948  XLogRecPtr OldPageRqstPtr;
1949  XLogwrtRqst WriteRqst;
1950  XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1951  XLogRecPtr NewPageBeginPtr;
1952  XLogPageHeader NewPage;
1953  int npages pg_attribute_unused() = 0;
1954 
1955  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
1956 
1957  /*
1958  * Now that we have the lock, check if someone initialized the page
1959  * already.
1960  */
1961  while (upto >= XLogCtl->InitializedUpTo || opportunistic)
1962  {
1964 
1965  /*
1966  * Get ending-offset of the buffer page we need to replace (this may
1967  * be zero if the buffer hasn't been used yet). Fall through if it's
1968  * already written out.
1969  */
1970  OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
1971  if (LogwrtResult.Write < OldPageRqstPtr)
1972  {
1973  /*
1974  * Nope, got work to do. If we just want to pre-initialize as much
1975  * as we can without flushing, give up now.
1976  */
1977  if (opportunistic)
1978  break;
1979 
1980  /* Before waiting, get info_lck and update LogwrtResult */
1982  if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
1983  XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
1986 
1987  /*
1988  * Now that we have an up-to-date LogwrtResult value, see if we
1989  * still need to write it or if someone else already did.
1990  */
1991  if (LogwrtResult.Write < OldPageRqstPtr)
1992  {
1993  /*
1994  * Must acquire write lock. Release WALBufMappingLock first,
1995  * to make sure that all insertions that we need to wait for
1996  * can finish (up to this same position). Otherwise we risk
1997  * deadlock.
1998  */
1999  LWLockRelease(WALBufMappingLock);
2000 
2001  WaitXLogInsertionsToFinish(OldPageRqstPtr);
2002 
2003  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2004 
2006  if (LogwrtResult.Write >= OldPageRqstPtr)
2007  {
2008  /* OK, someone wrote it already */
2009  LWLockRelease(WALWriteLock);
2010  }
2011  else
2012  {
2013  /* Have to write it ourselves */
2014  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2015  WriteRqst.Write = OldPageRqstPtr;
2016  WriteRqst.Flush = 0;
2017  XLogWrite(WriteRqst, tli, false);
2018  LWLockRelease(WALWriteLock);
2020  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2021  }
2022  /* Re-acquire WALBufMappingLock and retry */
2023  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2024  continue;
2025  }
2026  }
2027 
2028  /*
2029  * Now the next buffer slot is free and we can set it up to be the
2030  * next output page.
2031  */
2032  NewPageBeginPtr = XLogCtl->InitializedUpTo;
2033  NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2034 
2035  Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2036 
2037  NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2038 
2039  /*
2040  * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2041  * before initializing. Otherwise, the old page may be partially
2042  * zeroed but look valid.
2043  */
2045  pg_write_barrier();
2046 
2047  /*
2048  * Be sure to re-zero the buffer so that bytes beyond what we've
2049  * written will look like zeroes and not valid XLOG records...
2050  */
2051  MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
2052 
2053  /*
2054  * Fill the new page's header
2055  */
2056  NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2057 
2058  /* NewPage->xlp_info = 0; */ /* done by memset */
2059  NewPage->xlp_tli = tli;
2060  NewPage->xlp_pageaddr = NewPageBeginPtr;
2061 
2062  /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2063 
2064  /*
2065  * If online backup is not in progress, mark the header to indicate
2066  * that WAL records beginning in this page have removable backup
2067  * blocks. This allows the WAL archiver to know whether it is safe to
2068  * compress archived WAL data by transforming full-block records into
2069  * the non-full-block format. It is sufficient to record this at the
2070  * page level because we force a page switch (in fact a segment
2071  * switch) when starting a backup, so the flag will be off before any
2072  * records can be written during the backup. At the end of a backup,
2073  * the last page will be marked as all unsafe when perhaps only part
2074  * is unsafe, but at worst the archiver would miss the opportunity to
2075  * compress a few records.
2076  */
2077  if (Insert->runningBackups == 0)
2078  NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2079 
2080  /*
2081  * If first page of an XLOG segment file, make it a long header.
2082  */
2083  if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2084  {
2085  XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2086 
2087  NewLongPage->xlp_sysid = ControlFile->system_identifier;
2088  NewLongPage->xlp_seg_size = wal_segment_size;
2089  NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2090  NewPage->xlp_info |= XLP_LONG_HEADER;
2091  }
2092 
2093  /*
2094  * Make sure the initialization of the page becomes visible to others
2095  * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2096  * holding a lock.
2097  */
2098  pg_write_barrier();
2099 
2100  pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2101  XLogCtl->InitializedUpTo = NewPageEndPtr;
2102 
2103  npages++;
2104  }
2105  LWLockRelease(WALBufMappingLock);
2106 
2107 #ifdef WAL_DEBUG
2108  if (XLOG_DEBUG && npages > 0)
2109  {
2110  elog(DEBUG1, "initialized %d pages, up to %X/%X",
2111  npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2112  }
2113 #endif
2114 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:480
#define pg_write_barrier()
Definition: atomics.h:152
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:462
#define pg_attribute_unused()
Definition: c.h:123
#define MemSet(start, val, len)
Definition: c.h:1007
size_t Size
Definition: c.h:592
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
static void Insert(File file)
Definition: fd.c:1313
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1172
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1785
@ LW_EXCLUSIVE
Definition: lwlock.h:116
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint64 system_identifier
Definition: pg_control.h:109
PgStat_Counter wal_buffers_full
Definition: pgstat.h:452
XLogwrtRqst LogwrtRqst
Definition: xlog.c:461
slock_t info_lck
Definition: xlog.c:555
XLogRecPtr InitializedUpTo
Definition: xlog.c:492
char * pages
Definition: xlog.c:499
XLogwrtResult LogwrtResult
Definition: xlog.c:480
pg_atomic_uint64 * xlblocks
Definition: xlog.c:500
XLogCtlInsert Insert
Definition: xlog.c:458
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:333
XLogRecPtr Flush
Definition: xlog.c:328
XLogRecPtr Write
Definition: xlog.c:327
static XLogCtlData * XLogCtl
Definition: xlog.c:568
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1492
int wal_segment_size
Definition: xlog.c:143
static XLogwrtResult LogwrtResult
Definition: xlog.c:614
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:593
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2273
static ControlFileData * ControlFile
Definition: xlog.c:576
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, PendingWalStats, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), PgStat_PendingWalStats::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2157 of file xlog.c.

2158 {
2161 }
double CheckPointCompletionTarget
Definition: checkpointer.c:138
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2121

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2150 of file xlog.c.

2151 {
2154 }
int max_wal_size_mb
Definition: xlog.c:114

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4647 of file xlog.c.

4648 {
4649  /*
4650  * If some checks were deferred, it's possible that the checks will fail
4651  * later during InitializeWalConsistencyChecking(). But in that case, the
4652  * postmaster will exit anyway, so it's safe to proceed with the
4653  * assignment.
4654  *
4655  * Any built-in resource managers specified are assigned immediately,
4656  * which affects WAL created before shared_preload_libraries are
4657  * processed. Any custom resource managers specified won't be assigned
4658  * until after shared_preload_libraries are processed, but that's OK
4659  * because WAL for a custom resource manager can't be written before the
4660  * module is loaded anyway.
4661  */
4662  wal_consistency_checking = extra;
4663 }
bool * wal_consistency_checking
Definition: xlog.c:126

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8474 of file xlog.c.

8475 {
8476  if (wal_sync_method != new_wal_sync_method)
8477  {
8478  /*
8479  * To ensure that no blocks escape unsynced, force an fsync on the
8480  * currently open log segment (if any). Also, if the open flag is
8481  * changing, close the log file so it will be reopened (with new flag
8482  * bit) at next use.
8483  */
8484  if (openLogFile >= 0)
8485  {
8486  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8487  if (pg_fsync(openLogFile) != 0)
8488  {
8489  char xlogfname[MAXFNAMELEN];
8490  int save_errno;
8491 
8492  save_errno = errno;
8493  XLogFileName(xlogfname, openLogTLI, openLogSegNo,
8495  errno = save_errno;
8496  ereport(PANIC,
8498  errmsg("could not fsync file \"%s\": %m", xlogfname)));
8499  }
8500 
8502  if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
8503  XLogFileClose();
8504  }
8505  }
8506 }
int errcode_for_file_access(void)
Definition: elog.c:882
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
static int openLogFile
Definition: xlog.c:624
static int get_sync_bit(int method)
Definition: xlog.c:8426
int wal_sync_method
Definition: xlog.c:130
static TimeLineID openLogTLI
Definition: xlog.c:626
static void XLogFileClose(void)
Definition: xlog.c:3569
static XLogSegNo openLogSegNo
Definition: xlog.c:625
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( void  )

Definition at line 4920 of file xlog.c.

4921 {
4922  CheckPoint checkPoint;
4923  char *buffer;
4924  XLogPageHeader page;
4925  XLogLongPageHeader longpage;
4926  XLogRecord *record;
4927  char *recptr;
4928  uint64 sysidentifier;
4929  struct timeval tv;
4930  pg_crc32c crc;
4931 
4932  /* allow ordinary WAL segment creation, like StartupXLOG() would */
4934 
4935  /*
4936  * Select a hopefully-unique system identifier code for this installation.
4937  * We use the result of gettimeofday(), including the fractional seconds
4938  * field, as being about as unique as we can easily get. (Think not to
4939  * use random(), since it hasn't been seeded and there's no portable way
4940  * to seed it other than the system clock value...) The upper half of the
4941  * uint64 value is just the tv_sec part, while the lower half contains the
4942  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
4943  * PID for a little extra uniqueness. A person knowing this encoding can
4944  * determine the initialization time of the installation, which could
4945  * perhaps be useful sometimes.
4946  */
4947  gettimeofday(&tv, NULL);
4948  sysidentifier = ((uint64) tv.tv_sec) << 32;
4949  sysidentifier |= ((uint64) tv.tv_usec) << 12;
4950  sysidentifier |= getpid() & 0xFFF;
4951 
4952  /* page buffer must be aligned suitably for O_DIRECT */
4953  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
4954  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
4955  memset(page, 0, XLOG_BLCKSZ);
4956 
4957  /*
4958  * Set up information for the initial checkpoint record
4959  *
4960  * The initial checkpoint record is written to the beginning of the WAL
4961  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
4962  * used, so that we can use 0/0 to mean "before any valid WAL segment".
4963  */
4964  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
4965  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
4966  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
4967  checkPoint.fullPageWrites = fullPageWrites;
4968  checkPoint.nextXid =
4970  checkPoint.nextOid = FirstGenbkiObjectId;
4971  checkPoint.nextMulti = FirstMultiXactId;
4972  checkPoint.nextMultiOffset = 0;
4973  checkPoint.oldestXid = FirstNormalTransactionId;
4974  checkPoint.oldestXidDB = Template1DbOid;
4975  checkPoint.oldestMulti = FirstMultiXactId;
4976  checkPoint.oldestMultiDB = Template1DbOid;
4979  checkPoint.time = (pg_time_t) time(NULL);
4981 
4982  TransamVariables->nextXid = checkPoint.nextXid;
4983  TransamVariables->nextOid = checkPoint.nextOid;
4985  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
4986  AdvanceOldestClogXid(checkPoint.oldestXid);
4987  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
4988  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
4990 
4991  /* Set up the XLOG page header */
4992  page->xlp_magic = XLOG_PAGE_MAGIC;
4993  page->xlp_info = XLP_LONG_HEADER;
4994  page->xlp_tli = BootstrapTimeLineID;
4996  longpage = (XLogLongPageHeader) page;
4997  longpage->xlp_sysid = sysidentifier;
4998  longpage->xlp_seg_size = wal_segment_size;
4999  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5000 
5001  /* Insert the initial checkpoint record */
5002  recptr = ((char *) page + SizeOfXLogLongPHD);
5003  record = (XLogRecord *) recptr;
5004  record->xl_prev = 0;
5005  record->xl_xid = InvalidTransactionId;
5006  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5008  record->xl_rmid = RM_XLOG_ID;
5009  recptr += SizeOfXLogRecord;
5010  /* fill the XLogRecordDataHeaderShort struct */
5011  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5012  *(recptr++) = sizeof(checkPoint);
5013  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5014  recptr += sizeof(checkPoint);
5015  Assert(recptr - (char *) record == record->xl_tot_len);
5016 
5017  INIT_CRC32C(crc);
5018  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5019  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5020  FIN_CRC32C(crc);
5021  record->xl_crc = crc;
5022 
5023  /* Create first XLOG segment file */
5026 
5027  /*
5028  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5029  * close the file again in a moment.
5030  */
5031 
5032  /* Write the first page with the initial record */
5033  errno = 0;
5034  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5035  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5036  {
5037  /* if write didn't set errno, assume problem is no disk space */
5038  if (errno == 0)
5039  errno = ENOSPC;
5040  ereport(PANIC,
5042  errmsg("could not write bootstrap write-ahead log file: %m")));
5043  }
5045 
5046  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5047  if (pg_fsync(openLogFile) != 0)
5048  ereport(PANIC,
5050  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5052 
5053  if (close(openLogFile) != 0)
5054  ereport(PANIC,
5056  errmsg("could not close bootstrap write-ahead log file: %m")));
5057 
5058  openLogFile = -1;
5059 
5060  /* Now create pg_control */
5061  InitControlFile(sysidentifier);
5062  ControlFile->time = checkPoint.time;
5063  ControlFile->checkPoint = checkPoint.redo;
5064  ControlFile->checkPointCopy = checkPoint;
5065 
5066  /* some additional ControlFile fields are set in WriteControlFile() */
5067  WriteControlFile();
5068 
5069  /* Bootstrap the commit log, too */
5070  BootStrapCLOG();
5074 
5075  pfree(buffer);
5076 
5077  /*
5078  * Force control file to be read - in contrast to normal processing we'd
5079  * otherwise never run the checks and GUC related initializations therein.
5080  */
5081  ReadControlFile();
5082 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:791
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1508
void * palloc(Size size)
Definition: mcxt.c:1304
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2253
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2287
void BootStrapMultiXact(void)
Definition: multixact.c:1959
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:50
MultiXactId oldestMulti
Definition: pg_control.h:49
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
TransactionId newestCommitTsXid
Definition: pg_control.h:54
TransactionId oldestXid
Definition: pg_control.h:47
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:44
TransactionId oldestActiveXid
Definition: pg_control.h:63
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:45
FullTransactionId nextXid
Definition: pg_control.h:43
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
pg_time_t time
Definition: pg_control.h:51
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:48
CheckPoint checkPointCopy
Definition: pg_control.h:134
pg_time_t time
Definition: pg_control.h:131
XLogRecPtr checkPoint
Definition: pg_control.h:132
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3310
bool fullPageWrites
Definition: xlog.c:122
static void InitControlFile(uint64 sysidentifier)
Definition: xlog.c:4131
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9317
static void WriteControlFile(void)
Definition: xlog.c:4166
#define BootstrapTimeLineID
Definition: xlog.c:111
static void ReadControlFile(void)
Definition: xlog.c:4248
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert(), BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2121 of file xlog.c.

2122 {
2123  double target;
2124 
2125  /*-------
2126  * Calculate the distance at which to trigger a checkpoint, to avoid
2127  * exceeding max_wal_size_mb. This is based on two assumptions:
2128  *
2129  * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2130  * WAL for two checkpoint cycles to allow us to recover from the
2131  * secondary checkpoint if the first checkpoint failed, though we
2132  * only did this on the primary anyway, not on standby. Keeping just
2133  * one checkpoint simplifies processing and reduces disk space in
2134  * many smaller databases.)
2135  * b) during checkpoint, we consume checkpoint_completion_target *
2136  * number of segments consumed between checkpoints.
2137  *-------
2138  */
2139  target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2140  (1.0 + CheckPointCompletionTarget);
2141 
2142  /* round down */
2143  CheckPointSegments = (int) target;
2144 
2145  if (CheckPointSegments < 1)
2146  CheckPointSegments = 1;
2147 }
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:605
int CheckPointSegments
Definition: xlog.c:156

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_max_slot_wal_keep_size()

bool check_max_slot_wal_keep_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2182 of file xlog.c.

2183 {
2184  if (IsBinaryUpgrade && *newval != -1)
2185  {
2186  GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
2187  "max_slot_wal_keep_size");
2188  return false;
2189  }
2190 
2191  return true;
2192 }
bool IsBinaryUpgrade
Definition: globals.c:118
#define GUC_check_errdetail
Definition: guc.h:447

References GUC_check_errdetail, IsBinaryUpgrade, and newval.

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4527 of file xlog.c.

4528 {
4529  /*
4530  * -1 indicates a request for auto-tune.
4531  */
4532  if (*newval == -1)
4533  {
4534  /*
4535  * If we haven't yet changed the boot_val default of -1, just let it
4536  * be. We'll fix it when XLOGShmemSize is called.
4537  */
4538  if (XLOGbuffers == -1)
4539  return true;
4540 
4541  /* Otherwise, substitute the auto-tune value */
4542  *newval = XLOGChooseNumBuffers();
4543  }
4544 
4545  /*
4546  * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4547  * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4548  * the case, we just silently treat such values as a request for the
4549  * minimum. (We could throw an error instead, but that doesn't seem very
4550  * helpful.)
4551  */
4552  if (*newval < 4)
4553  *newval = 4;
4554 
4555  return true;
4556 }
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4511
int XLOGbuffers
Definition: xlog.c:117

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4562 of file xlog.c.

4563 {
4564  char *rawstring;
4565  List *elemlist;
4566  ListCell *l;
4567  bool newwalconsistency[RM_MAX_ID + 1];
4568 
4569  /* Initialize the array */
4570  MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4571 
4572  /* Need a modifiable copy of string */
4573  rawstring = pstrdup(*newval);
4574 
4575  /* Parse string into list of identifiers */
4576  if (!SplitIdentifierString(rawstring, ',', &elemlist))
4577  {
4578  /* syntax error in list */
4579  GUC_check_errdetail("List syntax is invalid.");
4580  pfree(rawstring);
4581  list_free(elemlist);
4582  return false;
4583  }
4584 
4585  foreach(l, elemlist)
4586  {
4587  char *tok = (char *) lfirst(l);
4588  int rmid;
4589 
4590  /* Check for 'all'. */
4591  if (pg_strcasecmp(tok, "all") == 0)
4592  {
4593  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4594  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4595  newwalconsistency[rmid] = true;
4596  }
4597  else
4598  {
4599  /* Check if the token matches any known resource manager. */
4600  bool found = false;
4601 
4602  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4603  {
4604  if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4605  pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4606  {
4607  newwalconsistency[rmid] = true;
4608  found = true;
4609  break;
4610  }
4611  }
4612  if (!found)
4613  {
4614  /*
4615  * During startup, it might be a not-yet-loaded custom
4616  * resource manager. Defer checking until
4617  * InitializeWalConsistencyChecking().
4618  */
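4619  if (!process_shared_preload_libraries_done)
4620  {
4621  check_wal_consistency_checking_deferred = true;
4622  }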
4623  else
4624  {
4625  GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4626  pfree(rawstring);
4627  list_free(elemlist);
4628  return false;
4629  }
4630  }
4631  }
4632  }
4633 
4634  pfree(rawstring);
4635  list_free(elemlist);
4636 
4637  /* assign new value */
4638  *extra = guc_malloc(ERROR, (RM_MAX_ID + 1) * sizeof(bool));
4639  memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4640  return true;
4641 }
#define ERROR
Definition: elog.h:39
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:640
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1683
bool process_shared_preload_libraries_done
Definition: miscinit.c:1779
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:3456
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:166
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, ERROR, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2164 of file xlog.c.

2165 {
2166  if (!IsValidWalSegSize(*newval))
2167  {
2168  GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2169  return false;
2170  }
2171 
2172  return true;
2173 }
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7344 of file xlog.c.

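7345 {
7346  CheckPointRelationMap();
7347  CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
7348  CheckPointSnapBuild();
7349  CheckPointLogicalRewriteHeap();
7350  CheckPointReplicationOrigin();
7351 
7352  /* Write out all dirty data in SLRUs and the main buffer pool */
7353  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7354  CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
7355  CheckPointCLOG();
7356  CheckPointCommitTs();
7357  CheckPointSUBTRANS();
7358  CheckPointMultiXact();
7359  CheckPointPredicate();
7360  CheckPointBuffers(flags);
7361 
7362  /* Perform all queued up fsyncs */
7363  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7364  CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
7365  ProcessSyncRequests();
7366  CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
7367  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();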
7368 
7369  /* We deliberately delay 2PC checkpointing as long as possible */
7370  CheckPointTwoPhase(checkPointRedo);
7371 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
void CheckPointBuffers(int flags)
Definition: bufmgr.c:3363
void CheckPointCLOG(void)
Definition: clog.c:937
void CheckPointCommitTs(void)
Definition: commit_ts.c:820
void CheckPointMultiXact(void)
Definition: multixact.c:2229
void CheckPointReplicationOrigin(void)
Definition: origin.c:573
void CheckPointPredicate(void)
Definition: predicate.c:1031
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1155
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:1815
void CheckPointSnapBuild(void)
Definition: snapbuild.c:2054
TimestampTz ckpt_write_t
Definition: xlog.h:160
TimestampTz ckpt_sync_end_t
Definition: xlog.h:162
TimestampTz ckpt_sync_t
Definition: xlog.h:161
void CheckPointSUBTRANS(void)
Definition: subtrans.c:355
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1816
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:137

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5264 of file xlog.c.

5265 {
5266  /*
5267  * For archive recovery, the WAL must be generated with at least 'replica'
5268  * wal_level.
5269  */
5270  if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
5271  {
5272  ereport(FATAL,
5273  (errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
5274  errdetail("This happens if you temporarily set wal_level=minimal on the server."),
5275  errhint("Use a backup taken after setting wal_level to higher than minimal.")));
5276  }
5277 
5278  /*
5279  * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5280  * must have at least as many backend slots as the primary.
5281  */
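5282  if (ArchiveRecoveryRequested && EnableHotStandby)
5283  {
5284  /* We ignore autovacuum_max_workers when we make this test. */
5285  RecoveryRequiresIntParameter("max_connections",
5286  MaxConnections,
5287  ControlFile->MaxConnections);
5288  RecoveryRequiresIntParameter("max_worker_processes",
5289  max_worker_processes,
5290  ControlFile->max_worker_processes);
5291  RecoveryRequiresIntParameter("max_wal_senders",
5292  max_wal_senders,
5293  ControlFile->max_wal_senders);
5294  RecoveryRequiresIntParameter("max_prepared_transactions",
5295  max_prepared_xacts,
5296  ControlFile->max_prepared_xacts);
5297  RecoveryRequiresIntParameter("max_locks_per_transaction",
5298  max_locks_per_xact,
5299  ControlFile->max_locks_per_xact);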
5300  }
5301 }
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errhint(const char *fmt,...)
Definition: elog.c:1319
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:140
int max_worker_processes
Definition: globals.c:141
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:180
int max_locks_per_xact
Definition: pg_control.h:183
int max_prepared_xacts
Definition: pg_control.h:182
int max_prepared_xacts
Definition: twophase.c:115
int max_wal_senders
Definition: walsender.c:121
bool EnableHotStandby
Definition: xlog.c:121
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:72
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:137
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3657 of file xlog.c.

3658 {
3659  int save_errno = errno;
3660  XLogSegNo lastRemovedSegNo;
3661 
3662  SpinLockAcquire(&XLogCtl->info_lck);
3663  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3664  SpinLockRelease(&XLogCtl->info_lck);
3665 
3666  if (segno <= lastRemovedSegNo)
3667  {
3668  char filename[MAXFNAMELEN];
3669 
3670  XLogFileName(filename, tli, segno, wal_segment_size);
3671  errno = save_errno;
3672  ereport(ERROR,
3673  (errcode_for_file_access(),
3674  errmsg("requested WAL segment %s has already been removed",
3675  filename)));
3676  }
3677  errno = save_errno;
3678 }
static char * filename
Definition: pg_dumpall.c:121
XLogSegNo lastRemovedSegNo
Definition: xlog.c:467
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5175 of file xlog.c.

5177 {
5178  /*
5179  * Execute the recovery_end_command, if any.
5180  */
5181  if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5182  ExecuteRecoveryCommand(recoveryEndCommand,
5183  "recovery_end_command",
5184  true,
5185  WAIT_EVENT_RECOVERY_END_COMMAND);
5186 
5187  /*
5188  * We switched to a new timeline. Clean up segments on the old timeline.
5189  *
5190  * If there are any higher-numbered segments on the old timeline, remove
5191  * them. They might contain valid WAL, but they might also be
5192  * pre-allocated files containing garbage. In any case, they are not part
5193  * of the new timeline's history so we don't need them.
5194  */
5195  RemoveNonParentXlogFiles(EndOfLog, newTLI);
5196 
5197  /*
5198  * If the switch happened in the middle of a segment, what to do with the
5199  * last, partial segment on the old timeline? If we don't archive it, and
5200  * the server that created the WAL never archives it either (e.g. because
5201  * it was hit by a meteor), it will never make it to the archive. That's
5202  * OK from our point of view, because the new segment that we created with
5203  * the new TLI contains all the WAL from the old timeline up to the switch
5204  * point. But if you later try to do PITR to the "missing" WAL on the old
5205  * timeline, recovery won't find it in the archive. It's physically
5206  * present in the new file with new TLI, but recovery won't look there
5207  * when it's recovering to the older timeline. On the other hand, if we
5208  * archive the partial segment, and the original server on that timeline
5209  * is still running and archives the completed version of the same segment
5210  * later, it will fail. (We used to do that in 9.4 and below, and it
5211  * caused such problems).
5212  *
5213  * As a compromise, we rename the last segment with the .partial suffix,
5214  * and archive it. Archive recovery will never try to read .partial
5215  * segments, so they will normally go unused. But in the odd PITR case,
5216  * the administrator can copy them manually to the pg_wal directory
5217  * (removing the suffix). They can be useful in debugging, too.
5218  *
5219  * If a .done or .ready file already exists for the old timeline, however,
5220  * we had already determined that the segment is complete, so we can let
5221  * it be archived normally. (In particular, if it was restored from the
5222  * archive to begin with, it's expected to have a .done file).
5223  */
5224  if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5225  XLogArchivingActive())
5226  {
5227  char origfname[MAXFNAMELEN];
5228  XLogSegNo endLogSegNo;
5229 
5230  XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5231  XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5232 
5233  if (!XLogArchiveIsReadyOrDone(origfname))
5234  {
5235  char origpath[MAXPGPATH];
5236  char partialfname[MAXFNAMELEN];
5237  char partialpath[MAXPGPATH];
5238 
5239  XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5240  snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5241  snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5242 
5243  /*
5244  * Make sure there's no .done or .ready file for the .partial
5245  * file.
5246  */
5247  XLogArchiveCleanup(partialfname);
5248 
5249  durable_rename(origpath, partialpath, ERROR);
5250  XLogArchiveNotify(partialfname);
5251  }
5252  }
5253 }
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
#define MAXPGPATH
#define snprintf
Definition: port.h:238
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3870
#define XLogArchivingActive()
Definition: xlog.h:97
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:83

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4088 of file xlog.c.

4089 {
4090  DIR *xldir;
4091  struct dirent *xlde;
4092  char path[MAXPGPATH + sizeof(XLOGDIR)];
4093 
4094  xldir = AllocateDir(XLOGDIR);
4095 
4096  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4097  {
4098  if (IsBackupHistoryFileName(xlde->d_name))
4099  {
4100  if (XLogArchiveCheckDone(xlde->d_name))
4101  {
4102  elog(DEBUG2, "removing WAL backup history file \"%s\"",
4103  xlde->d_name);
4104  snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4105  unlink(path);
4106  XLogArchiveCleanup(xlde->d_name);
4107  }
4108  }
4109  }
4110 
4111  FreeDir(xldir);
4112 }
#define DEBUG2
Definition: elog.h:29
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1215 of file xlog.c.

1217 {
1218  char *currpos;
1219  int freespace;
1220  int written;
1221  XLogRecPtr CurrPos;
1222  XLogPageHeader pagehdr;
1223 
1224  /*
1225  * Get a pointer to the right place in the right WAL buffer to start
1226  * inserting to.
1227  */
1228  CurrPos = StartPos;
1229  currpos = GetXLogBuffer(CurrPos, tli);
1230  freespace = INSERT_FREESPACE(CurrPos);
1231 
1232  /*
1233  * there should be enough space for at least the first field (xl_tot_len)
1234  * on this page.
1235  */
1236  Assert(freespace >= sizeof(uint32));
1237 
1238  /* Copy record data */
1239  written = 0;
1240  while (rdata != NULL)
1241  {
1242  char *rdata_data = rdata->data;
1243  int rdata_len = rdata->len;
1244 
1245  while (rdata_len > freespace)
1246  {
1247  /*
1248  * Write what fits on this page, and continue on the next page.
1249  */
1250  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1251  memcpy(currpos, rdata_data, freespace);
1252  rdata_data += freespace;
1253  rdata_len -= freespace;
1254  written += freespace;
1255  CurrPos += freespace;
1256 
1257  /*
1258  * Get pointer to beginning of next page, and set the xlp_rem_len
1259  * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1260  *
1261  * It's safe to set the contrecord flag and xlp_rem_len without a
1262  * lock on the page. All the other flags were already set when the
1263  * page was initialized, in AdvanceXLInsertBuffer, and we're the
1264  * only backend that needs to set the contrecord flag.
1265  */
1266  currpos = GetXLogBuffer(CurrPos, tli);
1267  pagehdr = (XLogPageHeader) currpos;
1268  pagehdr->xlp_rem_len = write_len - written;
1269  pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1270 
1271  /* skip over the page header */
1272  if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1273  {
1274  CurrPos += SizeOfXLogLongPHD;
1275  currpos += SizeOfXLogLongPHD;
1276  }
1277  else
1278  {
1279  CurrPos += SizeOfXLogShortPHD;
1280  currpos += SizeOfXLogShortPHD;
1281  }
1282  freespace = INSERT_FREESPACE(CurrPos);
1283  }
1284 
1285  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1286  memcpy(currpos, rdata_data, rdata_len);
1287  currpos += rdata_len;
1288  CurrPos += rdata_len;
1289  freespace -= rdata_len;
1290  written += rdata_len;
1291 
1292  rdata = rdata->next;
1293  }
1294  Assert(written == write_len);
1295 
1296  /*
1297  * If this was an xlog-switch, it's not enough to write the switch record,
1298  * we also have to consume all the remaining space in the WAL segment. We
1299  * have already reserved that space, but we need to actually fill it.
1300  */
1301  if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1302  {
1303  /* An xlog-switch record doesn't contain any data besides the header */
1304  Assert(write_len == SizeOfXLogRecord);
1305 
1306  /* Assert that we did reserve the right amount of space */
1307  Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1308 
1309  /* Use up all the remaining space on the current page */
1310  CurrPos += freespace;
1311 
1312  /*
1313  * Cause all remaining pages in the segment to be flushed, leaving the
1314  * XLog position where it should be, at the start of the next segment.
1315  * We do this one page at a time, to make sure we don't deadlock
1316  * against ourselves if wal_buffers < wal_segment_size.
1317  */
1318  while (CurrPos < EndPos)
1319  {
1320  /*
1321  * The minimal action to flush the page would be to call
1322  * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1323  * AdvanceXLInsertBuffer(...). The page would be left initialized
1324  * mostly to zeros, except for the page header (always the short
1325  * variant, as this is never a segment's first page).
1326  *
1327  * The large vistas of zeros are good for compressibility, but the
1328  * headers interrupting them every XLOG_BLCKSZ (with values that
1329  * differ from page to page) are not. The effect varies with
1330  * compression tool, but bzip2 for instance compresses about an
1331  * order of magnitude worse if those headers are left in place.
1332  *
1333  * Rather than complicating AdvanceXLInsertBuffer itself (which is
1334  * called in heavily-loaded circumstances as well as this lightly-
1335  * loaded one) with variant behavior, we just use GetXLogBuffer
1336  * (which itself calls the two methods we need) to get the pointer
1337  * and zero most of the page. Then we just zero the page header.
1338  */
1339  currpos = GetXLogBuffer(CurrPos, tli);
1340  MemSet(currpos, 0, SizeOfXLogShortPHD);
1341 
1342  CurrPos += XLOG_BLCKSZ;
1343  }
1344  }
1345  else
1346  {
1347  /* Align the end position, so that the next record starts aligned */
1348  CurrPos = MAXALIGN64(CurrPos);
1349  }
1350 
1351  if (CurrPos != EndPos)
1352  elog(PANIC, "space reserved for WAL record does not match what was written");
1353 }
unsigned int uint32
Definition: c.h:493
#define MAXALIGN64(LEN)
Definition: c.h:823
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:582
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1602
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert(), XLogRecData::data, elog, GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 6734 of file xlog.c.

/*
 * CreateCheckPoint -- perform a checkpoint.
 *
 * flags is a bitmask. This listing tests CHECKPOINT_END_OF_RECOVERY and
 * CHECKPOINT_FORCE directly and forwards flags to LogCheckpointStart(),
 * update_checkpoint_display(), CheckPointGuts() and the CHECKPOINT_START
 * trace probe.
 *
 * Visible sequence of work:
 *   1. elog(ERROR) if RecoveryInProgress() and CHECKPOINT_END_OF_RECOVERY
 *      is not set.
 *   2. Zero CheckpointStats and the local checkPoint; stamp checkPoint.time.
 *   3. In the non-forced branch, return early (after END_CRIT_SECTION and a
 *      DEBUG1 message) when GetLastImportantRecPtr() equals
 *      ControlFile->checkPoint.
 *   4. Fix the redo pointer: for a shutdown checkpoint it is computed from
 *      the current insert position (stepping over a page header when the
 *      page has no free space); otherwise an XLOG_CHECKPOINT_REDO record is
 *      inserted and RedoRecPtr is copied into checkPoint.redo.
 *   5. Collect nextXid/oldestXid, nextOid and multixact state under their
 *      respective locks.
 *   6. Wait for backends delaying the checkpoint, run CheckPointGuts(), then
 *      wait a second time.
 *   7. Insert the checkpoint record and XLogFlush() it.
 *   8. ereport(PANIC) if this is a shutdown checkpoint and checkPoint.redo
 *      differs from ProcLastRecPtr.
 *   9. Update ControlFile under ControlFileLock and XLogCtl->ckptFullXid.
 *  10. Compute the segment horizon with KeepLogSeg(), call
 *      RemoveOldXlogFiles(), call PreallocXlogFiles() when not shutting
 *      down, then LogCheckpointEnd(false) and reset the process title.
 *
 * NOTE(review): this extract omits a number of source lines (the embedded
 * line numbers skip, e.g. 6740, 6752, 6782, 6846, 7005, 7044, 7091-7092).
 * Where a guard or call is not shown, its exact form cannot be confirmed
 * from this listing; check the real xlog.c before relying on it.
 */
6735 {
6736  bool shutdown;
6737  CheckPoint checkPoint;
6738  XLogRecPtr recptr;
6739  XLogSegNo _logSegNo;
6741  uint32 freespace;
6742  XLogRecPtr PriorRedoPtr;
6743  XLogRecPtr last_important_lsn;
6744  VirtualTransactionId *vxids;
6745  int nvxids;
6746  int oldXLogAllowed = 0;
6747 
6748  /*
6749  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6750  * issued at a different time.
6751  */
 /* NOTE(review): listing line 6752, the `if` paired with the `else` below, is not shown in this extract. */
6753  shutdown = true;
6754  else
6755  shutdown = false;
6756 
6757  /* sanity check */
6758  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6759  elog(ERROR, "can't create a checkpoint during recovery");
6760 
6761  /*
6762  * Prepare to accumulate statistics.
6763  *
6764  * Note: because it is possible for log_checkpoints to change while a
6765  * checkpoint proceeds, we always accumulate stats, even if
6766  * log_checkpoints is currently off.
6767  */
6768  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6770 
6771  /*
6772  * Let smgr prepare for checkpoint; this has to happen outside the
6773  * critical section and before we determine the REDO pointer. Note that
6774  * smgr must not do anything that'd have to be undone if we decide no
6775  * checkpoint is needed.
6776  */
6778 
6779  /*
6780  * Use a critical section to force system panic if we have trouble.
6781  */
6783 
6784  if (shutdown)
6785  {
6786  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6789  LWLockRelease(ControlFileLock);
6790  }
6791 
6792  /* Begin filling in the checkpoint WAL record */
6793  MemSet(&checkPoint, 0, sizeof(checkPoint));
6794  checkPoint.time = (pg_time_t) time(NULL);
6795 
6796  /*
6797  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6798  * pointer. This allows us to begin accumulating changes to assemble our
6799  * starting snapshot of locks and transactions.
6800  */
6801  if (!shutdown && XLogStandbyInfoActive())
6803  else
6805 
6806  /*
6807  * Get location of last important record before acquiring insert locks (as
6808  * GetLastImportantRecPtr() also locks WAL locks).
6809  */
6810  last_important_lsn = GetLastImportantRecPtr();
6811 
6812  /*
6813  * If this isn't a shutdown or forced checkpoint, and if there has been no
6814  * WAL activity requiring a checkpoint, skip it. The idea here is to
6815  * avoid inserting duplicate checkpoints when the system is idle.
6816  */
 /* NOTE(review): listing line 6817, the start of this `if` condition, is not shown; only its tail is visible below. */
6818  CHECKPOINT_FORCE)) == 0)
6819  {
6820  if (last_important_lsn == ControlFile->checkPoint)
6821  {
6822  END_CRIT_SECTION();
6823  ereport(DEBUG1,
6824  (errmsg_internal("checkpoint skipped because system is idle")));
6825  return;
6826  }
6827  }
6828 
6829  /*
6830  * An end-of-recovery checkpoint is created before anyone is allowed to
6831  * write WAL. To allow us to write the checkpoint record, temporarily
6832  * enable XLogInsertAllowed.
6833  */
6834  if (flags & CHECKPOINT_END_OF_RECOVERY)
6835  oldXLogAllowed = LocalSetXLogInsertAllowed();
6836 
6837  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6838  if (flags & CHECKPOINT_END_OF_RECOVERY)
6839  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6840  else
6841  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6842 
6843  /*
6844  * We must block concurrent insertions while examining insert state.
6845  */
6847 
6848  checkPoint.fullPageWrites = Insert->fullPageWrites;
6849 
6850  if (shutdown)
6851  {
6852  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
6853 
6854  /*
6855  * Compute new REDO record ptr = location of next XLOG record.
6856  *
6857  * Since this is a shutdown checkpoint, there can't be any concurrent
6858  * WAL insertion.
6859  */
6860  freespace = INSERT_FREESPACE(curInsert);
6861  if (freespace == 0)
6862  {
 /* Page is full: the next record starts after the next page's header (long header at a segment start, short otherwise). */
6863  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
6864  curInsert += SizeOfXLogLongPHD;
6865  else
6866  curInsert += SizeOfXLogShortPHD;
6867  }
6868  checkPoint.redo = curInsert;
6869 
6870  /*
6871  * Here we update the shared RedoRecPtr for future XLogInsert calls;
6872  * this must be done while holding all the insertion locks.
6873  *
6874  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
6875  * left pointing past where it really needs to point. This is okay;
6876  * the only consequence is that XLogInsert might back up whole buffers
6877  * that it didn't really need to. We can't postpone advancing
6878  * RedoRecPtr because XLogInserts that happen while we are dumping
6879  * buffers must assume that their buffer changes are not included in
6880  * the checkpoint.
6881  */
6882  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
6883  }
6884 
6885  /*
6886  * Now we can release the WAL insertion locks, allowing other xacts to
6887  * proceed while we are flushing disk buffers.
6888  */
6890 
6891  /*
6892  * If this is an online checkpoint, we have not yet determined the redo
6893  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
6894  * record; the LSN at which it starts becomes the new redo pointer. We
6895  * don't do this for a shutdown checkpoint, because in that case no WAL
6896  * can be written between the redo point and the insertion of the
6897  * checkpoint record itself, so the checkpoint record itself serves to
6898  * mark the redo point.
6899  */
6900  if (!shutdown)
6901  {
6902  int dummy = 0;
6903 
6904  /* Record must have payload to avoid assertion failure. */
6905  XLogBeginInsert();
6906  XLogRegisterData((char *) &dummy, sizeof(dummy));
6907  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
6908 
6909  /*
6910  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
6911  * shared memory and RedoRecPtr in backend-local memory, but we need
6912  * to copy that into the record that will be inserted when the
6913  * checkpoint is complete.
6914  */
6915  checkPoint.redo = RedoRecPtr;
6916  }
6917 
6918  /* Update the info_lck-protected copy of RedoRecPtr as well */
6920  XLogCtl->RedoRecPtr = checkPoint.redo;
6922 
6923  /*
6924  * If enabled, log checkpoint start. We postpone this until now so as not
6925  * to log anything if we decided to skip the checkpoint.
6926  */
6927  if (log_checkpoints)
6928  LogCheckpointStart(flags, false);
6929 
6930  /* Update the process title */
6931  update_checkpoint_display(flags, false, false);
6932 
6933  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
6934 
6935  /*
6936  * Get the other info we need for the checkpoint record.
6937  *
6938  * We don't need to save oldestClogXid in the checkpoint, it only matters
6939  * for the short period in which clog is being truncated, and if we crash
6940  * during that we'll redo the clog truncation and fix up oldestClogXid
6941  * there.
6942  */
6943  LWLockAcquire(XidGenLock, LW_SHARED);
6944  checkPoint.nextXid = TransamVariables->nextXid;
6945  checkPoint.oldestXid = TransamVariables->oldestXid;
6947  LWLockRelease(XidGenLock);
6948 
6949  LWLockAcquire(CommitTsLock, LW_SHARED);
6952  LWLockRelease(CommitTsLock);
6953 
6954  LWLockAcquire(OidGenLock, LW_SHARED);
6955  checkPoint.nextOid = TransamVariables->nextOid;
6956  if (!shutdown)
6957  checkPoint.nextOid += TransamVariables->oidCount;
6958  LWLockRelease(OidGenLock);
6959 
6960  MultiXactGetCheckptMulti(shutdown,
6961  &checkPoint.nextMulti,
6962  &checkPoint.nextMultiOffset,
6963  &checkPoint.oldestMulti,
6964  &checkPoint.oldestMultiDB);
6965 
6966  /*
6967  * Having constructed the checkpoint record, ensure all shmem disk buffers
6968  * and commit-log buffers are flushed to disk.
6969  *
6970  * This I/O could fail for various reasons. If so, we will fail to
6971  * complete the checkpoint, but there is no reason to force a system
6972  * panic. Accordingly, exit critical section while doing it.
6973  */
6974  END_CRIT_SECTION();
6975 
6976  /*
6977  * In some cases there are groups of actions that must all occur on one
6978  * side or the other of a checkpoint record. Before flushing the
6979  * checkpoint record we must explicitly wait for any backend currently
6980  * performing those groups of actions.
6981  *
6982  * One example is end of transaction, so we must wait for any transactions
6983  * that are currently in commit critical sections. If an xact inserted
6984  * its commit record into XLOG just before the REDO point, then a crash
6985  * restart from the REDO point would not replay that record, which means
6986  * that our flushing had better include the xact's update of pg_xact. So
6987  * we wait until it is out of its commit critical section before proceeding.
6988  * See notes in RecordTransactionCommit().
6989  *
6990  * Because we've already released the insertion locks, this test is a bit
6991  * fuzzy: it is possible that we will wait for xacts we didn't really need
6992  * to wait for. But the delay should be short and it seems better to make
6993  * checkpoint take a bit longer than to hold off insertions longer than
6994  * necessary. (In fact, the whole reason we have this issue is that xact.c
6995  * does commit record XLOG insertion and clog update as two separate steps
6996  * protected by different locks, but again that seems best on grounds of
6997  * minimizing lock contention.)
6998  *
6999  * A transaction that has not yet set delayChkptFlags when we look cannot
7000  * be at risk, since it has not inserted its commit record yet; and one
7001  * that's already cleared it is not at risk either, since it's done fixing
7002  * clog and we will correctly flush the update below. So we cannot miss
7003  * any xacts we need to wait for.
7004  */
 /* NOTE(review): listing line 7005 is not shown; presumably vxids/nvxids are obtained there via GetVirtualXIDsDelayingChkpt() (named in this function's References list) -- confirm. */
7006  if (nvxids > 0)
7007  {
7008  do
7009  {
7010  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7011  pg_usleep(10000L); /* wait for 10 msec */
7013  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7015  }
7016  pfree(vxids);
7017 
7018  CheckPointGuts(checkPoint.redo, flags);
7019 
 /* NOTE(review): listing line 7020 is not shown; vxids was pfree'd above, so it is presumably re-obtained on that line before this second wait -- confirm. */
7021  if (nvxids > 0)
7022  {
7023  do
7024  {
7025  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7026  pg_usleep(10000L); /* wait for 10 msec */
7028  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7030  }
7031  pfree(vxids);
7032 
7033  /*
7034  * Take a snapshot of running transactions and write this to WAL. This
7035  * allows us to reconstruct the state of running transactions during
7036  * archive recovery, if required. Skip if this info is disabled.
7037  *
7038  * If we are shutting down, or Startup process is completing crash
7039  * recovery we don't need to write running xact data.
7040  */
7041  if (!shutdown && XLogStandbyInfoActive())
7043 
 /* NOTE(review): listing line 7044 is not shown; an END_CRIT_SECTION() appears further down (line 7117), so a critical section is presumably re-entered here -- confirm. */
7045 
7046  /*
7047  * Now insert the checkpoint record into XLOG.
7048  */
7049  XLogBeginInsert();
7050  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7051  recptr = XLogInsert(RM_XLOG_ID,
7052  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7054 
7055  XLogFlush(recptr);
7056 
7057  /*
7058  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7059  * overwritten at next startup. No-one should even try, this just allows
7060  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7061  * to just temporarily disable writing until the system has exited
7062  * recovery.
7063  */
7064  if (shutdown)
7065  {
7066  if (flags & CHECKPOINT_END_OF_RECOVERY)
7067  LocalXLogInsertAllowed = oldXLogAllowed;
7068  else
7069  LocalXLogInsertAllowed = 0; /* never again write WAL */
7070  }
7071 
7072  /*
7073  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7074  * = end of actual checkpoint record.
7075  */
7076  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7077  ereport(PANIC,
7078  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7079 
7080  /*
7081  * Remember the prior checkpoint's redo ptr for
7082  * UpdateCheckPointDistanceEstimate()
7083  */
7084  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7085 
7086  /*
7087  * Update the control file.
7088  */
7089  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7090  if (shutdown)
7093  ControlFile->checkPointCopy = checkPoint;
7094  /* crash recovery should always recover to the end of WAL */
7097 
7098  /*
7099  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7100  * unused on non-shutdown checkpoints, but seems useful to store it always
7101  * for debugging purposes.
7102  */
7104 
7106  LWLockRelease(ControlFileLock);
7107 
7108  /* Update shared-memory copy of checkpoint XID/epoch */
7110  XLogCtl->ckptFullXid = checkPoint.nextXid;
7112 
7113  /*
7114  * We are now done with critical updates; no need for system panic if we
7115  * have trouble while fooling with old log segments.
7116  */
7117  END_CRIT_SECTION();
7118 
7119  /*
7120  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7121  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7122  * where (a) we're not inside of a critical section and (b) we can be
7123  * certain that the relevant record has been flushed to disk, which must
7124  * happen before it can be summarized.
7125  *
7126  * If this is a shutdown checkpoint, then this happens reasonably
7127  * promptly: we've only just inserted and flushed the
7128  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7129  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7130  * record was written before we began flushing data to disk, and that
7131  * could be many minutes ago at this point. However, we don't XLogFlush()
7132  * after inserting that record, so we're not guaranteed that it's on disk
7133  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7134  * record.
7135  */
7137 
7138  /*
7139  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7140  */
7142 
7143  /*
7144  * Update the average distance between checkpoints if the prior checkpoint
7145  * exists.
7146  */
7147  if (PriorRedoPtr != InvalidXLogRecPtr)
7149 
7150  /*
7151  * Delete old log files (those no longer needed for the last checkpoint)
7152  * to prevent the disk holding the xlog from growing full.
7153  */
7155  KeepLogSeg(recptr, &_logSegNo);
7157  _logSegNo, InvalidOid,
7159  {
7160  /*
7161  * Some slots have been invalidated; recalculate the old-segment
7162  * horizon, starting again from RedoRecPtr.
7163  */
7165  KeepLogSeg(recptr, &_logSegNo);
7166  }
 /* Step back one segment so the horizon segment itself is not passed as removable. NOTE(review): inferred from the decrement feeding RemoveOldXlogFiles() -- confirm against that function. */
7167  _logSegNo--;
7168  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7169  checkPoint.ThisTimeLineID);
7170 
7171  /*
7172  * Make more log segments if needed. (Do this after recycling old log
7173  * segments, since that may supply some of the needed files.)
7174  */
7175  if (!shutdown)
7176  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7177 
7178  /*
7179  * Truncate pg_subtrans if possible. We can throw away all data before
7180  * the oldest XMIN of any running transaction. No future transaction will
7181  * attempt to reference any pg_subtrans entry older than that (see Asserts
7182  * in subtrans.c). During recovery, though, we mustn't do this because
7183  * StartupSUBTRANS hasn't been called yet.
7184  */
7185  if (!RecoveryInProgress())
7187 
7188  /* Real work is done; log and update stats. */
7189  LogCheckpointEnd(false);
7190 
7191  /* Reset the process title */
7192  update_checkpoint_display(flags, false, true);
7193 
7194  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7195  NBuffers,
7199 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:471
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
int NBuffers
Definition: globals.c:139
@ LW_SHARED
Definition: lwlock.h:117
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2207
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:81
@ DB_SHUTDOWNING
Definition: pg_control.h:93
@ DB_SHUTDOWNED
Definition: pg_control.h:91
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:68
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:114
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:115
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3030
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2867
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2022
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3076
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1759
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
TimestampTz ckpt_start_t
Definition: xlog.h:159
int ckpt_segs_removed
Definition: xlog.h:168
int ckpt_segs_added
Definition: xlog.h:167
int ckpt_bufs_written
Definition: xlog.h:165
int ckpt_segs_recycled
Definition: xlog.h:169
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:167
XLogRecPtr unloggedLSN
Definition: pg_control.h:136
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:168
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:463
TimeLineID InsertTimeLineID
Definition: xlog.c:511
XLogRecPtr RedoRecPtr
Definition: xlog.c:462
TimeLineID PrevTimeLineID
Definition: xlog.c:512
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:470
XLogRecPtr RedoRecPtr
Definition: xlog.c:436
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void SetWalSummarizerLatch(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:253
bool RecoveryInProgress(void)
Definition: xlog.c:6201
static void WALInsertLockRelease(void)
Definition: xlog.c:1433
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1817
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1404
static void UpdateControlFile(void)
Definition: xlog.c:4449
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3795
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6499
static XLogRecPtr RedoRecPtr
Definition: xlog.c:273
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6531
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3620
bool log_checkpoints
Definition: xlog.c:129
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7787
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6289
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6406
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6634
static int LocalXLogInsertAllowed
Definition: xlog.c:236
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2728
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7344
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6672
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:138
#define CHECKPOINT_FORCE
Definition: xlog.h:140
#define XLogStandbyInfoActive()
Definition: xlog.h:121
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), XLogCtlData::PrevTimeLineID, 
CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SetWalSummarizerLatch(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7210 of file xlog.c.

/*
 * CreateEndOfRecoveryRecord -- write an XLOG_END_OF_RECOVERY record.
 *
 * Raises elog(ERROR) unless RecoveryInProgress(). Otherwise it stamps
 * xlrec.end_time with the current timestamp, inserts the record, flushes
 * WAL up to the pointer returned by XLogInsert(), and then, holding
 * ControlFileLock exclusively, sets ControlFile->minRecoveryPoint to that
 * pointer and ControlFile->minRecoveryPointTLI to xlrec.ThisTimeLineID.
 *
 * NOTE(review): listing lines 7221-7224, 7226 and 7241 are not shown in
 * this extract. The function's References list names
 * XLogCtlData::InsertTimeLineID, XLogCtlData::PrevTimeLineID,
 * WALInsertLockAcquireExclusive(), WALInsertLockRelease(),
 * START_CRIT_SECTION and UpdateControlFile(), so the timeline fields of
 * xlrec, the start of the critical section and the control-file write are
 * presumably on those lines -- confirm against the real source.
 */
7211 {
7212  xl_end_of_recovery xlrec;
7213  XLogRecPtr recptr;
7214 
7215  /* sanity check */
7216  if (!RecoveryInProgress())
7217  elog(ERROR, "can only be used to end recovery");
7218 
7219  xlrec.end_time = GetCurrentTimestamp();
7220 
7225 
7227 
 /* Build and insert the record; its only registered payload is xlrec. */
7228  XLogBeginInsert();
7229  XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
7230  recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7231 
 /* Flush WAL through the new record before touching the control file. */
7232  XLogFlush(recptr);
7233 
7234  /*
7235  * Update the control file so that crash recovery can follow the timeline
7236  * changes to this point.
7237  */
7238  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7239  ControlFile->minRecoveryPoint = recptr;
7240  ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
7242  LWLockRelease(ControlFileLock);
7243 
7244  END_CRIT_SECTION();
7245 }
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:76
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7274 of file xlog.c.

/*
 * CreateOverwriteContrecordRecord -- write an XLOG_OVERWRITE_CONTRECORD
 * record as the first record on the page starting at pagePtr.
 *
 * aborted_lsn: stored in the record as xlrec.overwritten_lsn.
 * pagePtr:     must be a multiple of XLOG_BLCKSZ, otherwise elog(ERROR).
 * newTLI:      timeline passed to GetXLogBuffer() when fetching the page.
 *
 * Returns the pointer obtained from XLogInsert() for the new record, after
 * WAL has been flushed up to it.
 *
 * Raises elog(ERROR) when not RecoveryInProgress(), when pagePtr is not
 * page-aligned, when the current insert position is not exactly pagePtr
 * plus the page header size, or when ProcLastRecPtr shows the record did
 * not land at that position.
 *
 * NOTE(review): listing lines 7277, 7300, 7309, 7311-7312, 7321 and 7328
 * are not shown in this extract. The declaration of xlrec, the start of
 * the critical section, the WAL insertion lock calls, the write of
 * XLP_FIRST_IS_OVERWRITE_CONTRECORD into xlp_info and the assignment of
 * overwrite_time are all named in the References list and are presumably
 * on those lines -- confirm against the real source.
 */
7276 {
7278  XLogRecPtr recptr;
7279  XLogPageHeader pagehdr;
7280  XLogRecPtr startPos;
7281 
7282  /* sanity checks */
7283  if (!RecoveryInProgress())
7284  elog(ERROR, "can only be used at end of recovery");
7285  if (pagePtr % XLOG_BLCKSZ != 0)
7286  elog(ERROR, "invalid position for missing continuation record %X/%X",
7287  LSN_FORMAT_ARGS(pagePtr));
7288 
7289  /* The current WAL insert position should be right after the page header */
7290  startPos = pagePtr;
 /* A page at the start of a segment carries the long header; any other page the short one. */
7291  if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7292  startPos += SizeOfXLogLongPHD;
7293  else
7294  startPos += SizeOfXLogShortPHD;
7295  recptr = GetXLogInsertRecPtr();
7296  if (recptr != startPos)
7297  elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
7298  LSN_FORMAT_ARGS(recptr));
7299 
7301 
7302  /*
7303  * Initialize the XLOG page header (by GetXLogBuffer), and set the
7304  * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7305  *
7306  * No other backend is allowed to write WAL yet, so acquiring the WAL
7307  * insertion lock is just pro forma.
7308  */
7310  pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7313 
7314  /*
7315  * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7316  * page. We know it becomes the first record, because no other backend is
7317  * allowed to write WAL yet.
7318  */
7319  XLogBeginInsert();
7320  xlrec.overwritten_lsn = aborted_lsn;
7322  XLogRegisterData((char *) &xlrec, sizeof(xl_overwrite_contrecord));
7323  recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7324 
7325  /* check that the record was inserted to the right place */
7326  if (ProcLastRecPtr != startPos)
7327  elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
7329 
7330  XLogFlush(recptr);
7331 
7332  END_CRIT_SECTION();
7333 
7334  return recptr;
7335 }
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:80
static void WALInsertLockAcquire(void)
Definition: xlog.c:1359
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9266
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7425 of file xlog.c.

7426 {
7427  XLogRecPtr lastCheckPointRecPtr;
7428  XLogRecPtr lastCheckPointEndPtr;
7429  CheckPoint lastCheckPoint;
7430  XLogRecPtr PriorRedoPtr;
7431  XLogRecPtr receivePtr;
7432  XLogRecPtr replayPtr;
7433  TimeLineID replayTLI;
7434  XLogRecPtr endptr;
7435  XLogSegNo _logSegNo;
7436  TimestampTz xtime;
7437 
7438  /* Concurrent checkpoint/restartpoint cannot happen */
7440 
7441  /* Get a local copy of the last safe checkpoint record. */
7443  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7444  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7445  lastCheckPoint = XLogCtl->lastCheckPoint;
7447 
7448  /*
7449  * Check that we're still in recovery mode. It's ok if we exit recovery
7450  * mode after this check, the restart point is valid anyway.
7451  */
7452  if (!RecoveryInProgress())
7453  {
7454  ereport(DEBUG2,
7455  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7456  return false;
7457  }
7458 
7459  /*
7460  * If the last checkpoint record we've replayed is already our last
7461  * restartpoint, we can't perform a new restart point. We still update
7462  * minRecoveryPoint in that case, so that if this is a shutdown restart
7463  * point, we won't start up earlier than before. That's not strictly
7464  * necessary, but when hot standby is enabled, it would be rather weird if
7465  * the database opened up for read-only connections at a point-in-time
7466  * before the last shutdown. Such time travel is still possible in case of
7467  * immediate shutdown, though.
7468  *
7469  * We don't explicitly advance minRecoveryPoint when we do create a
7470  * restartpoint. It's assumed that flushing the buffers will do that as a
7471  * side-effect.
7472  */
7473  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7474  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7475  {
7476  ereport(DEBUG2,
7477  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7478  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7479 
7481  if (flags & CHECKPOINT_IS_SHUTDOWN)
7482  {
7483  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7486  LWLockRelease(ControlFileLock);
7487  }
7488  return false;
7489  }
7490 
7491  /*
7492  * Update the shared RedoRecPtr so that the startup process can calculate
7493  * the number of segments replayed since last restartpoint, and request a
7494  * restartpoint if it exceeds CheckPointSegments.
7495  *
7496  * Like in CreateCheckPoint(), hold off insertions to update it, although
7497  * during recovery this is just pro forma, because no WAL insertions are
7498  * happening.
7499  */
7501  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7503 
7504  /* Also update the info_lck-protected copy */
7506  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7508 
7509  /*
7510  * Prepare to accumulate statistics.
7511  *
7512  * Note: because it is possible for log_checkpoints to change while a
7513  * checkpoint proceeds, we always accumulate stats, even if
7514  * log_checkpoints is currently off.
7515  */
7516  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7518 
7519  if (log_checkpoints)
7520  LogCheckpointStart(flags, true);
7521 
7522  /* Update the process title */
7523  update_checkpoint_display(flags, true, false);
7524 
7525  CheckPointGuts(lastCheckPoint.redo, flags);
7526 
7527  /*
7528  * This location needs to be after CheckPointGuts() to ensure that some
7529  * work has already happened during this checkpoint.
7530  */
7531  INJECTION_POINT("create-restart-point");
7532 
7533  /*
7534  * Remember the prior checkpoint's redo ptr for
7535  * UpdateCheckPointDistanceEstimate()
7536  */
7537  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7538 
7539  /*
7540  * Update pg_control, using current time. Check that it still shows an
7541  * older checkpoint, else do nothing; this is a quick hack to make sure
7542  * nothing really bad happens if somehow we get here after the
7543  * end-of-recovery checkpoint.
7544  */
7545  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7546  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7547  {
7548  /*
7549  * Update the checkpoint information. We do this even if the cluster
7550  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7551  * segments recycled below.
7552  */
7553  ControlFile->checkPoint = lastCheckPointRecPtr;
7554  ControlFile->checkPointCopy = lastCheckPoint;
7555 
7556  /*
7557  * Ensure minRecoveryPoint is past the checkpoint record and update it
7558  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7559  * this will have happened already while writing out dirty buffers,
7560  * but not necessarily - e.g. because no buffers were dirtied. We do
7561  * this because a backup performed in recovery uses minRecoveryPoint
7562  * to determine which WAL files must be included in the backup, and
7563  * the file (or files) containing the checkpoint record must be
7564  * included, at a minimum. Note that for an ordinary restart of
7565  * recovery there's no value in having the minimum recovery point any
7566  * earlier than this anyway, because redo will begin just after the
7567  * checkpoint record.
7568  */
7570  {
7571  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7572  {
7573  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7575 
7576  /* update local copy */
7579  }
7580  if (flags & CHECKPOINT_IS_SHUTDOWN)
7582  }
7584  }
7585  LWLockRelease(ControlFileLock);
7586 
7587  /*
7588  * Update the average distance between checkpoints/restartpoints if the
7589  * prior checkpoint exists.
7590  */
7591  if (PriorRedoPtr != InvalidXLogRecPtr)
7593 
7594  /*
7595  * Delete old log files that are no longer needed for the last
7596  * restartpoint, to prevent the disk holding the xlog from growing full.
7597  */
7599 
7600  /*
7601  * Retreat _logSegNo using the current end of xlog replayed or received,
7602  * whichever is later.
7603  */
7604  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7605  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7606  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7607  KeepLogSeg(endptr, &_logSegNo);
7609  _logSegNo, InvalidOid,
7611  {
7612  /*
7613  * Some slots have been invalidated; recalculate the old-segment
7614  * horizon, starting again from RedoRecPtr.
7615  */
7617  KeepLogSeg(endptr, &_logSegNo);
7618  }
7619  _logSegNo--;
7620 
7621  /*
7622  * Try to recycle segments on a useful timeline. If we've been promoted
7623  * since the beginning of this restartpoint, use the new timeline chosen
7624  * at end of recovery. If we're still in recovery, use the timeline we're
7625  * currently replaying.
7626  *
7627  * There is no guarantee that the WAL segments will be useful on the
7628  * current timeline; if recovery proceeds to a new timeline right after
7629  * this, the pre-allocated WAL segments on this timeline will not be used,
7630  * and will go wasted until recycled on the next restartpoint. We'll live
7631  * with that.
7632  */
7633  if (!RecoveryInProgress())
7634  replayTLI = XLogCtl->InsertTimeLineID;
7635 
7636  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7637 
7638  /*
7639  * Make more log segments if needed. (Do this after recycling old log
7640  * segments, since that may supply some of the needed files.)
7641  */
7642  PreallocXlogFiles(endptr, replayTLI);
7643 
7644  /*
7645  * Truncate pg_subtrans if possible. We can throw away all data before
7646  * the oldest XMIN of any running transaction. No future transaction will
7647  * attempt to reference any pg_subtrans entry older than that (see Asserts
7648  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7649  * this because StartupSUBTRANS hasn't been called yet.
7650  */
7651  if (EnableHotStandby)
7653 
7654  /* Real work is done; log and update stats. */
7655  LogCheckpointEnd(true);
7656 
7657  /* Reset the process title */
7658  update_checkpoint_display(flags, true, true);
7659 
7660  xtime = GetLatestXTime();
7662  (errmsg("recovery restart point at %X/%X",
7663  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7664  xtime ? errdetail("Last completed transaction was at log time %s.",
7665  timestamptz_to_str(xtime)) : 0));
7666 
7667  /*
7668  * Finally, execute archive_cleanup_command, if any.
7669  */
7670  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7672  "archive_cleanup_command",
7673  false,
7674  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7675 
7676  return true;
7677 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1853
int64 TimestampTz
Definition: timestamp.h:39
#define LOG
Definition: elog.h:31
bool IsUnderPostmaster
Definition: globals.c:117
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:354
BackendType MyBackendType
Definition: miscinit.c:63
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:95
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:92
CheckPoint lastCheckPoint
Definition: xlog.c:547
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:545
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:546
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2648
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:635
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:636
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:84
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert(), B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9225 of file xlog.c.

9226 {
9227  bool during_backup_start = DatumGetBool(arg);
9228 
9229  /* If called during backup start, there shouldn't be one already running */
9230  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9231 
9232  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9233  {
9237 
9240 
9241  if (!during_backup_start)
9242  ereport(WARNING,
9243  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9244  }
9245 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:444
static SessionBackupState sessionBackupState
Definition: xlog.c:397
@ SESSION_BACKUP_NONE
Definition: xlog.h:283

References arg, Assert(), DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8623 of file xlog.c.

8625 {
8627 
8628  Assert(state != NULL);
8630 
8631  /*
8632  * During recovery, we don't need to check WAL level, because if WAL
8633  * level is not sufficient, it's impossible to get here during recovery.
8634  */
8636  ereport(ERROR,
8637  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8638  errmsg("WAL level not sufficient for making an online backup"),
8639  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
8640 
8641  if (strlen(backupidstr) > MAXPGPATH)
8642  ereport(ERROR,
8643  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8644  errmsg("backup label too long (max %d bytes)",
8645  MAXPGPATH)));
8646 
8647  memcpy(state->name, backupidstr, strlen(backupidstr));
8648 
8649  /*
8650  * Mark backup active in shared memory. We must do full-page WAL writes
8651  * during an on-line backup even if not doing so at other times, because
8652  * it's quite possible for the backup dump to obtain a "torn" (partially
8653  * written) copy of a database page if it reads the page concurrently with
8654  * our write to the same page. This can be fixed as long as the first
8655  * write to the page in the WAL sequence is a full-page write. Hence, we
8656  * increment runningBackups then force a CHECKPOINT, to ensure there are
8657  * no dirty pages in shared memory that might get dumped while the backup
8658  * is in progress without having a corresponding WAL record. (Once the
8659  * backup is complete, we need not force full-page writes anymore, since
8660  * we expect that any pages not modified during the backup interval must
8661  * have been correctly captured by the backup.)
8662  *
8663  * Note that forcing full-page writes has no effect during an online
8664  * backup from the standby.
8665  *
8666  * We must hold all the insertion locks to change the value of
8667  * runningBackups, to ensure adequate interlocking against
8668  * XLogInsertRecord().
8669  */
8673 
8674  /*
8675  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8676  * work correctly, it is critical that sessionBackupState is only updated
8677  * after this block is over.
8678  */
8680  {
8681  bool gotUniqueStartpoint = false;
8682  DIR *tblspcdir;
8683  struct dirent *de;
8684  tablespaceinfo *ti;
8685  int datadirpathlen;
8686 
8687  /*
8688  * Force an XLOG file switch before the checkpoint, to ensure that the
8689  * WAL segment the checkpoint is written to doesn't contain pages with
8690  * old timeline IDs. That would otherwise happen if you called
8691  * pg_backup_start() right after restoring from a PITR archive: the
8692  * first WAL segment containing the startup checkpoint has pages in
8693  * the beginning with the old timeline ID. That can cause trouble at
8694  * recovery: we won't have a history file covering the old timeline if
8695  * pg_wal directory was not included in the base backup and the WAL
8696  * archive was cleared too before starting the backup.
8697  *
8698  * This also ensures that we have emitted a WAL page header that has
8699  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8700  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8701  * compress out removable backup blocks, it won't remove any that
8702  * occur after this point.
8703  *
8704  * During recovery, we skip forcing XLOG file switch, which means that
8705  * the backup taken during recovery is not available for the special
8706  * recovery case described above.
8707  */
8709  RequestXLogSwitch(false);
8710 
8711  do
8712  {
8713  bool checkpointfpw;
8714 
8715  /*
8716  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8717  * page problems, this guarantees that two successive backup runs
8718  * will have different checkpoint positions and hence different
8719  * history file names, even if nothing happened in between.
8720  *
8721  * During recovery, establish a restartpoint if possible. We use
8722  * the last restartpoint as the backup starting checkpoint. This
8723  * means that two successive backup runs can have the same checkpoint
8724  * positions.
8725  *
8726  * Since the fact that we are executing do_pg_backup_start()
8727  * during recovery means that checkpointer is running, we can use
8728  * RequestCheckpoint() to establish a restartpoint.
8729  *
8730  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8731  * passing fast = true). Otherwise this can take a while.
8732  */
8734  (fast ? CHECKPOINT_IMMEDIATE : 0));
8735 
8736  /*
8737  * Now we need to fetch the checkpoint record location, and also
8738  * its REDO pointer. The oldest point in WAL that would be needed
8739  * to restore starting from the checkpoint is precisely the REDO
8740  * pointer.
8741  */
8742  LWLockAcquire(ControlFileLock, LW_SHARED);
8743  state->checkpointloc = ControlFile->checkPoint;
8744  state->startpoint = ControlFile->checkPointCopy.redo;
8746  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8747  LWLockRelease(ControlFileLock);
8748 
8750  {
8751  XLogRecPtr recptr;
8752 
8753  /*
8754  * Check to see if all WAL replayed during online backup
8755  * (i.e., since last restartpoint used as backup starting
8756  * checkpoint) contain full-page writes.
8757  */
8759  recptr = XLogCtl->lastFpwDisableRecPtr;
8761 
8762  if (!checkpointfpw || state->startpoint <= recptr)
8763  ereport(ERROR,
8764  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8765  errmsg("WAL generated with full_page_writes=off was replayed "
8766  "since last restartpoint"),
8767  errhint("This means that the backup being taken on the standby "
8768  "is corrupt and should not be used. "
8769  "Enable full_page_writes and run CHECKPOINT on the primary, "
8770  "and then try an online backup again.")));
8771 
8772  /*
8773  * During recovery, since we don't use the end-of-backup WAL
8774  * record and don't write the backup history file, the
8775  * starting WAL location doesn't need to be unique. This means
8776  * that two base backups started at the same time might use
8777  * the same checkpoint as starting locations.
8778  */
8779  gotUniqueStartpoint = true;
8780  }
8781 
8782  /*
8783  * If two base backups are started at the same time (in WAL sender
8784  * processes), we need to make sure that they use different
8785  * checkpoints as starting locations, because we use the starting
8786  * WAL location as a unique identifier for the base backup in the
8787  * end-of-backup WAL record and when we write the backup history
8788  * file. Perhaps it would be better to generate a separate unique ID
8789  * for each backup instead of forcing another checkpoint, but
8790  * taking a checkpoint right after another is not that expensive
8791  * either because only a few buffers have been dirtied yet.
8792  */
8794  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8795  {
8796  XLogCtl->Insert.lastBackupStart = state->startpoint;
8797  gotUniqueStartpoint = true;
8798  }
8800  } while (!gotUniqueStartpoint);
8801 
8802  /*
8803  * Construct tablespace_map file.
8804  */
8805  datadirpathlen = strlen(DataDir);
8806 
8807  /* Collect information about all tablespaces */
8808  tblspcdir = AllocateDir("pg_tblspc");
8809  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
8810  {
8811  char fullpath[MAXPGPATH + 10];
8812  char linkpath[MAXPGPATH];
8813  char *relpath = NULL;
8814  char *s;
8815  PGFileType de_type;
8816  char *badp;
8817  Oid tsoid;
8818 
8819  /*
8820  * Try to parse the directory name as an unsigned integer.
8821  *
8822  * Tablespace directories should be positive integers that can be
8823  * represented in 32 bits, with no leading zeroes or trailing
8824  * garbage. If we come across a name that doesn't meet those
8825  * criteria, skip it.
8826  */
8827  if (de->d_name[0] < '1' || de->d_name[1] > '9')
8828  continue;
8829  errno = 0;
8830  tsoid = strtoul(de->d_name, &badp, 10);
8831  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
8832  continue;
8833 
8834  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
8835 
8836  de_type = get_dirent_type(fullpath, de, false, ERROR);
8837 
8838  if (de_type == PGFILETYPE_LNK)
8839  {
8840  StringInfoData escapedpath;
8841  int rllen;
8842 
8843  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
8844  if (rllen < 0)
8845  {
8846  ereport(WARNING,
8847  (errmsg("could not read symbolic link \"%s\": %m",
8848  fullpath)));
8849  continue;
8850  }
8851  else if (rllen >= sizeof(linkpath))
8852  {
8853  ereport(WARNING,
8854  (errmsg("symbolic link \"%s\" target is too long",
8855  fullpath)));
8856  continue;
8857  }
8858  linkpath[rllen] = '\0';
8859 
8860  /*
8861  * Relpath holds the relative path of the tablespace directory
8862  * when it's located within PGDATA, or NULL if it's located
8863  * elsewhere.
8864  */
8865  if (rllen > datadirpathlen &&
8866  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
8867  IS_DIR_SEP(linkpath[datadirpathlen]))
8868  relpath = pstrdup(linkpath + datadirpathlen + 1);
8869 
8870  /*
8871  * Add a backslash-escaped version of the link path to the
8872  * tablespace map file.
8873  */
8874  initStringInfo(&escapedpath);
8875  for (s = linkpath; *s; s++)
8876  {
8877  if (*s == '\n' || *s == '\r' || *s == '\\')
8878  appendStringInfoChar(&escapedpath, '\\');
8879  appendStringInfoChar(&escapedpath, *s);
8880  }
8881  appendStringInfo(tblspcmapfile, "%s %s\n",
8882  de->d_name, escapedpath.data);
8883  pfree(escapedpath.data);
8884  }
8885  else if (de_type == PGFILETYPE_DIR)
8886  {
8887  /*
8888  * It's possible to use allow_in_place_tablespaces to create
8889  * directories directly under pg_tblspc, for testing purposes
8890  * only.
8891  *
8892  * In this case, we store a relative path rather than an
8893  * absolute path into the tablespaceinfo.
8894  */
8895  snprintf(linkpath, sizeof(linkpath), "pg_tblspc/%s",
8896  de->d_name);
8897  relpath = pstrdup(linkpath);
8898  }
8899  else
8900  {
8901  /* Skip any other file type that appears here. */
8902  continue;
8903  }
8904 
8905  ti = palloc(sizeof(tablespaceinfo));
8906  ti->oid = tsoid;
8907  ti->path = pstrdup(linkpath);
8908  ti->rpath = relpath;
8909  ti->size = -1;
8910 
8911  if (tablespaces)
8912  *tablespaces = lappend(*tablespaces, ti);
8913  }
8914  FreeDir(tblspcdir);
8915 
8916  state->starttime = (pg_time_t) time(NULL);
8917  }
8919 
8920  state->started_in_recovery = backup_started_in_recovery;
8921 
8922  /*
8923  * Mark that the start phase has correctly finished for the backup.
8924  */
8926 }
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:941
int errcode(int sqlerrcode)
Definition: elog.c:859
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:68
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:102
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:94
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:553
XLogRecPtr lastBackupStart
Definition: xlog.c:445
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:7894
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9225
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:284
#define CHECKPOINT_WAIT
Definition: xlog.h:143
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:139
#define XLogIsNeeded()
Definition: xlog.h:107

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert(), backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 8951 of file xlog.c.

8952 {
8953  bool backup_stopped_in_recovery = false;
8954  char histfilepath[MAXPGPATH];
8955  char lastxlogfilename[MAXFNAMELEN];
8956  char histfilename[MAXFNAMELEN];
8957  XLogSegNo _logSegNo;
8958  FILE *fp;
8959  int seconds_before_warning;
8960  int waits = 0;
8961  bool reported_waiting = false;
8962 
8963  Assert(state != NULL);
8964 
8965  backup_stopped_in_recovery = RecoveryInProgress();
8966 
8967  /*
8968  * During recovery, we don't need to check WAL level, because if WAL
8969  * level is not sufficient, it's impossible to get here during recovery.
8970  */
8971  if (!backup_stopped_in_recovery && !XLogIsNeeded())
8972  ereport(ERROR,
8973  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8974  errmsg("WAL level not sufficient for making an online backup"),
8975  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
8976 
8977  /*
8978  * OK to update backup counter and session-level lock.
8979  *
8980  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
8981  * otherwise they can be updated inconsistently, which might cause
8982  * do_pg_abort_backup() to fail.
8983  */
8985 
8986  /*
8987  * It is expected that each do_pg_backup_start() call is matched by
8988  * exactly one do_pg_backup_stop() call.
8989  */
8992 
8993  /*
8994  * Clean up session-level lock.
8995  *
8996  * You might think that WALInsertLockRelease() can be called before
8997  * cleaning up session-level lock because session-level lock doesn't need
8998  * to be protected with WAL insertion lock. But since
8999  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9000  * cleaned up before it.
9001  */
9003 
9005 
9006  /*
9007  * If we are taking an online backup from the standby, we confirm that the
9008  * standby has not been promoted during the backup.
9009  */
9010  if (state->started_in_recovery && !backup_stopped_in_recovery)
9011  ereport(ERROR,
9012  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9013  errmsg("the standby was promoted during online backup"),
9014  errhint("This means that the backup being taken is corrupt "
9015  "and should not be used. "
9016  "Try taking another online backup.")));
9017 
9018  /*
9019  * During recovery, we don't write an end-of-backup record. We assume that
9020  * pg_control was backed up last and its minimum recovery point can be
9021  * available as the backup end location. Since we don't have an
9022  * end-of-backup record, we use the pg_control value to check whether
9023  * we've reached the end of backup when starting recovery from this
9024  * backup. We have no way of checking if pg_control wasn't backed up last
9025  * however.
9026  *
9027  * We don't force a switch to new WAL file but it is still possible to
9028  * wait for all the required files to be archived if waitforarchive is
9029  * true. This is okay if we use the backup to start a standby and fetch
9030  * the missing WAL using streaming replication. But in the case of an
9031  * archive recovery, a user should set waitforarchive to true and wait for
9032  * them to be archived to ensure that all the required files are
9033  * available.
9034  *
9035  * We return the current minimum recovery point as the backup end
9036  * location. Note that it can be greater than the exact backup end
9037  * location if the minimum recovery point is updated after the backup of
9038  * pg_control. This is harmless for current uses.
9039  *
9040  * XXX currently a backup history file is for informational and debug
9041  * purposes only. It's not essential for an online backup. Furthermore,
9042  * even if it's created, it will not be archived during recovery because
9043  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9044  * backup history file during recovery.
9045  */
9046  if (backup_stopped_in_recovery)
9047  {
9048  XLogRecPtr recptr;
9049 
9050  /*
9051  * Check to see if all WAL replayed during online backup contain
9052  * full-page writes.
9053  */
9055  recptr = XLogCtl->lastFpwDisableRecPtr;
9057 
9058  if (state->startpoint <= recptr)
9059  ereport(ERROR,
9060  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9061  errmsg("WAL generated with full_page_writes=off was replayed "
9062  "during online backup"),
9063  errhint("This means that the backup being taken on the standby "
9064  "is corrupt and should not be used. "
9065  "Enable full_page_writes and run CHECKPOINT on the primary, "
9066  "and then try an online backup again.")));
9067 
9068 
9069  LWLockAcquire(ControlFileLock, LW_SHARED);
9070  state->stoppoint = ControlFile->minRecoveryPoint;
9072  LWLockRelease(ControlFileLock);
9073  }
9074  else
9075  {
9076  char *history_file;
9077 
9078  /*
9079  * Write the backup-end xlog record
9080  */
9081  XLogBeginInsert();
9082  XLogRegisterData((char *) (&state->startpoint),
9083  sizeof(state->startpoint));
9084  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9085 
9086  /*
9087  * Given that we're not in recovery, InsertTimeLineID is set and can't
9088  * change, so we can read it without a lock.
9089  */
9090  state->stoptli = XLogCtl->InsertTimeLineID;
9091 
9092  /*
9093  * Force a switch to a new xlog segment file, so that the backup is
9094  * valid as soon as archiver moves out the current segment file.
9095  */
9096  RequestXLogSwitch(false);
9097 
9098  state->stoptime = (pg_time_t) time(NULL);
9099 
9100  /*
9101  * Write the backup history file
9102  */
9103  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9104  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9105  state->startpoint, wal_segment_size);
9106  fp = AllocateFile(histfilepath, "w");
9107  if (!fp)
9108  ereport(ERROR,
9110  errmsg("could not create file \"%s\": %m",
9111  histfilepath)));
9112 
9113  /* Build and save the contents of the backup history file */
9114  history_file = build_backup_content(state, true);
9115  fprintf(fp, "%s", history_file);
9116  pfree(history_file);
9117 
9118  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9119  ereport(ERROR,
9121  errmsg("could not write file \"%s\": %m",
9122  histfilepath)));
9123 
9124  /*
9125  * Clean out any no-longer-needed history files. As a side effect,
9126  * this will post a .ready file for the newly created history file,
9127  * notifying the archiver that history file may be archived
9128  * immediately.
9129  */
9131  }
9132 
9133  /*
9134  * If archiving is enabled, wait for all the required WAL files to be
9135  * archived before returning. If archiving isn't enabled, the required WAL
9136  * needs to be transported via streaming replication (hopefully with
9137  * wal_keep_size set high enough), or some more exotic mechanism like
9138  * polling and copying files from pg_wal with a script. We have no knowledge
9139  * of those mechanisms, so it's up to the user to ensure that he gets all
9140  * the required WAL.
9141  *
9142  * We wait until both the last WAL file filled during backup and the
9143  * history file have been archived, and assume that the alphabetic sorting
9144  * property of the WAL files ensures any earlier WAL files are safely
9145  * archived as well.
9146  *
9147  * We wait forever, since archive_command is supposed to work and we
9148  * assume the admin wanted his backup to work completely. If you don't
9149  * wish to wait, then either waitforarchive should be passed in as false,
9150  * or you can set statement_timeout. Also, some notices are issued to
9151  * clue in anyone who might be doing this interactively.
9152  */
9153 
9154  if (waitforarchive &&
9155  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9156  (backup_stopped_in_recovery && XLogArchivingAlways())))
9157  {
9158  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9159  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9161 
9162  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9163  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9164  state->startpoint, wal_segment_size);
9165 
9166  seconds_before_warning = 60;
9167  waits = 0;
9168 
9169  while (XLogArchiveIsBusy(lastxlogfilename) ||
9170  XLogArchiveIsBusy(histfilename))
9171  {
9173 
9174  if (!reported_waiting && waits > 5)
9175  {
9176  ereport(NOTICE,
9177  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9178  reported_waiting = true;
9179  }
9180 
9181  (void) WaitLatch(MyLatch,
9183  1000L,
9184  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9186 
9187  if (++waits >= seconds_before_warning)
9188  {
9189  seconds_before_warning *= 2; /* This wraps in >10 years... */
9190  ereport(WARNING,
9191  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9192  waits),
9193  errhint("Check that your archive_command is executing properly. "
9194  "You can safely cancel this backup, "
9195  "but the database backup will not be usable without all the WAL segments.")));
9196  }
9197  }
9198 
9199  ereport(NOTICE,
9200  (errmsg("all required WAL segments have been archived")));
9201  }
9202  else if (waitforarchive)
9203  ereport(NOTICE,
9204  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9205 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2583
int FreeFile(FILE *file)
Definition: fd.c:2781
struct Latch * MyLatch
Definition: globals.c:60
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:72
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4088
#define XLogArchivingAlways()
Definition: xlog.h:100
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert(), BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 8932 of file xlog.c.

8933 {
8934  return sessionBackupState;
8935 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8426 of file xlog.c.

8427 {
8428  int o_direct_flag = 0;
8429 
8430  /*
8431  * Use O_DIRECT if requested, except in walreceiver process. The WAL
8432  * written by walreceiver is normally read by the startup process soon
8433  * after it's written. Also, walreceiver performs unaligned writes, which
8434  * don't work with O_DIRECT, so it is required for correctness too.
8435  */
8436  if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
8437  o_direct_flag = PG_O_DIRECT;
8438 
8439  /* If fsync is disabled, never open in sync mode */
8440  if (!enableFsync)
8441  return o_direct_flag;
8442 
8443  switch (method)
8444  {
8445  /*
8446  * enum values for all sync options are defined even if they are
8447  * not supported on the current platform. But if not, they are
8448  * not included in the enum option array, and therefore will never
8449  * be seen here.
8450  */
8451  case WAL_SYNC_METHOD_FSYNC:
8452  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8453  case WAL_SYNC_METHOD_FDATASYNC:
8454  return o_direct_flag;
8455 #ifdef O_SYNC
8456  case WAL_SYNC_METHOD_OPEN:
8457  return O_SYNC | o_direct_flag;
8458 #endif
8459 #ifdef O_DSYNC
8460  case WAL_SYNC_METHOD_OPEN_DSYNC:
8461  return O_DSYNC | o_direct_flag;
8462 #endif
8463  default:
8464  /* can't happen (unless we are out of sync with option array) */
8465  elog(ERROR, "unrecognized wal_sync_method: %d", method);
8466  return 0; /* silence warning */
8467  }
8468 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:97
bool enableFsync
Definition: globals.c:126
#define AmWalReceiverProcess()
Definition: miscadmin.h:380
#define O_DSYNC
Definition: win32_port.h:352
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4749 of file xlog.c.

4750 {
4751  return ControlFile->wal_level;
4752 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4494 of file xlog.c.

4495 {
4496  return pg_atomic_fetch_add_u64(&XLogCtl->unloggedLSN, 1);
4497 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:518

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6334 of file xlog.c.

6335 {
6336  *RedoRecPtr_p = RedoRecPtr;
6337  *doPageWrites_p = doPageWrites;
6338 }
static bool doPageWrites
Definition: xlog.c:286

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6349 of file xlog.c.

6350 {
6351  XLogRecPtr recptr;
6352 
6353  SpinLockAcquire(&XLogCtl->info_lck);
6354  recptr = XLogCtl->LogwrtRqst.Write;
6355  SpinLockRelease(&XLogCtl->info_lck);
6356 
6357  return recptr;
6358 }

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6406 of file xlog.c.

6407 {
6408  XLogRecPtr res = InvalidXLogRecPtr;
6409  int i;
6410 
6411  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6412  {
6413  XLogRecPtr last_important;
6414 
6415  /*
6416  * Need to take a lock to prevent torn reads of the LSN, which are
6417  * possible on some of the supported platforms. WAL insert locks only
6418  * support exclusive mode, so we have to use that.
6419  */
6420  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6421  last_important = WALInsertLocks[i].l.lastImportantAt;
6422  LWLockRelease(&WALInsertLocks[i].l.lock);
6423 
6424  if (res < last_important)
6425  res = last_important;
6426  }
6427 
6428  return res;
6429 }
int i
Definition: isn.c:73
XLogRecPtr lastImportantAt
Definition: xlog.c:377
WALInsertLock l
Definition: xlog.c:389
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:571
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:150

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6435 of file xlog.c.

6436 {
6437  pg_time_t result;
6438 
6439  /* Need WALWriteLock, but shared lock is sufficient */
6440  LWLockAcquire(WALWriteLock, LW_SHARED);
6441  result = XLogCtl->lastSegSwitchTime;
6442  *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6443  LWLockRelease(WALWriteLock);
6444 
6445  return result;
6446 }
pg_time_t lastSegSwitchTime
Definition: xlog.c:473
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:474

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4468 of file xlog.c.

4469 {
4470  Assert(ControlFile != NULL);
4471  return ControlFile->mock_authentication_nonce;
4472 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:228

References Assert(), ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

Definition at line 9296 of file xlog.c.

9297 {
9298  LWLockAcquire(ControlFileLock, LW_SHARED);
9299  *oldrecptr = ControlFile->checkPointCopy.redo;
9300  *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
9301  LWLockRelease(ControlFileLock);
9302 }

References ControlFileData::checkPointCopy, ControlFile, LW_SHARED, LWLockAcquire(), LWLockRelease(), CheckPoint::redo, and CheckPoint::ThisTimeLineID.

Referenced by ExecuteRecoveryCommand(), and RestoreArchivedFile().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6237 of file xlog.c.

6238 {
6239  RecoveryState retval;
6240 
6241  SpinLockAcquire(&XLogCtl->info_lck);
6242  retval = XLogCtl->SharedRecoveryState;
6243  SpinLockRelease(&XLogCtl->info_lck);
6244 
6245  return retval;
6246 }
RecoveryState
Definition: xlog.h:88

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6304 of file xlog.c.

6305 {
6306  XLogRecPtr ptr;
6307 
6308  /*
6309  * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6310  * grabbed a WAL insertion lock to read the authoritative value in
6311  * Insert->RedoRecPtr, someone might update it just after we've released
6312  * the lock.
6313  */
6314  SpinLockAcquire(&XLogCtl->info_lck);
6315  ptr = XLogCtl->RedoRecPtr;
6316  SpinLockRelease(&XLogCtl->info_lck);
6317 
6318  if (RedoRecPtr < ptr)
6319  RedoRecPtr = ptr;
6320 
6321  return RedoRecPtr;
6322 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7703 of file xlog.c.

7704 {
7705  XLogRecPtr currpos; /* current write LSN */
7706  XLogSegNo currSeg; /* segid of currpos */
7707  XLogSegNo targetSeg; /* segid of targetLSN */
7708  XLogSegNo oldestSeg; /* actual oldest segid */
7709  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7710  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7711  uint64 keepSegs;
7712 
7713  /*
7714  * slot does not reserve WAL. Either deactivated, or has never been active
7715  */
7716  if (XLogRecPtrIsInvalid(targetLSN))
7717  return WALAVAIL_INVALID_LSN;
7718 
7719  /*
7720  * Calculate the oldest segment currently reserved by all slots,
7721  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7722  * oldestSlotSeg to the current segment.
7723  */
7724  currpos = GetXLogWriteRecPtr();
7725  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7726  KeepLogSeg(currpos, &oldestSlotSeg);
7727 
7728  /*
7729  * Find the oldest extant segment file. We get 1 until checkpoint removes
7730  * the first WAL segment file since startup, which causes the status being
7731  * wrong under certain abnormal conditions but that doesn't actually harm.
7732  */
7733  oldestSeg = XLogGetLastRemovedSegno() + 1;
7734 
7735  /* calculate oldest segment by max_wal_size */
7736  XLByteToSeg(currpos, currSeg, wal_segment_size);
7737  keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7738 
7739  if (currSeg > keepSegs)
7740  oldestSegMaxWalSize = currSeg - keepSegs;
7741  else
7742  oldestSegMaxWalSize = 1;
7743 
7744  /* the segment we care about */
7745  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7746 
7747  /*
7748  * No point in returning reserved or extended status values if the
7749  * targetSeg is known to be lost.
7750  */
7751  if (targetSeg >= oldestSlotSeg)
7752  {
7753  /* show "reserved" when targetSeg is within max_wal_size */
7754  if (targetSeg >= oldestSegMaxWalSize)
7755  return WALAVAIL_RESERVED;
7756 
7757  /* being retained by slots exceeding max_wal_size */
7758  return WALAVAIL_EXTENDED;
7759  }
7760 
7761  /* WAL segments are no longer retained but haven't been removed yet */
7762  if (targetSeg >= oldestSeg)
7763  return WALAVAIL_UNRESERVED;
7764 
7765  /* Definitely lost */
7766  return WALAVAIL_REMOVED;
7767 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3688
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9282
@ WALAVAIL_REMOVED
Definition: xlog.h:191
@ WALAVAIL_RESERVED
Definition: xlog.h:187
@ WALAVAIL_UNRESERVED
Definition: xlog.h:190
@ WALAVAIL_EXTENDED
Definition: xlog.h:188
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:186

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6389 of file xlog.c.

6390 {
6391  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6392 
6393  /* Since the value can't be changing, no lock is required. */
6394  return XLogCtl->InsertTimeLineID;
6395 }

References Assert(), XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1602 of file xlog.c.

1603 {
1604  int idx;
1605  XLogRecPtr endptr;
1606  static uint64 cachedPage = 0;
1607  static char *cachedPos = NULL;
1608  XLogRecPtr expectedEndPtr;
1609 
1610  /*
1611  * Fast path for the common case that we need to access again the same
1612  * page as last time.
1613  */
1614  if (ptr / XLOG_BLCKSZ == cachedPage)
1615  {
1616  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1617  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1618  return cachedPos + ptr % XLOG_BLCKSZ;
1619  }
1620 
1621  /*
1622  * The XLog buffer cache is organized so that a page is always loaded to a
1623  * particular buffer. That way we can easily calculate the buffer a given
1624  * page must be loaded into, from the XLogRecPtr alone.
1625  */
1626  idx = XLogRecPtrToBufIdx(ptr);
1627 
1628  /*
1629  * See what page is loaded in the buffer at the moment. It could be the
1630  * page we're looking for, or something older. It can't be anything newer
1631  * - that would imply the page we're looking for has already been written
1632  * out to disk and evicted, and the caller is responsible for making sure
1633  * that doesn't happen.
1634  *
1635  * We don't hold a lock while we read the value. If someone is just about
1636  * to initialize or has just initialized the page, it's possible that we
1637  * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1638  * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1639  * we're looking for.
1640  */
1641  expectedEndPtr = ptr;
1642  expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1643 
1644  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1645  if (expectedEndPtr != endptr)
1646  {
1647  XLogRecPtr initializedUpto;
1648 
1649  /*
1650  * Before calling AdvanceXLInsertBuffer(), which can block, let others
1651  * know how far we're finished with inserting the record.
1652  *
1653  * NB: If 'ptr' points to just after the page header, advertise a
1654  * position at the beginning of the page rather than 'ptr' itself. If
1655  * there are no other insertions running, someone might try to flush
1656  * up to our advertised location. If we advertised a position after
1657  * the page header, someone might try to flush the page header, even
1658  * though page might actually not be initialized yet. As the first
1659  * inserter on the page, we are effectively responsible for making
1660  * sure that it's initialized, before we let insertingAt to move past
1661  * the page header.
1662  */
1663  if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1664  XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1665  initializedUpto = ptr - SizeOfXLogShortPHD;
1666  else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1667  XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1668  initializedUpto = ptr - SizeOfXLogLongPHD;
1669  else
1670  initializedUpto = ptr;
1671 
1672  WALInsertLockUpdateInsertingAt(initializedUpto);
1673 
1674  AdvanceXLInsertBuffer(ptr, tli, false);
1675  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1676 
1677  if (expectedEndPtr != endptr)
1678  elog(PANIC, "could not find WAL buffer for %X/%X",
1679  LSN_FORMAT_ARGS(ptr));
1680  }
1681  else
1682  {
1683  /*
1684  * Make sure the initialization of the page is visible to us, and
1685  * won't arrive later to overwrite the WAL data we write on the page.
1686  */
1687  pg_memory_barrier();
1688  }
1689 
1690  /*
1691  * Found the buffer holding this page. Return a pointer to the right
1692  * offset within the page.
1693  */
1694  cachedPage = ptr / XLOG_BLCKSZ;
1695  cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1696 
1697  Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1698  Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1699 
1700  return cachedPos + ptr % XLOG_BLCKSZ;
1701 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_memory_barrier()
Definition: atomics.h:138
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1459
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1944

References AdvanceXLInsertBuffer(), Assert(), elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9266 of file xlog.c.

9267 {
9268  XLogCtlInsert *Insert = &XLogCtl->Insert;
9269  uint64 current_bytepos;
9270 
9271  SpinLockAcquire(&Insert->insertpos_lck);
9272  current_bytepos = Insert->CurrBytePos;
9273  SpinLockRelease(&Insert->insertpos_lck);
9274 
9275  return XLogBytePosToRecPtr(current_bytepos);
9276 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier)
static

Definition at line 4131 of file xlog.c.

4132 {
4133  char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4134 
4135  /*
4136  * Generate a random nonce. This is used for authentication requests that
4137  * will fail because the user does not exist. The nonce is used to create
4138  * a genuine-looking password challenge for the non-existent user, in lieu
4139  * of an actual stored password.
4140  */
4141  if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4142  ereport(PANIC,
4143  (errcode(ERRCODE_INTERNAL_ERROR),
4144  errmsg("could not generate secret authorization token")));
4145 
4146  memset(ControlFile, 0, sizeof(ControlFileData));
4147  /* Initialize pg_control status fields */
4148  ControlFile->system_identifier = sysidentifier;
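4149  memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
4150  ControlFile->state = DB_SHUTDOWNED;
4151  ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
4152 
4153  /* Set important parameter values for use when replaying WAL */
4154  ControlFile->MaxConnections = MaxConnections;
4155  ControlFile->max_worker_processes = max_worker_processes;
4156  ControlFile->max_wal_senders = max_wal_senders;
4157  ControlFile->max_prepared_xacts = max_prepared_xacts;
4158  ControlFile->max_locks_per_xact = max_locks_per_xact;
4159  ControlFile->wal_level = wal_level;
4160  ControlFile->wal_log_hints = wal_log_hints;
4161  ControlFile->track_commit_timestamp = track_commit_timestamp;
4162  ControlFile->data_checksum_version = bootstrap_data_checksum_version;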
4163 }
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:184
int wal_level
Definition: xlog.c:131
bool wal_log_hints
Definition: xlog.c:123
uint32 bootstrap_data_checksum_version
Definition: bootstrap.c:44
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36

References bootstrap_data_checksum_version, ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4674 of file xlog.c.

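4675 {
4676  Assert(process_shared_preload_libraries_done);
4677 
4678  if (check_wal_consistency_checking_deferred)
4679  {
4680  struct config_generic *guc;
4681 
4682  guc = find_option("wal_consistency_checking", false, false, ERROR);
4683 
4684  check_wal_consistency_checking_deferred = false;
4685 
4686  set_config_option_ext("wal_consistency_checking",
4687  wal_consistency_checking_string,
4688  guc->scontext, guc->source, guc->srole,
4689  GUC_ACTION_SET, true, ERROR, false);
4690 
4691  /* checking should not be deferred again */
4692  Assert(!check_wal_consistency_checking_deferred);
4693  }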
4694 }
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1237
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3373
@ GUC_ACTION_SET
Definition: guc.h:199
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:125

References Assert(), check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3493 of file xlog.c.

3495 {
3496  char path[MAXPGPATH];
3497  struct stat stat_buf;
3498 
3499  Assert(tli != 0);
3500 
3501  XLogFilePath(path, tli, *segno, wal_segment_size);
3502 
3503  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3504  if (!XLogCtl->InstallXLogFileSegmentActive)
3505  {
3506  LWLockRelease(ControlFileLock);
3507  return false;
3508  }
3509 
3510  if (!find_free)
3511  {
3512  /* Force installation: get rid of any pre-existing segment file */
3513  durable_unlink(path, DEBUG1);
3514  }
3515  else
3516  {
3517  /* Find a free slot to put it in */
3518  while (stat(path, &stat_buf) == 0)
3519  {
3520  if ((*segno) >= max_segno)
3521  {
3522  /* Failed to find a free slot within specified range */
3523  LWLockRelease(ControlFileLock);
3524  return false;
3525  }
3526  (*segno)++;
3527  XLogFilePath(path, tli, *segno, wal_segment_size);
3528  }
3529  }
3530 
3531  Assert(access(path, F_OK) != 0 && errno == ENOENT);
3532  if (durable_rename(tmppath, path, LOG) != 0)
3533  {
3534  LWLockRelease(ControlFileLock);
3535  /* durable_rename already emitted log message */
3536  return false;
3537  }
3538 
3539  LWLockRelease(ControlFileLock);
3540 
3541  return true;
3542 }
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:528
#define stat
Definition: win32_port.h:284

References Assert(), DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9325 of file xlog.c.

9326 {
9327  bool result;
9328 
9329  LWLockAcquire(ControlFileLock, LW_SHARED);
9330  result = XLogCtl->InstallXLogFileSegmentActive;
9331  LWLockRelease(ControlFileLock);
9332 
9333  return result;
9334 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8516 of file xlog.c.

8517 {
8518  char *msg = NULL;
8519  instr_time start;
8520 
8521  Assert(tli != 0);
8522 
8523  /*
8524  * Quick exit if fsync is disabled or write() has already synced the WAL
8525  * file.
8526  */
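8527  if (!enableFsync ||
8528  wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8529  wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)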
8530  return;
8531 
8532  /* Measure I/O timing to sync the WAL file */
8533  if (track_wal_io_timing)
8534  INSTR_TIME_SET_CURRENT(start);
8535  else
8536  INSTR_TIME_SET_ZERO(start);
8537 
8538  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8539  switch (wal_sync_method)
8540  {
8541  case WAL_SYNC_METHOD_FSYNC:
8542  if (pg_fsync_no_writethrough(fd) != 0)
8543  msg = _("could not fsync file \"%s\": %m");
8544  break;
8545 #ifdef HAVE_FSYNC_WRITETHROUGH
8546  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8547  if (pg_fsync_writethrough(fd) != 0)
8548  msg = _("could not fsync write-through file \"%s\": %m");
8549  break;
8550 #endif
8551  case WAL_SYNC_METHOD_FDATASYNC:
8552  if (pg_fdatasync(fd) != 0)
8553  msg = _("could not fdatasync file \"%s\": %m");
8554  break;
8555  case WAL_SYNC_METHOD_OPEN:
8556  case WAL_SYNC_METHOD_OPEN_DSYNC:
8557  /* not reachable */
8558  Assert(false);
8559  break;
8560  default:
8561  elog(PANIC, "unrecognized wal_sync_method: %d", wal_sync_method);
8562  break;
8563  }
8564 
8565  /* PANIC if failed to fsync */
8566  if (msg)
8567  {
8568  char xlogfname[MAXFNAMELEN];
8569  int save_errno = errno;
8570 
8571  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8572  errno = save_errno;
8573  ereport(PANIC,
8574  (errcode_for_file_access(),
8575  errmsg(msg, xlogfname)));
8576  }
8577 
8578  pgstat_report_wait_end();
8579 
8580  /*
8581  * Increment the I/O timing and the number of times WAL files were synced.
8582  */
8583  if (track_wal_io_timing)
8584  {
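8585  instr_time end;
8586 
8587  INSTR_TIME_SET_CURRENT(end);
8588  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
8589  }
8590 
8591  PendingWalStats.wal_sync++;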
8592 }
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:456
PgStat_Counter wal_sync
Definition: pgstat.h:454
bool track_wal_io_timing
Definition: xlog.c:137

References _, Assert(), elog, enableFsync, ereport, errcode_for_file_access(), errmsg(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 7787 of file xlog.c.

7788 {
7789  XLogSegNo currSegNo;
7790  XLogSegNo segno;
7791  XLogRecPtr keep;
7792 
7793  XLByteToSeg(recptr, currSegNo, wal_segment_size);
7794  segno = currSegNo;
7795 
7796  /*
7797  * Calculate how many segments are kept by slots first, adjusting for
7798  * max_slot_wal_keep_size.
7799  */
7800  keep = XLogGetReplicationSlotMinimumLSN();
7801  if (keep != InvalidXLogRecPtr && keep < recptr)
7802  {
7803  XLByteToSeg(keep, segno, wal_segment_size);
7804 
7805  /* Cap by max_slot_wal_keep_size ... */
7806  if (max_slot_wal_keep_size_mb >= 0)
7807  {
7808  uint64 slot_keep_segs;
7809 
7810  slot_keep_segs =
7811  ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size);
7812 
7813  if (currSegNo - segno > slot_keep_segs)
7814  segno = currSegNo - slot_keep_segs;
7815  }
7816  }
7817 
7818  /*
7819  * If WAL summarization is in use, don't remove WAL that has yet to be
7820  * summarized.
7821  */
7822  keep = GetOldestUnsummarizedLSN(NULL, NULL, false);
7823  if (keep != InvalidXLogRecPtr)
7824  {
7825  XLogSegNo unsummarized_segno;
7826 
7827  XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
7828  if (unsummarized_segno < segno)
7829  segno = unsummarized_segno;
7830  }
7831 
7832  /* but, keep at least wal_keep_size if that's set */
7833  if (wal_keep_size_mb > 0)
7834  {
7835  uint64 keep_segs;
7836 
7837  keep_segs = ConvertToXSegs(wal_keep_size_mb, wal_segment_size);
7838  if (currSegNo - segno < keep_segs)
7839  {
7840  /* avoid underflow, don't go below 1 */
7841  if (currSegNo <= keep_segs)
7842  segno = 1;
7843  else
7844  segno = currSegNo - keep_segs;
7845  }
7846  }
7847 
7848  /* don't delete WAL segments newer than the calculated segment */
7849  if (segno < *logSegNo)
7850  *logSegNo = segno;
7851 }
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact, bool reset_pending_lsn)
int wal_keep_size_mb
Definition: xlog.c:116
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2627
int max_slot_wal_keep_size_mb
Definition: xlog.c:135

References ConvertToXSegs, GetOldestUnsummarizedLSN(), InvalidXLogRecPtr, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, and XLogGetReplicationSlotMinimumLSN().

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4736 of file xlog.c.

4737 {
4738  Assert(reset || ControlFile == NULL);
4739  ControlFile = palloc(sizeof(ControlFileData));
4740  ReadControlFile();
4741 }
void reset(void)
Definition: sql-declare.c:600

References Assert(), ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6289 of file xlog.c.

6290 {
6291  int oldXLogAllowed = LocalXLogInsertAllowed;
6292 
6293  LocalXLogInsertAllowed = 1;
6294 
6295  return oldXLogAllowed;
6296 }

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6531 of file xlog.c.

6532 {
6533  long write_msecs,
6534  sync_msecs,
6535  total_msecs,
6536  longest_msecs,
6537  average_msecs;
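6538  uint64 average_sync_time;
6539 
6540  CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
6541 
6542  write_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_write_t,
6543  CheckpointStats.ckpt_sync_t);
6544 
6545  sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
6546  CheckpointStats.ckpt_sync_end_t);
6547 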
6548  /* Accumulate checkpoint timing summary data, in milliseconds. */
6549  PendingCheckpointerStats.write_time += write_msecs;
6550  PendingCheckpointerStats.sync_time += sync_msecs;
6551 
6552  /*
6553  * All of the published timing statistics are accounted for. Only
6554  * continue if a log message is to be written.
6555  */
6556  if (!log_checkpoints)
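6557  return;
6558 
6559  total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
6560  CheckpointStats.ckpt_end_t);
6561 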
6562  /*
6563  * Timing values returned from CheckpointStats are in microseconds.
6564  * Convert to milliseconds for consistent printing.
6565  */
6566  longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6567 
6568  average_sync_time = 0;
6569  if (CheckpointStats.ckpt_sync_rels > 0)
6570  average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6571  CheckpointStats.ckpt_sync_rels;
6572  average_msecs = (long) ((average_sync_time + 999) / 1000);
6573 
6574  /*
6575  * ControlFileLock is not required to see ControlFile->checkPoint and
6576  * ->checkPointCopy here as we are the only updator of those variables at
6577  * this moment.
6578  */
6579  if (restartpoint)
6580  ereport(LOG,
6581  (errmsg("restartpoint complete: wrote %d buffers (%.1f%%); "
6582  "%d WAL file(s) added, %d removed, %d recycled; "
6583  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6584  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6585  "distance=%d kB, estimate=%d kB; "
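6586  "lsn=%X/%X, redo lsn=%X/%X",
6587  CheckpointStats.ckpt_bufs_written,
6588  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6589  CheckpointStats.ckpt_segs_added,
6590  CheckpointStats.ckpt_segs_removed,
6591  CheckpointStats.ckpt_segs_recycled,
6592  write_msecs / 1000, (int) (write_msecs % 1000),
6593  sync_msecs / 1000, (int) (sync_msecs % 1000),
6594  total_msecs / 1000, (int) (total_msecs % 1000),
6595  CheckpointStats.ckpt_sync_rels,
6596  longest_msecs / 1000, (int) (longest_msecs % 1000),
6597  average_msecs / 1000, (int) (average_msecs % 1000),
6598  (int) (PrevCheckPointDistance / 1024.0),
6599  (int) (CheckPointDistanceEstimate / 1024.0),
6600  LSN_FORMAT_ARGS(ControlFile->checkPoint),
6601  LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));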
6602  else
6603  ereport(LOG,
6604  (errmsg("checkpoint complete: wrote %d buffers (%.1f%%); "
6605  "%d WAL file(s) added, %d removed, %d recycled; "
6606  "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s; "
6607  "sync files=%d, longest=%ld.%03d s, average=%ld.%03d s; "
6608  "distance=%d kB, estimate=%d kB; "
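6609  "lsn=%X/%X, redo lsn=%X/%X",
6610  CheckpointStats.ckpt_bufs_written,
6611  (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
6612  CheckpointStats.ckpt_segs_added,
6613  CheckpointStats.ckpt_segs_removed,
6614  CheckpointStats.ckpt_segs_recycled,
6615  write_msecs / 1000, (int) (write_msecs % 1000),
6616  sync_msecs / 1000, (int) (sync_msecs % 1000),
6617  total_msecs / 1000, (int) (total_msecs % 1000),
6618  CheckpointStats.ckpt_sync_rels,
6619  longest_msecs / 1000, (int) (longest_msecs % 1000),
6620  average_msecs / 1000, (int) (average_msecs % 1000),
6621  (int) (PrevCheckPointDistance / 1024.0),
6622  (int) (CheckPointDistanceEstimate / 1024.0),
6623  LSN_FORMAT_ARGS(ControlFile->checkPoint),
6624  LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));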
6625 }
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1766
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:173
uint64 ckpt_longest_sync
Definition: xlog.h:172
TimestampTz ckpt_end_t
Definition: xlog.h:163
int ckpt_sync_rels
Definition: xlog.h:171
PgStat_Counter sync_time
Definition: pgstat.h:269
PgStat_Counter write_time
Definition: pgstat.h:268
static double CheckPointDistanceEstimate
Definition: xlog.c:159
static double PrevCheckPointDistance
Definition: xlog.c:160

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6499 of file xlog.c.

/*
 * LogCheckpointStart: emit one LOG-level message announcing that a
 * checkpoint (or, when 'restartpoint' is true, a restartpoint) is starting.
 * Each CHECKPOINT_* bit set in 'flags' contributes one word to the message;
 * unset bits contribute an empty string.  The two branches differ only in
 * the leading word of the message.
 */
6500 {
6501  if (restartpoint)
6502  ereport(LOG,
6503  /* translator: the placeholders show checkpoint options */
6504  (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6505  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6506  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6507  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6508  (flags & CHECKPOINT_FORCE) ? " force" : "",
6509  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6510  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6511  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6512  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6513  else
6514  ereport(LOG,
6515  /* translator: the placeholders show checkpoint options */
6516  (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6517  (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6518  (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6519  (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
6520  (flags & CHECKPOINT_FORCE) ? " force" : "",
6521  (flags & CHECKPOINT_WAIT) ? " wait" : "",
6522  (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6523  (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6524  (flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
6525 }
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:146
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:141
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:147

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FLUSH_ALL, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6151 of file xlog.c.

/*
 * PerformRecoveryXLogAction: finish recovery either by writing an
 * end-of-recovery record (promotion case) or by requesting an immediate
 * end-of-recovery checkpoint and waiting for it.  Returns true when the
 * promotion path was taken.
 */
6152 {
6153  bool promoted = false;
6154 
6155  /*
6156  * Perform a checkpoint to update all our recovery activity to disk.
6157  *
6158  * Note that we write a shutdown checkpoint rather than an on-line one.
6159  * This is not particularly critical, but since we may be assigning a new
6160  * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6161  * only changes in shutdown checkpoints, which allows some extra error
6162  * checking in xlog_redo.
6163  *
6164  * In promotion, only create a lightweight end-of-recovery record instead
6165  * of a full checkpoint. A checkpoint is requested later, after we're
6166  * fully out of recovery mode and already accepting queries.
6167  */
6168  if (ArchiveRecoveryRequested && IsUnderPostmaster &&
6169  PromoteIsTriggered())
6170  {
6171  promoted = true;
6172 
6173  /*
6174  * Insert a special WAL record to mark the end of recovery, since we
6175  * aren't doing a checkpoint. That means that the checkpointer process
6176  * may likely be in the middle of a time-smoothed restartpoint and
6177  * could continue to be for minutes after this. That sounds strange,
6178  * but the effect is roughly the same and it would be stranger to try
6179  * to come out of the restartpoint and then checkpoint. We request a
6180  * checkpoint later anyway, just for safety.
6181  */
6182  CreateEndOfRecoveryRecord();
6183  }
6184  else
6185  {
6186  RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
6187  CHECKPOINT_IMMEDIATE |
6188  CHECKPOINT_WAIT);
6189  }
6190 
6191  return promoted;
6192 }
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7210
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3620 of file xlog.c.

/*
 * PreallocXlogFiles: when 'endptr' lies at least 75% of the way into its
 * WAL segment, create the following segment file ahead of time via
 * XLogFileInitInternal and count it in CheckpointStats if one was added.
 * Does nothing while segment installation is not active.
 */
3621 {
3622  XLogSegNo _logSegNo;
3623  int lf;
3624  bool added;
3625  char path[MAXPGPATH];
3626  uint64 offset;
3627 
3628  if (!XLogCtl->InstallXLogFileSegmentActive)
3629  return; /* unlocked check says no */
3630 
3631  XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3632  offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3633  if (offset >= (uint32) (0.75 * wal_segment_size))
3634  {
3635  _logSegNo++;
3636  lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3637  if (lf >= 0)
3638  close(lf);
3639  if (added)
3640  CheckpointStats.ckpt_segs_added++;
3641  }
3642 }
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3140

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6114 of file xlog.c.

/*
 * ReachedEndOfBackup: under ControlFileLock, advance minRecoveryPoint (and
 * its timeline) to EndRecPtr/tli if it is behind, clear the backup
 * start/end markers, and write the control file back out.
 */
6115 {
6116  /*
6117  * We have reached the end of base backup, as indicated by pg_control. The
6118  * data on disk is now consistent (unless minRecoveryPoint is further
6119  * ahead, which can happen if we crashed during previous recovery). Reset
6120  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6121  * make sure we don't allow starting up at an earlier point even if
6122  * recovery is stopped and restarted soon after this.
6123  */
6124  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6125 
6126  if (ControlFile->minRecoveryPoint < EndRecPtr)
6127  {
6128  ControlFile->minRecoveryPoint = EndRecPtr;
6129  ControlFile->minRecoveryPointTLI = tli;
6130  }
6131 
6132  ControlFile->backupStartPoint = InvalidXLogRecPtr;
6133  ControlFile->backupEndPoint = InvalidXLogRecPtr;
6134  ControlFile->backupEndRequired = false;
6135  UpdateControlFile();
6136 
6137  LWLockRelease(ControlFileLock);
6138 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:169
bool backupEndRequired
Definition: pg_control.h:171
XLogRecPtr backupEndPoint
Definition: pg_control.h:170

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4248 of file xlog.c.

4249 {
4250  pg_crc32c crc;
4251  int fd;
4252  static char wal_segsz_str[20];
4253  int r;
4254 
4255  /*
4256  * Read data...
4257  */
4259  O_RDWR | PG_BINARY);
4260  if (fd < 0)
4261  ereport(PANIC,
4263  errmsg("could not open file \"%s\": %m",
4264  XLOG_CONTROL_FILE)));
4265 
4266  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4267  r = read(fd, ControlFile, sizeof(ControlFileData));
4268  if (r != sizeof(ControlFileData))
4269  {
4270  if (r < 0)
4271  ereport(PANIC,
4273  errmsg("could not read file \"%s\": %m",
4274  XLOG_CONTROL_FILE)));
4275  else
4276  ereport(PANIC,
4278  errmsg("could not read file \"%s\": read %d of %zu",
4279  XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4280  }
4282 
4283  close(fd);
4284 
4285  /*
4286  * Check for expected pg_control format version. If this is wrong, the
4287  * CRC check will likely fail because we'll be checking the wrong number
4288  * of bytes. Complaining about wrong version will probably be more
4289  * enlightening than complaining about wrong CRC.
4290  */
4291 
4293  ereport(FATAL,
4294  (errmsg("database files are incompatible with server"),
4295  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4296  " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4299  errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4300 
4302  ereport(FATAL,
4303  (errmsg("database files are incompatible with server"),
4304  errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4305  " but the server was compiled with PG_CONTROL_VERSION %d.",
4307  errhint("It looks like you need to initdb.")));
4308 
4309  /* Now check the CRC. */
4310  INIT_CRC32C(crc);
4311  COMP_CRC32C(crc,
4312  (char *) ControlFile,
4313  offsetof(ControlFileData, crc));
4314  FIN_CRC32C(crc);
4315 
4316  if (!EQ_CRC32C(crc, ControlFile->crc))
4317  ereport(FATAL,
4318  (errmsg("incorrect checksum in control file")));
4319 
4320  /*
4321  * Do compatibility checking immediately. If the database isn't
4322  * compatible with the backend executable, we want to abort before we can
4323  * possibly do any damage.
4324  */
4326  ereport(FATAL,
4327  (errmsg("database files are incompatible with server"),
4328  errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d,"
4329  " but the server was compiled with CATALOG_VERSION_NO %d.",
4331  errhint("It looks like you need to initdb.")));
4332  if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4333  ereport(FATAL,
4334  (errmsg("database files are incompatible with server"),
4335  errdetail("The database cluster was initialized with MAXALIGN %d,"
4336  " but the server was compiled with MAXALIGN %d.",
4337  ControlFile->maxAlign, MAXIMUM_ALIGNOF),
4338  errhint("It looks like you need to initdb.")));
4340  ereport(FATAL,
4341  (errmsg("database files are incompatible with server"),
4342  errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4343  errhint("It looks like you need to initdb.")));
4344  if (ControlFile->blcksz != BLCKSZ)
4345  ereport(FATAL,
4346  (errmsg("database files are incompatible with server"),
4347  errdetail("The database cluster was initialized with BLCKSZ %d,"
4348  " but the server was compiled with BLCKSZ %d.",
4349  ControlFile->blcksz, BLCKSZ),
4350  errhint("It looks like you need to recompile or initdb.")));
4351  if (ControlFile->relseg_size != RELSEG_SIZE)
4352  ereport(FATAL,
4353  (errmsg("database files are incompatible with server"),
4354  errdetail("The database cluster was initialized with RELSEG_SIZE %d,"
4355  " but the server was compiled with RELSEG_SIZE %d.",
4356  ControlFile->relseg_size, RELSEG_SIZE),
4357  errhint("It looks like you need to recompile or initdb.")));
4358  if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4359  ereport(FATAL,
4360  (errmsg("database files are incompatible with server"),
4361  errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
4362  " but the server was compiled with XLOG_BLCKSZ %d.",
4363  ControlFile->xlog_blcksz, XLOG_BLCKSZ),
4364  errhint("It looks like you need to recompile or initdb.")));
4366  ereport(FATAL,
4367  (errmsg("database files are incompatible with server"),
4368  errdetail("The database cluster was initialized with NAMEDATALEN %d,"
4369  " but the server was compiled with NAMEDATALEN %d.",
4371  errhint("It looks like you need to recompile or initdb.")));
4373  ereport(FATAL,
4374  (errmsg("database files are incompatible with server"),
4375  errdetail("The database cluster was initialized with INDEX_MAX_KEYS %d,"
4376  " but the server was compiled with INDEX_MAX_KEYS %d.",
4378  errhint("It looks like you need to recompile or initdb.")));
4380  ereport(FATAL,
4381  (errmsg("database files are incompatible with server"),
4382  errdetail("The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
4383  " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
4385  errhint("It looks like you need to recompile or initdb.")));
4387  ereport(FATAL,
4388  (errmsg("database files are incompatible with server"),
4389  errdetail("The database cluster was initialized with LOBLKSIZE %d,"
4390  " but the server was compiled with LOBLKSIZE %d.",
4391  ControlFile->loblksize, (int) LOBLKSIZE),
4392  errhint("It looks like you need to recompile or initdb.")));
4393 
4394 #ifdef USE_FLOAT8_BYVAL
4395  if (ControlFile->float8ByVal != true)
4396  ereport(FATAL,
4397  (errmsg("database files are incompatible with server"),
4398  errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL"
4399  " but the server was compiled with USE_FLOAT8_BYVAL."),
4400  errhint("It looks like you need to recompile or initdb.")));
4401 #else
4402  if (ControlFile->float8ByVal != false)
4403  ereport(FATAL,
4404  (errmsg("database files are incompatible with server"),
4405  errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL"
4406  " but the server was compiled without USE_FLOAT8_BYVAL."),
4407  errhint("It looks like you need to recompile or initdb.")));
4408 #endif
4409 
4411 
4413  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4414  errmsg_plural("invalid WAL segment size in control file (%d byte)",
4415  "invalid WAL segment size in control file (%d bytes)",
4418  errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4419 
4420  snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4421  SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4423 
4424  /* check and update variables dependent on wal_segment_size */
4426  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4427  errmsg("min_wal_size must be at least twice wal_segment_size")));
4428 
4430  ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4431  errmsg("max_wal_size must be at least twice wal_segment_size")));
4432 
4434  (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4436 
4438 
4439  /* Make the initdb settings visible as GUC variables, too */
4440  SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4442 }
#define PG_BINARY
Definition: c.h:1260
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1182
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1087
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4275
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_INTERNAL
Definition: guc.h:69
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define FLOATFORMAT_VALUE
Definition: pg_control.h:200
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:124
uint32 xlog_seg_size
Definition: pg_control.h:210
uint32 nameDataLen
Definition: pg_control.h:212
uint32 indexMaxKeys
Definition: pg_control.h:213
uint32 relseg_size
Definition: pg_control.h:207
uint32 catalog_version_no
Definition: pg_control.h:125
double floatFormat
Definition: pg_control.h:199
uint32 xlog_blcksz
Definition: pg_control.h:209
uint32 loblksize
Definition: pg_control.h:216
pg_crc32c crc
Definition: pg_control.h:231
uint32 toast_max_chunk_size
Definition: pg_control.h:215
#define UsableBytesInPage
Definition: xlog.c:599
bool DataChecksumsEnabled(void)
Definition: xlog.c:4478
static int UsableBytesInSegment
Definition: xlog.c:608
int min_wal_size_mb
Definition: xlog.c:115
#define XLOG_CONTROL_FILE

References BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG(), and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6201 of file xlog.c.

/*
 * RecoveryInProgress: report whether recovery is still in progress.  The
 * answer is cached in LocalRecoveryInProgress; once that is false the shared
 * state is no longer consulted.  Otherwise the cache is refreshed from
 * XLogCtl->SharedRecoveryState and the refreshed value is returned.
 */
6202 {
6203  /*
6204  * We check shared state each time only until we leave recovery mode. We
6205  * can't re-enter recovery, so there's no need to keep checking after the
6206  * shared variable has once been seen false.
6207  */
6208  if (!LocalRecoveryInProgress)
6209  return false;
6210  else
6211  {
6212  /*
6213  * use volatile pointer to make sure we make a fresh read of the
6214  * shared variable.
6215  */
6216  volatile XLogCtlData *xlogctl = XLogCtl;
6217 
6218  LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6219 
6220  /*
6221  * Note: We don't need a memory barrier when we're still in recovery.
6222  * We might exit recovery immediately after return, so the caller
6223  * can't rely on 'true' meaning that we're still in recovery anyway.
6224  */
6225 
6226  return LocalRecoveryInProgress;
6227  }
6228 }
static bool LocalRecoveryInProgress
Definition: xlog.c:224

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), 
show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7384 of file xlog.c.

/*
 * RecoveryRestartPoint: unless unresolved invalid-page references exist,
 * publish the given checkpoint record (its start/end pointers and a copy of
 * its contents) in XLogCtl under info_lck.
 */
7385 {
7386  /*
7387  * Also refrain from creating a restartpoint if we have seen any
7388  * references to non-existent pages. Restarting recovery from the
7389  * restartpoint would not see the references, so we would lose the
7390  * cross-check that the pages belonged to a relation that was dropped
7391  * later.
7392  */
7393  if (XLogHaveInvalidPages())
7394  {
7395  elog(DEBUG2,
7396  "could not record restart point at %X/%X because there "
7397  "are unresolved references to invalid pages",
7398  LSN_FORMAT_ARGS(checkPoint->redo));
7399  return;
7400  }
7401 
7402  /*
7403  * Copy the checkpoint record to shared memory, so that checkpointer can
7404  * work out the next time it wants to perform a restartpoint.
7405  */
7406  SpinLockAcquire(&XLogCtl->info_lck);
7407  XLogCtl->lastCheckPointRecPtr = record->ReadRecPtr;
7408  XLogCtl->lastCheckPointEndPtr = record->EndRecPtr;
7409  XLogCtl->lastCheckPoint = *checkPoint;
7410  SpinLockRelease(&XLogCtl->info_lck);
7411 }
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:235

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9252 of file xlog.c.

/*
 * register_persistent_abort_backup_handler: register do_pg_abort_backup as a
 * before_shmem_exit callback.  A static flag makes repeat calls in the same
 * process no-ops, so the callback is registered at most once.
 */
9253 {
9254  static bool already_done = false;
9255 
9256  if (already_done)
9257  return;
9258  before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
9259  already_done = true;
9260 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3870 of file xlog.c.

/*
 * RemoveNonParentXlogFiles: scan XLOGDIR and hand to RemoveXlogFile every
 * WAL segment file whose timeline part (first 8 characters of the name)
 * sorts before that of the switch segment on 'newTLI' and whose remaining
 * name part sorts after it.  Files that XLogArchiveIsReady reports as ready
 * are left in place.  The switch segment is computed from 'switchpoint'
 * with XLByteToPrevSeg; the recycle limit is 10 segments past the segment
 * XLByteToSeg yields for 'switchpoint'.
 */
3871 {
3872  DIR *xldir;
3873  struct dirent *xlde;
3874  char switchseg[MAXFNAMELEN];
3875  XLogSegNo endLogSegNo;
3876  XLogSegNo switchLogSegNo;
3877  XLogSegNo recycleSegNo;
3878 
3879  /*
3880  * Initialize info about where to begin the work. This will recycle,
3881  * somewhat arbitrarily, 10 future segments.
3882  */
3883  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3884  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3885  recycleSegNo = endLogSegNo + 10;
3886 
3887  /*
3888  * Construct a filename of the last segment to be kept.
3889  */
3890  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3891 
3892  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3893  switchseg);
3894 
3895  xldir = AllocateDir(XLOGDIR);
3896 
3897  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3898  {
3899  /* Ignore files that are not XLOG segments */
3900  if (!IsXLogFileName(xlde->d_name))
3901  continue;
3902 
3903  /*
3904  * Remove files that are on a timeline older than the new one we're
3905  * switching to, but with a segment number >= the first segment on the
3906  * new timeline.
3907  */
3908  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3909  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3910  {
3911  /*
3912  * If the file has already been marked as .ready, however, don't
3913  * remove it yet. It should be OK to remove it - files that are
3914  * not part of our timeline history are not required for recovery
3915  * - but seems safer to let them be archived and removed later.
3916  */
3917  if (!XLogArchiveIsReady(xlde->d_name))
3918  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3919  }
3920  }
3921 
3922  FreeDir(xldir);
3923 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:3939
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3795 of file xlog.c.

/*
 * RemoveOldXlogFiles: scan XLOGDIR and, for every (possibly partial) WAL
 * segment file whose name sorts at or before segment 'segno' (timeline part
 * ignored) and for which XLogArchiveCheckDone returns true, record it via
 * UpdateLastRemovedPtr and then pass it to RemoveXlogFile.
 */
3797 {
3798  DIR *xldir;
3799  struct dirent *xlde;
3800  char lastoff[MAXFNAMELEN];
3801  XLogSegNo endlogSegNo;
3802  XLogSegNo recycleSegNo;
3803 
3804  /* Initialize info about where to try to recycle to */
3805  XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3806  recycleSegNo = XLOGfileslop(lastredoptr);
3807 
3808  /*
3809  * Construct a filename of the last segment to be kept. The timeline ID
3810  * doesn't matter, we ignore that in the comparison. (During recovery,
3811  * InsertTimeLineID isn't set, so we can't use that.)
3812  */
3813  XLogFileName(lastoff, 0, segno, wal_segment_size);
3814 
3815  elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3816  lastoff);
3817 
3818  xldir = AllocateDir(XLOGDIR);
3819 
3820  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3821  {
3822  /* Ignore files that are not XLOG segments */
3823  if (!IsXLogFileName(xlde->d_name) &&
3824  !IsPartialXLogFileName(xlde->d_name))
3825  continue;
3826 
3827  /*
3828  * We ignore the timeline part of the XLOG segment identifiers in
3829  * deciding whether a segment is still needed. This ensures that we
3830  * won't prematurely remove a segment from a parent timeline. We could
3831  * probably be a little more proactive about removing segments of
3832  * non-parent timelines, but that would be a whole lot more
3833  * complicated.
3834  *
3835  * We use the alphanumeric sorting property of the filenames to decide
3836  * which ones are earlier than the lastoff segment.
3837  */
3838  if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3839  {
3840  if (XLogArchiveCheckDone(xlde->d_name))
3841  {
3842  /* Update the last removed location in shared memory first */
3843  UpdateLastRemovedPtr(xlde->d_name);
3844 
3845  RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3846  }
3847  }
3848  }
3849 
3850  FreeDir(xldir);
3851 }
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2199
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3742
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3762 of file xlog.c.

/*
 * RemoveTempXlogFiles: unlink every entry in XLOGDIR whose name starts with
 * "xlogtemp.".
 *
 * NOTE(review): the result of unlink() is not checked, so the DEBUG2
 * "removed" message is emitted even when the unlink failed.
 */
3763 {
3764  DIR *xldir;
3765  struct dirent *xlde;
3766 
3767  elog(DEBUG2, "removing all temporary WAL segments");
3768 
3769  xldir = AllocateDir(XLOGDIR);
3770  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3771  {
3772  char path[MAXPGPATH];
3773 
3774  if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3775  continue;
3776 
3777  snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3778  unlink(path);
3779  elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3780  }
3781  FreeDir(xldir);
3782 }

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 3939 of file xlog.c.

/*
 * RemoveXlogFile: recycle or delete one WAL segment file in XLOGDIR.
 *
 * The file is recycled (installed as a future segment via
 * InstallXLogFileSegment) when wal_recycle is on, *endlogSegNo has not
 * passed recycleSegNo, segment installation is active and the entry is a
 * regular file; *endlogSegNo is then advanced.  Otherwise the file is
 * durably unlinked.  CheckpointStats is updated for either outcome, and
 * XLogArchiveCleanup is called for the segment unless the rename (WIN32) or
 * unlink failed, in which case the function returns early.
 */
3942 {
3943  char path[MAXPGPATH];
3944 #ifdef WIN32
3945  char newpath[MAXPGPATH];
3946 #endif
3947  const char *segname = segment_de->d_name;
3948 
3949  snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
3950 
3951  /*
3952  * Before deleting the file, see if it can be recycled as a future log
3953  * segment. Only recycle normal files, because we don't want to recycle
3954  * symbolic links pointing to a separate archive directory.
3955  */
3956  if (wal_recycle &&
3957  *endlogSegNo <= recycleSegNo &&
3958  XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
3959  get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
3960  InstallXLogFileSegment(endlogSegNo, path,
3961  true, recycleSegNo, insertTLI))
3962  {
3963  ereport(DEBUG2,
3964  (errmsg_internal("recycled write-ahead log file \"%s\"",
3965  segname)));
3966  CheckpointStats.ckpt_segs_recycled++;
3967  /* Needn't recheck that slot on future iterations */
3968  (*endlogSegNo)++;
3969  }
3970  else
3971  {
3972  /* No need for any more future segments, or recycling failed ... */
3973  int rc;
3974 
3975  ereport(DEBUG2,
3976  (errmsg_internal("removing write-ahead log file \"%s\"",
3977  segname)));
3978 
3979 #ifdef WIN32
3980 
3981  /*
3982  * On Windows, if another process (e.g another backend) holds the file
3983  * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
3984  * will still show up in directory listing until the last handle is
3985  * closed. To avoid confusing the lingering deleted file for a live
3986  * WAL file that needs to be archived, rename it before deleting it.
3987  *
3988  * If another process holds the file open without FILE_SHARE_DELETE
3989  * flag, rename will fail. We'll try again at the next checkpoint.
3990  */
3991  snprintf(newpath, MAXPGPATH, "%s.deleted", path);
3992  if (rename(path, newpath) != 0)
3993  {
3994  ereport(LOG,
3995  (errcode_for_file_access(),
3996  errmsg("could not rename file \"%s\": %m",
3997  path)));
3998  return;
3999  }
4000  rc = durable_unlink(newpath, LOG);
4001 #else
4002  rc = durable_unlink(path, LOG);
4003 #endif
4004  if (rc != 0)
4005  {
4006  /* Message already logged by durable_unlink() */
4007  return;
4008  }
4009  CheckpointStats.ckpt_segs_removed++;
4010  }
4011 
4012  XLogArchiveCleanup(segname);
4013 }
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3493
bool wal_recycle
Definition: xlog.c:128

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles(), and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 7894 of file xlog.c.

/*
 * RequestXLogSwitch: insert an XLOG_SWITCH record (which carries no data)
 * and return the value XLogInsert returns for it.  When 'mark_unimportant'
 * is true the record is flagged XLOG_MARK_UNIMPORTANT before insertion.
 */
7895 {
7896  XLogRecPtr RecPtr;
7897 
7898  /* XLOG SWITCH has no data */
7899  XLogBeginInsert();
7900 
7901  if (mark_unimportant)
7902  XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
7903  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
7904 
7905  return RecPtr;
7906 }
#define XLOG_SWITCH
Definition: pg_control.h:71
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:153
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr * PrevPtr 
)
static

Definition at line 1098 of file xlog.c.

1100 {
1101  XLogCtlInsert *Insert = &XLogCtl->Insert;
1102  uint64 startbytepos;
1103  uint64 endbytepos;
1104  uint64 prevbytepos;
1105 
1106  size = MAXALIGN(size);
1107 
1108  /* All (non xlog-switch) records should contain data. */
1109  Assert(size > SizeOfXLogRecord);
1110 
1111  /*
1112  * The duration the spinlock needs to be held is minimized by minimizing
1113  * the calculations that have to be done while holding the lock. The
1114  * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1115  * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1116  * page headers. The mapping between "usable" byte positions and physical
1117  * positions (XLogRecPtrs) can be done outside the locked region, and
1118  * because the usable byte position doesn't include any headers, reserving
1119  * X bytes from WAL is almost as simple as "CurrBytePos += X".
1120  */
1121  SpinLockAcquire(&Insert->insertpos_lck);
1122 
1123  startbytepos = Insert->CurrBytePos;
1124  endbytepos = startbytepos + size;
1125  prevbytepos = Insert->PrevBytePos;
1126  Insert->CurrBytePos = endbytepos;
1127  Insert->PrevBytePos = startbytepos;
1128 
1129  SpinLockRelease(&Insert->insertpos_lck);
1130 
1131  *StartPos = XLogBytePosToRecPtr(startbytepos);
1132  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1133  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1134 
1135  /*
1136  * Check that the conversions between "usable byte positions" and
1137  * XLogRecPtrs work consistently in both directions.
1138  */
1139  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1140  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1141  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1142 }
#define MAXALIGN(LEN)
Definition: c.h:798
static pg_noinline void Size size
Definition: slab.c:607
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1857
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1900

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr * PrevPtr 
)
static

Definition at line 1154 of file xlog.c.

1155 {
1156  XLogCtlInsert *Insert = &XLogCtl->Insert;
1157  uint64 startbytepos;
1158  uint64 endbytepos;
1159  uint64 prevbytepos;
1160  uint32 size = MAXALIGN(SizeOfXLogRecord);
1161  XLogRecPtr ptr;
1162  uint32 segleft;
1163 
1164  /*
1165  * These calculations are a bit heavy-weight to be done while holding a
1166  * spinlock, but since we're holding all the WAL insertion locks, there
1167  * are no other inserters competing for it. GetXLogInsertRecPtr() does
1168  * compete for it, but that's not called very frequently.
1169  */
1170  SpinLockAcquire(&Insert->insertpos_lck);
1171 
1172  startbytepos = Insert->CurrBytePos;
1173 
1174  ptr = XLogBytePosToEndRecPtr(startbytepos);
1175  if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1176  {
1177  SpinLockRelease(&Insert->insertpos_lck);
1178  *EndPos = *StartPos = ptr;
1179  return false;
1180  }
1181 
1182  endbytepos = startbytepos + size;
1183  prevbytepos = Insert->PrevBytePos;
1184 
1185  *StartPos = XLogBytePosToRecPtr(startbytepos);
1186  *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1187 
1188  segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1189  if (segleft != wal_segment_size)
1190  {
1191  /* consume the rest of the segment */
1192  *EndPos += segleft;
1193  endbytepos = XLogRecPtrToBytePos(*EndPos);
1194  }
1195  Insert->CurrBytePos = endbytepos;
1196  Insert->PrevBytePos = startbytepos;
1197 
1198  SpinLockRelease(&Insert->insertpos_lck);
1199 
1200  *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1201 
1202  Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1203  Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1204  Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1205  Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1206 
1207  return true;
1208 }

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, size, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9317 of file xlog.c.

9318 {
9319  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9320  XLogCtl->InstallXLogFileSegmentActive = true;
9321  LWLockRelease(ControlFileLock);
9322 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9340 of file xlog.c.

9341 {
9342  SpinLockAcquire(&XLogCtl->info_lck);
9343  XLogCtl->WalWriterSleeping = sleeping;
9344  SpinLockRelease(&XLogCtl->info_lck);
9345 }
bool WalWriterSleeping
Definition: xlog.c:535

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char* show_archive_command ( void  )

Definition at line 4700 of file xlog.c.

4701 {
4702  if (XLogArchivingActive())
4703  return XLogArchiveCommand;
4704  else
4705  return "(disabled)";
4706 }
char * XLogArchiveCommand
Definition: xlog.c:120

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char* show_in_hot_standby ( void  )

Definition at line 4712 of file xlog.c.

4713 {
4714  /*
4715  * We display the actual state based on shared memory, so that this GUC
4716  * reports up-to-date state if examined intra-query. The underlying
4717  * variable (in_hot_standby_guc) changes only when we transmit a new value
4718  * to the client.
4719  */
4720  return RecoveryInProgress() ? "on" : "off";
4721 }

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6452 of file xlog.c.

6453 {
6454  /*
6455  * We should have an aux process resource owner to use, and we should not
6456  * be in a transaction that's installed some other resowner.
6457  */
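6458  Assert(AuxProcessResourceOwner != NULL);
6459  Assert(CurrentResourceOwner == NULL ||
6460  CurrentResourceOwner == AuxProcessResourceOwner);
6461  CurrentResourceOwner = AuxProcessResourceOwner;
6462 
6463  /* Don't be chatty in standalone mode */
6464  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6465  (errmsg("shutting down")));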
6466 
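6467  /*
6468  * Signal walsenders to move to stopping state.
6469  */
6470  WalSndInitStopping();
6471 
6472  /*
6473  * Wait for WAL senders to be in stopping state. This prevents commands
6474  * from writing new WAL.
6475  */
6476  WalSndWaitStopping();
6477 
6478  if (RecoveryInProgress())
6479  CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6480  else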
6481  {
6482  /*
6483  * If archiving is enabled, rotate the last XLOG file so that all the
6484  * remaining records are archived (postmaster wakes up the archiver
6485  * process one more time at the end of shutdown). The checkpoint
6486  * record will go to the next XLOG file and won't be archived (yet).
6487  */
6488  if (XLogArchivingActive())
6489  RequestXLogSwitch(false);
6490 
6491  CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6492  }
6493 }
bool IsPostmasterEnvironment
Definition: globals.c:116
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3745
void WalSndWaitStopping(void)
Definition: walsender.c:3771
bool CreateRestartPoint(int flags)
Definition: xlog.c:7425
void CreateCheckPoint(int flags)
Definition: xlog.c:6734

References Assert(), AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5307 of file xlog.c.

5308 {
5309  XLogCtlInsert *Insert;
5310  CheckPoint checkPoint;
5311  bool wasShutdown;
5312  bool didCrash;
5313  bool haveTblspcMap;
5314  bool haveBackupLabel;
5315  XLogRecPtr EndOfLog;
5316  TimeLineID EndOfLogTLI;
5317  TimeLineID newTLI;
5318  bool performedWalRecovery;
5319  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5320  XLogRecPtr abortedRecPtr;
5321  XLogRecPtr missingContrecPtr;
5322  TransactionId oldestActiveXID;
5323  bool promoted = false;
5324 
5325  /*
5326  * We should have an aux process resource owner to use, and we should not
5327  * be in a transaction that's installed some other resowner.
5328  */
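5329  Assert(AuxProcessResourceOwner != NULL);
5330  Assert(CurrentResourceOwner == NULL ||
5331  CurrentResourceOwner == AuxProcessResourceOwner);
5332  CurrentResourceOwner = AuxProcessResourceOwner;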
5333 
5334  /*
5335  * Check that contents look valid.
5336  */
5337  if (!XRecOffIsValid(ControlFile->checkPoint))
5338  ereport(FATAL,
5339  (errmsg("control file contains invalid checkpoint location")));
5340 
5341  switch (ControlFile->state)
5342  {
5343  case DB_SHUTDOWNED:
5344 
5345  /*
5346  * This is the expected case, so don't be chatty in standalone
5347  * mode
5348  */
5349  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
5350  (errmsg("database system was shut down at %s",
5351  str_time(ControlFile->time))));
5352  break;
5353 
5354  case DB_SHUTDOWNED_IN_RECOVERY:
5355  ereport(LOG,
5356  (errmsg("database system was shut down in recovery at %s",
5357  str_time(ControlFile->time))));
5358  break;
5359 
5360  case DB_SHUTDOWNING:
5361  ereport(LOG,
5362  (errmsg("database system shutdown was interrupted; last known up at %s",
5363  str_time(ControlFile->time))));
5364  break;
5365 
5366  case DB_IN_CRASH_RECOVERY:
5367  ereport(LOG,
5368  (errmsg("database system was interrupted while in recovery at %s",
5369  str_time(ControlFile->time)),
5370  errhint("This probably means that some data is corrupted and"
5371  " you will have to use the last backup for recovery.")));
5372  break;
5373 
5374  case DB_IN_ARCHIVE_RECOVERY:
5375  ereport(LOG,
5376  (errmsg("database system was interrupted while in recovery at log time %s",
5377  str_time(ControlFile->checkPointCopy.time)),
5378  errhint("If this has occurred more than once some data might be corrupted"
5379  " and you might need to choose an earlier recovery target.")));
5380  break;
5381 
5382  case DB_IN_PRODUCTION:
5383  ereport(LOG,
5384  (errmsg("database system was interrupted; last known up at %s",
5385  str_time(ControlFile->time))));
5386  break;
5387 
5388  default:
5389  ereport(FATAL,
5390  (errmsg("control file contains invalid database cluster state")));
5391  }
5392 
5393  /* This is just to allow attaching to startup process with a debugger */
5394 #ifdef XLOG_REPLAY_DELAY
5395  if (ControlFile->state != DB_SHUTDOWNED)
5396  pg_usleep(60000000L);
5397 #endif
5398 
5399  /*
5400  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5401  * In cases where someone has performed a copy for PITR, these directories
5402  * may have been excluded and need to be re-created.
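5403  */
5404  ValidateXLOGDirectoryStructure();
5405 
5406  /* Set up timeout handler needed to report startup progress. */
5407  if (!IsBootstrapProcessingMode())
5408  RegisterTimeout(STARTUP_PROGRESS_TIMEOUT,
5409  startup_progress_timeout_handler);
5410 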
5411  /*----------
5412  * If we previously crashed, perform a couple of actions:
5413  *
5414  * - The pg_wal directory may still include some temporary WAL segments
5415  * used when creating a new segment, so perform some clean up to not
5416  * bloat this path. This is done first as there is no point to sync
5417  * this temporary data.
5418  *
5419  * - There might be data which we had written, intending to fsync it, but
5420  * which we had not actually fsync'd yet. Therefore, a power failure in
5421  * the near future might cause earlier unflushed writes to be lost, even
5422  * though more recent data written to disk from here on would be
5423  * persisted. To avoid that, fsync the entire data directory.
5424  */
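5425  if (ControlFile->state != DB_SHUTDOWNED &&
5426  ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
5427  {
5428  RemoveTempXlogFiles();
5429  SyncDataDirectory();
5430  didCrash = true;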
5431  }
5432  else
5433  didCrash = false;
5434 
5435  /*
5436  * Prepare for WAL recovery if needed.
5437  *
5438  * InitWalRecovery analyzes the control file and the backup label file, if
5439  * any. It updates the in-memory ControlFile buffer according to the
5440  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5441  * It also applies the tablespace map file, if any.
5442  */
5443  InitWalRecovery(ControlFile, &wasShutdown,
5444  &haveBackupLabel, &haveTblspcMap);
5445  checkPoint = ControlFile->checkPointCopy;
5446 
5447  /* initialize shared memory variables from the checkpoint record */
5448  TransamVariables->nextXid = checkPoint.nextXid;
5449  TransamVariables->nextOid = checkPoint.nextOid;
5450  TransamVariables->oidCount = 0;
5451  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5452  AdvanceOldestClogXid(checkPoint.oldestXid);
5453  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5454  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5455  SetCommitTsLimit(checkPoint.oldestCommitTsXid,
5456  checkPoint.newestCommitTsXid);
5457  XLogCtl->ckptFullXid = checkPoint.nextXid;
5458 
5459  /*
5460  * Clear out any old relcache cache files. This is *necessary* if we do
5461  * any WAL replay, since that would probably result in the cache files
5462  * being out of sync with database reality. In theory we could leave them
5463  * in place if the database had been cleanly shut down, but it seems
5464  * safest to just remove them always and let them be rebuilt during the
5465  * first backend startup. These files needs to be removed from all
5466  * directories including pg_tblspc, however the symlinks are created only
5467  * after reading tablespace_map file in case of archive recovery from
5468  * backup, so needs to clear old relcache files here after creating
5469  * symlinks.
5470  */
5471  RelationCacheInitFileRemove();
5472 
5473  /*
5474  * Initialize replication slots, before there's a chance to remove
5475  * required resources.
5476  */
5477  StartupReplicationSlots();
5478 
5479  /*
5480  * Startup logical state, needs to be setup now so we have proper data
5481  * during crash recovery.
5482  */
5483  StartupReorderBuffer();
5484 
5485  /*
5486  * Startup CLOG. This must be done after TransamVariables->nextXid has
5487  * been initialized and before we accept connections or begin WAL replay.
5488  */
5489  StartupCLOG();
5490 
5491  /*
5492  * Startup MultiXact. We need to do this early to be able to replay
5493  * truncations.
5494  */
5495  StartupMultiXact();
5496 
5497  /*
5498  * Ditto for commit timestamps. Activate the facility if the setting is
5499  * enabled in the control file, as there should be no tracking of commit
5500  * timestamps done when the setting was disabled. This facility can be
5501  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5502  */
5503  if (ControlFile->track_commit_timestamp)
5504  StartupCommitTs();
5505 
5506  /*
5507  * Recover knowledge about replay progress of known replication partners.
5508  */
5509  StartupReplicationOrigin();
5510 
5511  /*
5512  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5513  * control file. On recovery, all unlogged relations are blown away, so
5514  * the unlogged LSN counter can be reset too.
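5515  */
5516  if (ControlFile->state == DB_SHUTDOWNED)
5517  pg_atomic_write_membarrier_u64(&XLogCtl->unloggedLSN,
5518  ControlFile->unloggedLSN);
5519  else
5520  pg_atomic_write_membarrier_u64(&XLogCtl->unloggedLSN,
5521  FirstNormalUnloggedLSN);
5522 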
5523  /*
5524  * Copy any missing timeline history files between 'now' and the recovery
5525  * target timeline from archive to pg_wal. While we don't need those files
5526  * ourselves - the history file of the recovery target timeline covers all
5527  * the previous timelines in the history too - a cascading standby server
5528  * might be interested in them. Or, if you archive the WAL from this
5529  * server to a different archive than the primary, it'd be good for all
5530  * the history files to get archived there after failover, so that you can
5531  * use one of the old timelines as a PITR target. Timeline history files
5532  * are small, so it's better to copy them unnecessarily than not copy them
5533  * and regret later.
5534  */
5535  restoreTimeLineHistoryFiles(checkPoint.ThisTimeLineID, recoveryTargetTLI);
5536 
5537  /*
5538  * Before running in recovery, scan pg_twophase and fill in its status to
5539  * be able to work on entries generated by redo. Doing a scan before
5540  * taking any recovery action has the merit to discard any 2PC files that
5541  * are newer than the first record to replay, saving from any conflicts at
5542  * replay. This avoids as well any subsequent scans when doing recovery
5543  * of the on-disk two-phase data.
5544  */
5545  restoreTwoPhaseData();
5546 
5547  /*
5548  * When starting with crash recovery, reset pgstat data - it might not be
5549  * valid. Otherwise restore pgstat data. It's safe to do this here,
5550  * because postmaster will not yet have started any other processes.
5551  *
5552  * NB: Restoring replication slot stats relies on slot state to have
5553  * already been restored from disk.
5554  *
5555  * TODO: With a bit of extra work we could just start with a pgstat file
5556  * associated with the checkpoint redo location we're starting from.
5557  */
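5558  if (didCrash)
5559  pgstat_discard_stats();
5560  else
5561  pgstat_restore_stats();
5562 
5563  lastFullPageWrites = checkPoint.fullPageWrites;
5564 
5565  RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
5566  doPageWrites = lastFullPageWrites;
5567 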
5568  /* REDO */
5569  if (InRecovery)
5570  {
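5571  /* Initialize state for RecoveryInProgress() */
5572  SpinLockAcquire(&XLogCtl->info_lck);
5573  if (InArchiveRecovery)
5574  XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
5575  else
5576  XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
5577  SpinLockRelease(&XLogCtl->info_lck);
5578 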
5579  /*
5580  * Update pg_control to show that we are recovering and to show the
5581  * selected checkpoint as the place we are starting from. We also mark
5582  * pg_control with any minimum recovery stop point obtained from a
5583  * backup history file.
5584  *
5585  * No need to hold ControlFileLock yet, we aren't up far enough.
5586  */
5587  UpdateControlFile();
5588 
5589  /*
5590  * If there was a backup label file, it's done its job and the info
5591  * has now been propagated into pg_control. We must get rid of the
5592  * label file so that if we crash during recovery, we'll pick up at
5593  * the latest recovery restartpoint instead of going all the way back
5594  * to the backup start point. It seems prudent though to just rename
5595  * the file out of the way rather than delete it completely.
5596  */
5597  if (haveBackupLabel)
5598  {
5599  unlink(BACKUP_LABEL_OLD);
5600  durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
5601  }
5602 
5603  /*
5604  * If there was a tablespace_map file, it's done its job and the
5605  * symlinks have been created. We must get rid of the map file so
5606  * that if we crash during recovery, we don't create symlinks again.
5607  * It seems prudent though to just rename the file out of the way
5608  * rather than delete it completely.
5609  */
5610  if (haveTblspcMap)
5611  {
5612  unlink(TABLESPACE_MAP_OLD);
5613  durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, FATAL);
5614  }
5615 
5616  /*
5617  * Initialize our local copy of minRecoveryPoint. When doing crash
5618  * recovery we want to replay up to the end of WAL. Particularly, in
5619  * the case of a promoted standby minRecoveryPoint value in the
5620  * control file is only updated after the first checkpoint. However,
5621  * if the instance crashes before the first post-recovery checkpoint
5622  * is completed then recovery will use a stale location causing the
5623  * startup process to think that there are still invalid page
5624  * references when checking for data consistency.
5625  */
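5626  if (InArchiveRecovery)
5627  {
5628  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
5629  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
5630  }
5631  else
5632  {
5633  LocalMinRecoveryPoint = InvalidXLogRecPtr;
5634  LocalMinRecoveryPointTLI = 0;
5635  }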
5636 
5637  /* Check that the GUCs used to generate the WAL allow recovery */
5638  CheckRequiredParameterValues();
5639 
5640  /*
5641  * We're in recovery, so unlogged relations may be trashed and must be
5642  * reset. This should be done BEFORE allowing Hot Standby
5643  * connections, so that read-only backends don't try to read whatever
5644  * garbage is left over from before.
5645  */
5646  ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
5647 
5648  /*
5649  * Likewise, delete any saved transaction snapshot files that got left
5650  * behind by crashed backends.
5651  */
5652  DeleteAllExportedSnapshotFiles();
5653 
5654  /*
5655  * Initialize for Hot Standby, if enabled. We won't let backends in
5656  * yet, not until we've reached the min recovery point specified in
5657  * control file and we've established a recovery snapshot from a
5658  * running-xacts WAL record.
5659  */
5660  if (ArchiveRecoveryRequested && EnableHotStandby)
5661  {
5662  TransactionId *xids;
5663  int nxids;
5664 
5665  ereport(DEBUG1,
5666  (errmsg_internal("initializing for hot standby")));
5667 
5668  InitRecoveryTransactionEnvironment();
5669 
5670  if (wasShutdown)
5671  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5672  else
5673  oldestActiveXID = checkPoint.oldestActiveXid;
5674  Assert(TransactionIdIsValid(oldestActiveXID));
5675 
5676  /* Tell procarray about the range of xids it has to deal with */
5677  ProcArrayInitRecovery(XidFromFullTransactionId(TransamVariables->nextXid));
5678 
5679  /*
5680  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5681  * have already been started up and other SLRUs are not maintained
5682  * during recovery and need not be started yet.
5683  */
5684  StartupSUBTRANS(oldestActiveXID);
5685 
5686  /*
5687  * If we're beginning at a shutdown checkpoint, we know that
5688  * nothing was running on the primary at this point. So fake-up an
5689  * empty running-xacts record and use that here and now. Recover
5690  * additional standby state for prepared transactions.
5691  */
5692  if (wasShutdown)
5693  {
5694  RunningTransactionsData running;
5695  TransactionId latestCompletedXid;
5696 
5697  /*
5698  * Construct a RunningTransactions snapshot representing a
5699  * shut down server, with only prepared transactions still
5700  * alive. We're never overflowed at this point because all
5701  * subxids are listed with their parent prepared transactions.
5702  */
5703  running.xcnt = nxids;
5704  running.subxcnt = 0;
5705  running.subxid_overflow = false;
5706  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5707  running.oldestRunningXid = oldestActiveXID;
5708  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5709  TransactionIdRetreat(latestCompletedXid);
5710  Assert(TransactionIdIsNormal(latestCompletedXid));
5711  running.latestCompletedXid = latestCompletedXid;
5712  running.xids = xids;
5713 
5714  ProcArrayApplyRecoveryInfo(&running);
5715 
5716  StandbyRecoverPreparedTransactions();
5717  }
5718  }
5719 
5720  /*
5721  * We're all set for replaying the WAL now. Do it.
5722  */
5723  PerformWalRecovery();
5724  performedWalRecovery = true;
5725  }
5726  else
5727  performedWalRecovery = false;
5728 
5729  /*
5730  * Finish WAL recovery.
5731  */
5732  endOfRecoveryInfo = FinishWalRecovery();
5733  EndOfLog = endOfRecoveryInfo->endOfLog;
5734  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5735  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5736  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5737 
5738  /*
5739  * Reset ps status display, so as no information related to recovery shows
5740  * up.
5741  */
5742  set_ps_display("");
5743 
5744  /*
5745  * When recovering from a backup (we are in recovery, and archive recovery
5746  * was requested), complain if we did not roll forward far enough to reach
5747  * the point where the database is consistent. For regular online
5748  * backup-from-primary, that means reaching the end-of-backup WAL record
5749  * (at which point we reset backupStartPoint to be Invalid), for
5750  * backup-from-replica (which can't inject records into the WAL stream),
5751  * that point is when we reach the minRecoveryPoint in pg_control (which
5752  * we purposefully copy last when backing up from a replica). For
5753  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5754  * or snapshot-style backups (which don't), backupEndRequired will be set
5755  * to false.
5756  *
5757  * Note: it is indeed okay to look at the local variable
5758  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5759  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5760  * been advanced beyond the WAL we processed.
5761  */
5762  if (InRecovery &&
5763  (EndOfLog < LocalMinRecoveryPoint ||
5764  !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
5765  {
5766  /*
5767  * Ran off end of WAL before reaching end-of-backup WAL record, or
5768  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5769  * recover from an online backup but never called pg_backup_stop(), or
5770  * you didn't archive all the WAL needed.
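5771  */
5772  if (ArchiveRecoveryRequested || ControlFile->backupEndRequired)
5773  {
5774  if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint) || ControlFile->backupEndRequired)
5775  ereport(FATAL,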
5776  (errmsg("WAL ends before end of online backup"),
5777  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5778  else
5779  ereport(FATAL,
5780  (errmsg("WAL ends before consistent recovery point")));
5781  }
5782  }
5783 
5784  /*
5785  * Reset unlogged relations to the contents of their INIT fork. This is
5786  * done AFTER recovery is complete so as to include any unlogged relations
5787  * created during recovery, but BEFORE recovery is marked as having
5788  * completed successfully. Otherwise we'd not retry if any of the post
5789  * end-of-recovery steps fail.
5790  */
5791  if (InRecovery)
5792  ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
5793 
5794  /*
5795  * Pre-scan prepared transactions to find out the range of XIDs present.
5796  * This information is not quite needed yet, but it is positioned here so
5797  * as potential problems are detected before any on-disk change is done.
5798  */
5799  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5800 
5801  /*
5802  * Allow ordinary WAL segment creation before possibly switching to a new
5803  * timeline, which creates a new segment, and after the last ReadRecord().
5804  */
5805  SetInstallXLogFileSegmentActive();
5806 
5807  /*
5808  * Consider whether we need to assign a new timeline ID.
5809  *
5810  * If we did archive recovery, we always assign a new ID. This handles a
5811  * couple of issues. If we stopped short of the end of WAL during
5812  * recovery, then we are clearly generating a new timeline and must assign
5813  * it a unique new ID. Even if we ran to the end, modifying the current
5814  * last segment is problematic because it may result in trying to
5815  * overwrite an already-archived copy of that segment, and we encourage
5816  * DBAs to make their archive_commands reject that. We can dodge the
5817  * problem by making the new active segment have a new timeline ID.
5818  *
5819  * In a normal crash recovery, we can just extend the timeline we were in.
5820  */
5821  newTLI = endOfRecoveryInfo->lastRecTLI;
5822  if (ArchiveRecoveryRequested)
5823  {
5824  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5825  ereport(LOG,
5826  (errmsg("selected new timeline ID: %u", newTLI)));
5827 
5828  /*
5829  * Make a writable copy of the last WAL segment. (Note that we also
5830  * have a copy of the last block of the old WAL in
5831  * endOfRecovery->lastPage; we will use that below.)
5832  */
5833  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5834 
5835  /*
5836  * Remove the signal files out of the way, so that we don't
5837  * accidentally re-enter archive recovery mode in a subsequent crash.
5838  */
5839  if (endOfRecoveryInfo->standby_signal_file_found)
5840  durable_unlink(STANDBY_SIGNAL_FILE, FATAL);
5841 
5842  if (endOfRecoveryInfo->recovery_signal_file_found)
5843  durable_unlink(RECOVERY_SIGNAL_FILE, FATAL);
5844 
5845  /*
5846  * Write the timeline history file, and have it archived. After this
5847  * point (or rather, as soon as the file is archived), the timeline
5848  * will appear as "taken" in the WAL archive and to any standby
5849  * servers. If we crash before actually switching to the new
5850  * timeline, standby servers will nevertheless think that we switched
5851  * to the new timeline, and will try to connect to the new timeline.
5852  * To minimize the window for that, try to do as little as possible
5853  * between here and writing the end-of-recovery record.
5854  */
5855  writeTimeLineHistory(newTLI, recoveryTargetTLI,
5856  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5857 
5858  ereport(LOG,
5859  (errmsg("archive recovery complete")));
5860  }
5861 
5862  /* Save the selected TimeLineID in shared memory, too */
5863  XLogCtl->InsertTimeLineID = newTLI;
5864  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5865 
5866  /*
5867  * Actually, if WAL ended in an incomplete record, skip the parts that
5868  * made it through and start writing after the portion that persisted.
5869  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5870  * we'll do as soon as we're open for writing new WAL.)
5871  */
5872  if (!XLogRecPtrIsInvalid(missingContrecPtr))
5873  {
5874  /*
5875  * We should only have a missingContrecPtr if we're not switching to a
5876  * new timeline. When a timeline switch occurs, WAL is copied from the
5877  * old timeline to the new only up to the end of the last complete
5878  * record, so there can't be an incomplete WAL record that we need to
5879  * disregard.
5880  */
5881  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
5882  Assert(!XLogRecPtrIsInvalid(abortedRecPtr));
5883  EndOfLog = missingContrecPtr;
5884  }
5885 
5886  /*
5887  * Prepare to write WAL starting at EndOfLog location, and init xlog
5888  * buffer cache using the block containing the last record from the
5889  * previous incarnation.
5890  */
5891  Insert = &XLogCtl->Insert;
5892  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
5893  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
5894 
5895  /*
5896  * Tricky point here: lastPage contains the *last* block that the LastRec
5897  * record spans, not the one it starts in. The last block is indeed the
5898  * one we want to use.
5899  */
5900  if (EndOfLog % XLOG_BLCKSZ != 0)
5901  {
5902  char *page;
5903  int len;
5904  int firstIdx;
5905 
5906  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
5907  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
5908  Assert(len < XLOG_BLCKSZ);
5909 
5910  /* Copy the valid part of the last block, and zero the rest */
5911  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
5912  memcpy(page, endOfRecoveryInfo->lastPage, len);
5913  memset(page + len, 0, XLOG_BLCKSZ - len);
5914 
5915  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
5916  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
5917  }
5918  else
5919  {
5920  /*
5921  * There is no partial block to copy. Just set InitializedUpTo, and
5922  * let the first attempt to insert a log record to initialize the next
5923  * buffer.
5924  */
5925  XLogCtl->InitializedUpTo = EndOfLog;
5926  }
5927 
5928  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
5929 
5930  XLogCtl->LogwrtResult = LogwrtResult;
5931 
5932  XLogCtl->LogwrtRqst.Write = EndOfLog;
5933  XLogCtl->LogwrtRqst.Flush = EndOfLog;
5934 
5935  /*
5936  * Preallocate additional log files, if wanted.
5937  */
5938  PreallocXlogFiles(EndOfLog, newTLI);
5939 
5940  /*
5941  * Okay, we're officially UP.
5942  */
5943  InRecovery = false;
5944 
5945  /* start the archive_timeout timer and LSN running */
5946  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
5947  XLogCtl->lastSegSwitchLSN = EndOfLog;
5948 
5949  /* also initialize latestCompletedXid, to nextXid - 1 */
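5950  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
5951  TransamVariables->latestCompletedXid = TransamVariables->nextXid;
5952  FullTransactionIdRetreat(&TransamVariables->latestCompletedXid);
5953  LWLockRelease(ProcArrayLock);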
5954 
5955  /*
5956  * Start up subtrans, if not already done for hot standby. (commit
5957  * timestamps are started below, if necessary.)
5958  */
5959  if (standbyState == STANDBY_DISABLED)
5960  StartupSUBTRANS(oldestActiveXID);
5961 
5962  /*
5963  * Perform end of recovery actions for any SLRUs that need it.
5964  */
5965  TrimCLOG();
5966  TrimMultiXact();
5967 
5968  /*
5969  * Reload shared-memory state for prepared transactions. This needs to
5970  * happen before renaming the last partial segment of the old timeline as
5971  * it may be possible that we have to recover some transactions from it.
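5972  */
5973  RecoverPreparedTransactions();
5974 
5975  /* Shut down xlogreader */
5976  ShutdownWalRecovery();
5977 
5978  /* Enable WAL writes for this backend only. */
5979  LocalSetXLogInsertAllowed();
5980 
5981  /* If necessary, write overwrite-contrecord before doing anything else */
5982  if (!XLogRecPtrIsInvalid(abortedRecPtr))
5983  {
5984  Assert(!XLogRecPtrIsInvalid(missingContrecPtr));
5985  CreateOverwriteContrecordRecord(abortedRecPtr, missingContrecPtr, newTLI);
5986  }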
5987 
5988  /*
5989  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
5990  * record before resource manager writes cleanup WAL records or checkpoint
5991  * record is written.
5992  */
5993  Insert->fullPageWrites = lastFullPageWrites;
5994  UpdateFullPageWrites();
5995 
5996  /*
5997  * Emit checkpoint or end-of-recovery record in XLOG, if required.
5998  */
5999  if (performedWalRecovery)
6000  promoted = PerformRecoveryXLogAction();
6001 
6002  /*
6003  * If any of the critical GUCs have changed, log them before we allow
6004  * backends to write WAL.
6005  */
6006  XLogReportParameters();
6007 
6008  /* If this is archive recovery, perform post-recovery cleanup actions. */
6009  if (ArchiveRecoveryRequested)
6010  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6011 
6012  /*
6013  * Local WAL inserts enabled, so it's time to finish initialization of
6014  * commit timestamp.
6015  */
6016  CompleteCommitTsInitialization();
6017 
6018  /*
6019  * All done with end-of-recovery actions.
6020  *
6021  * Now allow backends to write WAL and update the control file status in
6022  * consequence. SharedRecoveryState, that controls if backends can write
6023  * WAL, is updated while holding ControlFileLock to prevent other backends
6024  * to look at an inconsistent state of the control file in shared memory.
6025  * There is still a small window during which backends can write WAL and
6026  * the control file is still referring to a system not in DB_IN_PRODUCTION
6027  * state while looking at the on-disk control file.
6028  *
6029  * Also, we use info_lck to update SharedRecoveryState to ensure that
6030  * there are no race conditions concerning visibility of other recent
6031  * updates to shared memory.
6032  */
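6033  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6034  ControlFile->state = DB_IN_PRODUCTION;
6035 
6036  SpinLockAcquire(&XLogCtl->info_lck);
6037  XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
6038  SpinLockRelease(&XLogCtl->info_lck);
6039 
6040  UpdateControlFile();
6041  LWLockRelease(ControlFileLock);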
6042 
6043  /*
6044  * Shutdown the recovery environment. This must occur after
6045  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6046  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6047  * any session building a snapshot will not rely on KnownAssignedXids as
6048  * RecoveryInProgress() would return false at this stage. This is
6049  * particularly critical for prepared 2PC transactions, that would still
6050  * need to be included in snapshots once recovery has ended.
6051  */
6052  if (standbyState != STANDBY_DISABLED)
6053  ShutdownRecoveryTransactionEnvironment();
6054 
6055  /*
6056  * If there were cascading standby servers connected to us, nudge any wal
6057  * sender processes to notice that we've been promoted.
6058  */
6059  WalSndWakeup(true, true);
6060 
6061  /*
6062  * If this was a promotion, request an (online) checkpoint now. This isn't
6063  * required for consistency, but the last restartpoint might be far back,
6064  * and in case of a crash, recovering from it might take longer than is
6065  * appropriate now that we're not in standby mode anymore.
6066  */
6067  if (promoted)
6068  RequestCheckpoint(CHECKPOINT_FORCE);
6069 }
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:489
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:639
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
void SyncDataDirectory(void)
Definition: fd.c:3544
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:451
void TrimMultiXact(void)
Definition: multixact.c:2103
void StartupMultiXact(void)
Definition: multixact.c:2078
void StartupReplicationOrigin(void)
Definition: origin.c:699
@ DB_IN_PRODUCTION
Definition: pg_control.h:96
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:94
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:407
void pgstat_discard_stats(void)
Definition: pgstat.c:419
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6775
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1874
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:84
TransactionId nextXid
Definition: standby.h:83
TransactionId latestCompletedXid
Definition: standby.h:87
TransactionId * xids
Definition: standby.h:89
FullTransactionId latestCompletedXid
Definition: transam.h:238
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2084
void restoreTwoPhaseData(void)
Definition: twophase.c:1898
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1962
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2043
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3666
void UpdateFullPageWrites(void)
Definition: xlog.c:8000
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5085
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4029
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7274
static void XLogReportParameters(void)
Definition: xlog.c:7937
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6151
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5175
static bool lastFullPageWrites
Definition: xlog.c:217
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5100
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5264
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3762
#define TABLESPACE_MAP_OLD
Definition: xlog.h:302
#define TABLESPACE_MAP
Definition: xlog.h:301
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:297
#define BACKUP_LABEL_OLD
Definition: xlog.h:299
#define BACKUP_LABEL_FILE
Definition: xlog.h:298
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:296
@ RECOVERY_STATE_CRASH
Definition: xlog.h:89
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:90
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:138
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:373
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:372
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:512
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:122
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:49

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert(), AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), 
CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_overflow, 
SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow)
static

Definition at line 5085 of file xlog.c.

5086 {
5087  static char buf[128];
5088 
5089  pg_strftime(buf, sizeof(buf),
5090  "%Y-%m-%d %H:%M:%S %Z",
5091  pg_localtime(&tnow, log_timezone));
5092 
5093  return buf;
5094 }
static char * buf
Definition: pg_test_fsync.c:73
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1344
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6076 of file xlog.c.

6077 {
6078  /* initialize minRecoveryPoint to this record */
6079  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6081  if (ControlFile->minRecoveryPoint < EndRecPtr)
6082  {
6083  ControlFile->minRecoveryPoint = EndRecPtr;
6084  ControlFile->minRecoveryPointTLI = replayTLI;
6085  }
6086  /* update local copy */
6089 
6090  /*
6091  * The startup process can update its local copy of minRecoveryPoint from
6092  * this point.
6093  */
6094  updateMinRecoveryPoint = true;
6095 
6097 
6098  /*
6099  * We update SharedRecoveryState while holding the lock on ControlFileLock
6100  * so both states are consistent in shared memory.
6101  */
6105 
6106  LWLockRelease(ControlFileLock);
6107 }
static bool updateMinRecoveryPoint
Definition: xlog.c:637

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6672 of file xlog.c.

6673 {
6674  /*
6675  * The status is reported only for end-of-recovery and shutdown
6676  * checkpoints or shutdown restartpoints. Updating the ps display is
6677  * useful in those situations as it may not be possible to rely on
6678  * pg_stat_activity to see the status of the checkpointer or the startup
6679  * process.
6680  */
6681  if ((flags & (CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IS_SHUTDOWN)) == 0)
6682  return;
6683 
6684  if (reset)
6685  set_ps_display("");
6686  else
6687  {
6688  char activitymsg[128];
6689 
6690  snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6691  (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6692  (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6693  restartpoint ? "restartpoint" : "checkpoint");
6694  set_ps_display(activitymsg);
6695  }
6696 }

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6634 of file xlog.c.

6635 {
6636  /*
6637  * To estimate the number of segments consumed between checkpoints, keep a
6638  * moving average of the amount of WAL generated in previous checkpoint
6639  * cycles. However, if the load is bursty, with quiet periods and busy
6640  * periods, we want to cater for the peak load. So instead of a plain
6641  * moving average, let the average decline slowly if the previous cycle
6642  * used less WAL than estimated, but bump it up immediately if it used
6643  * more.
6644  *
6645  * When checkpoints are triggered by max_wal_size, this should converge to
6646  * CheckpointSegments * wal_segment_size.
6647  *
6648  * Note: This doesn't pay any attention to what caused the checkpoint.
6649  * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6650  * starting a base backup, are counted the same as those created
6651  * automatically. The slow-decline will largely mask them out, if they are
6652  * not frequent. If they are frequent, it seems reasonable to count them
6653  * in as any others; if you issue a manual checkpoint every 5 minutes and
6654  * never let a timed checkpoint happen, it makes sense to base the
6655  * preallocation on that 5 minute interval rather than whatever
6656  * checkpoint_timeout is set to.
6657  */
6658  PrevCheckPointDistance = nbytes;
6659  if (CheckPointDistanceEstimate < nbytes)
6660  CheckPointDistanceEstimate = nbytes;
6661  else
6663  (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6664 }

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4449 of file xlog.c.

4450 {
4452 }
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8000 of file xlog.c.

8001 {
8003  bool recoveryInProgress;
8004 
8005  /*
8006  * Do nothing if full_page_writes has not been changed.
8007  *
8008  * It's safe to check the shared full_page_writes without the lock,
8009  * because we assume that there is no concurrently running process which
8010  * can update it.
8011  */
8012  if (fullPageWrites == Insert->fullPageWrites)
8013  return;
8014 
8015  /*
8016  * Perform this outside critical section so that the WAL insert
8017  * initialization done by RecoveryInProgress() doesn't trigger an
8018  * assertion failure.
8019  */
8020  recoveryInProgress = RecoveryInProgress();
8021 
8023 
8024  /*
8025  * It's always safe to take full page images, even when not strictly
6026  * required, but not the other way round. So if we're setting full_page_writes
8027  * to true, first set it true and then write the WAL record. If we're
8028  * setting it to false, first write the WAL record and then set the global
8029  * flag.
8030  */
8031  if (fullPageWrites)
8032  {
8034  Insert->fullPageWrites = true;
8036  }
8037 
8038  /*
8039  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8040  * full_page_writes during archive recovery, if required.
8041  */
8042  if (XLogStandbyInfoActive() && !recoveryInProgress)
8043  {
8044  XLogBeginInsert();
8045  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8046 
8047  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8048  }
8049 
8050  if (!fullPageWrites)
8051  {
8053  Insert->fullPageWrites = false;
8055  }
8056  END_CRIT_SECTION();
8057 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:75

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3742 of file xlog.c.

3743 {
3744  uint32 tli;
3745  XLogSegNo segno;
3746 
3747  XLogFromFileName(filename, &tli, &segno, wal_segment_size);
3748 
3750  if (segno > XLogCtl->lastRemovedSegNo)
3751  XLogCtl->lastRemovedSegNo = segno;
3753 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2648 of file xlog.c.

2649 {
2650  /* Quick check using our local copy of the variable */
2651  if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2652  return;
2653 
2654  /*
2655  * An invalid minRecoveryPoint means that we need to recover all the WAL,
2656  * i.e., we're doing crash recovery. We never modify the control file's
2657  * value in that case, so we can short-circuit future checks here too. The
2658  * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2659  * updated until crash recovery finishes. We only do this for the startup
2660  * process as it should not update its own reference of minRecoveryPoint
2661  * until it has finished crash recovery to make sure that all WAL
2662  * available is replayed in this case. This also saves the startup process
2663  * from taking extra locks on the control file.
2664  */
2666  {
2667  updateMinRecoveryPoint = false;
2668  return;
2669  }
2670 
2671  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2672 
2673  /* update local copy */
2676 
2678  updateMinRecoveryPoint = false;
2679  else if (force || LocalMinRecoveryPoint < lsn)
2680  {
2681  XLogRecPtr newMinRecoveryPoint;
2682  TimeLineID newMinRecoveryPointTLI;
2683 
2684  /*
2685  * To avoid having to update the control file too often, we update it
2686  * all the way to the last record being replayed, even though 'lsn'
2687  * would suffice for correctness. This also allows the 'force' case
2688  * to not need a valid 'lsn' value.
2689  *
2690  * Another important reason for doing it this way is that the passed
2691  * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2692  * the caller got it from a corrupted heap page. Accepting such a
2693  * value as the min recovery point would prevent us from coming up at
2694  * all. Instead, we just log a warning and continue with recovery.
2695  * (See also the comments about corrupt LSNs in XLogFlush.)
2696  */
2697  newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2698  if (!force && newMinRecoveryPoint < lsn)
2699  elog(WARNING,
2700  "xlog min recovery request %X/%X is past current point %X/%X",
2701  LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2702 
2703  /* update control file */
2704  if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2705  {
2706  ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2707  ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2709  LocalMinRecoveryPoint = newMinRecoveryPoint;
2710  LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2711 
2712  ereport(DEBUG2,
2713  (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
2714  LSN_FORMAT_ARGS(newMinRecoveryPoint),
2715  newMinRecoveryPointTLI)));
2716  }
2717  }
2718  LWLockRelease(ControlFileLock);
2719 }
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsInvalid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4029 of file xlog.c.

4030 {
4031  char path[MAXPGPATH];
4032  struct stat stat_buf;
4033 
4034  /* Check for pg_wal; if it doesn't exist, error out */
4035  if (stat(XLOGDIR, &stat_buf) != 0 ||
4036  !S_ISDIR(stat_buf.st_mode))
4037  ereport(FATAL,
4038  (errmsg("required WAL directory \"%s\" does not exist",
4039  XLOGDIR)));
4040 
4041  /* Check for archive_status */
4042  snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4043  if (stat(path, &stat_buf) == 0)
4044  {
4045  /* Check for weird cases where it exists but isn't a directory */
4046  if (!S_ISDIR(stat_buf.st_mode))
4047  ereport(FATAL,
4048  (errmsg("required WAL directory \"%s\" does not exist",
4049  path)));
4050  }
4051  else
4052  {
4053  ereport(LOG,
4054  (errmsg("creating missing WAL directory \"%s\"", path)));
4055  if (MakePGDirectory(path) < 0)
4056  ereport(FATAL,
4057  (errmsg("could not create missing directory \"%s\": %m",
4058  path)));
4059  }
4060 
4061  /* Check for summaries */
4062  snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4063  if (stat(path, &stat_buf) == 0)
4064  {
4065  /* Check for weird cases where it exists but isn't a directory */
4066  if (!S_ISDIR(stat_buf.st_mode))
4067  ereport(FATAL,
4068  (errmsg("required WAL directory \"%s\" does not exist",
4069  path)));
4070  }
4071  else
4072  {
4073  ereport(LOG,
4074  (errmsg("creating missing WAL directory \"%s\"", path)));
4075  if (MakePGDirectory(path) < 0)
4076  ereport(FATAL,
4077  (errmsg("could not create missing directory \"%s\": %m",
4078  path)));
4079  }
4080 }
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3913
#define S_ISDIR(m)
Definition: win32_port.h:325

References ereport, errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1492 of file xlog.c.

1493 {
1494  uint64 bytepos;
1495  XLogRecPtr reservedUpto;
1496  XLogRecPtr finishedUpto;
1498  int i;
1499 
1500  if (MyProc == NULL)
1501  elog(PANIC, "cannot wait without a PGPROC structure");
1502 
1503  /* Read the current insert position */
1504  SpinLockAcquire(&Insert->insertpos_lck);
1505  bytepos = Insert->CurrBytePos;
1506  SpinLockRelease(&Insert->insertpos_lck);
1507  reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1508 
1509  /*
1510  * No-one should request to flush a piece of WAL that hasn't even been
1511  * reserved yet. However, it can happen if there is a block with a bogus
1512  * LSN on disk, for example. XLogFlush checks for that situation and
1513  * complains, but only after the flush. Here we just assume that to mean
1514  * that all WAL that has been reserved needs to be finished. In this
1515  * corner-case, the return value can be smaller than 'upto' argument.
1516  */
1517  if (upto > reservedUpto)
1518  {
1519  ereport(LOG,
1520  (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1521  LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
1522  upto = reservedUpto;
1523  }
1524 
1525  /*
1526  * Loop through all the locks, sleeping on any in-progress insert older
1527  * than 'upto'.
1528  *
1529  * finishedUpto is our return value, indicating the point up to which all
1530  * the WAL insertions have been finished. Initialize it to the head of
1531  * reserved WAL, and as we iterate through the insertion locks, back it
1532  * out for any insertion that's still in progress.
1533  */
1534  finishedUpto = reservedUpto;
1535  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1536  {
1537  XLogRecPtr insertingat = InvalidXLogRecPtr;
1538 
1539  do
1540  {
1541  /*
1542  * See if this insertion is in progress. LWLockWaitForVar will
1543  * wait for the lock to be released, or for the 'value' to be set
1544  * by a LWLockUpdateVar call. When a lock is initially acquired,
1545  * its value is 0 (InvalidXLogRecPtr), which means that we don't
1546  * know where it's inserting yet. We will have to wait for it. If
1547  * it's a small insertion, the record will most likely fit on the
1548  * same page and the inserter will release the lock without ever
1549  * calling LWLockUpdateVar. But if it has to sleep, it will
1550  * advertise the insertion point with LWLockUpdateVar before
1551  * sleeping.
1552  *
1553  * In this loop we are only waiting for insertions that started
1554  * before WaitXLogInsertionsToFinish was called. The lack of
1555  * memory barriers in the loop means that we might see locks as
1556  * "unused" that have since become used. This is fine because
1557  * they only can be used for later insertions that we would not
1558  * want to wait on anyway. Not taking a lock to acquire the
1559  * current insertingAt value means that we might see older
1560  * insertingAt values. This is also fine, because if we read a
1561  * value too old, we will add ourselves to the wait queue, which
1562  * contains atomic operations.
1563  */
1564  if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1566  insertingat, &insertingat))
1567  {
1568  /* the lock was free, so no insertion in progress */
1569  insertingat = InvalidXLogRecPtr;
1570  break;
1571  }
1572 
1573  /*
1574  * This insertion is still in progress. Have to wait, unless the
1575  * inserter has proceeded past 'upto'.
1576  */
1577  } while (insertingat < upto);
1578 
1579  if (insertingat != InvalidXLogRecPtr && insertingat < finishedUpto)
1580  finishedUpto = insertingat;
1581  }
1582  return finishedUpto;
1583 }
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1588
PGPROC * MyProc
Definition: proc.c:66
pg_atomic_uint64 insertingAt
Definition: xlog.c:376

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), and XLogCtl.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1359 of file xlog.c.

1360 {
1361  bool immed;
1362 
1363  /*
1364  * It doesn't matter which of the WAL insertion locks we acquire, so try
1365  * the one we used last time. If the system isn't particularly busy, it's
1366  * a good bet that it's still available, and it's good to have some
1367  * affinity to a particular lock so that you don't unnecessarily bounce
1368  * cache lines between processes when there's no contention.
1369  *
1370  * If this is the first time through in this backend, pick a lock
1371  * (semi-)randomly. This allows the locks to be used evenly if you have a
1372  * lot of very short connections.
1373  */
1374  static int lockToTry = -1;
1375 
1376  if (lockToTry == -1)
1377  lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1378  MyLockNo = lockToTry;
1379 
1380  /*
1381  * The insertingAt value is initially set to 0, as we don't know our
1382  * insert location yet.
1383  */
1385  if (!immed)
1386  {
1387  /*
1388  * If we couldn't get the lock immediately, try another lock next
1389  * time. On a system with more insertion locks than concurrent
1390  * inserters, this causes all the inserters to eventually migrate to a
1391  * lock that no-one else is using. On a system with more inserters
1392  * than locks, it still helps to distribute the inserters evenly
1393  * across the locks.
1394  */
1395  lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1396  }
1397 }
ProcNumber MyProcNumber
Definition: globals.c:87
static int MyLockNo
Definition: xlog.c:640

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1404 of file xlog.c.

1405 {
1406  int i;
1407 
1408  /*
1409  * When holding all the locks, all but the last lock's insertingAt
1410  * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1411  * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1412  */
1413  for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1414  {
1416  LWLockUpdateVar(&WALInsertLocks[i].l.lock,
1418  PG_UINT64_MAX);
1419  }
1420  /* Variable value reset to 0 at release */
1422 
1423  holdingAllLocks = true;
1424 }
#define PG_UINT64_MAX
Definition: c.h:580
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1724
static bool holdingAllLocks
Definition: xlog.c:641

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1433 of file xlog.c.

1434 {
1435  if (holdingAllLocks)
1436  {
1437  int i;
1438 
1439  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1442  0);
1443 
1444  holdingAllLocks = false;
1445  }
1446  else
1447  {
1450  0);
1451  }
1452 }
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1858

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1459 of file xlog.c.

1460 {
1461  if (holdingAllLocks)
1462  {
1463  /*
1464  * We use the last lock to mark our actual position, see comments in
1465  * WALInsertLockAcquireExclusive.
1466  */
1469  insertingAt);
1470  }
1471  else
1474  insertingAt);
1475 }

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1718 of file xlog.c.

1720 {
1721  char *pdst = dstbuf;
1722  XLogRecPtr recptr = startptr;
1723  Size nbytes = count;
1724 
1725  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1726  return 0;
1727 
1728  Assert(!XLogRecPtrIsInvalid(startptr));
1729  Assert(startptr + count <= LogwrtResult.Write);
1730 
1731  /*
1732  * Loop through the buffers without a lock. For each buffer, atomically
1733  * read and verify the end pointer, then copy the data out, and finally
1734  * re-read and re-verify the end pointer.
1735  *
1736  * Once a page is evicted, it never returns to the WAL buffers, so if the
1737  * end pointer matches the expected end pointer before and after we copy
1738  * the data, then the right page must have been present during the data
1739  * copy. Read barriers are necessary to ensure that the data copy actually
1740  * happens between the two verification steps.
1741  *
1742  * If either verification fails, we simply terminate the loop and return
1743  * with the data that had been already copied out successfully.
1744  */
1745  while (nbytes > 0)
1746  {
1747  uint32 offset = recptr % XLOG_BLCKSZ;
1748  int idx = XLogRecPtrToBufIdx(recptr);
1749  XLogRecPtr expectedEndPtr;
1750  XLogRecPtr endptr;
1751  const char *page;
1752  const char *psrc;
1753  Size npagebytes;
1754 
1755  /*
1756  * Calculate the end pointer we expect in the xlblocks array if the
1757  * correct page is present.
1758  */
1759  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1760 
1761  /*
1762  * First verification step: check that the correct page is present in
1763  * the WAL buffers.
1764  */
1765  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1766  if (expectedEndPtr != endptr)
1767  break;
1768 
1769  /*
1770  * The correct page is present (or was at the time the endptr was
1771  * read; must re-verify later). Calculate pointer to source data and
1772  * determine how much data to read from this page.
1773  */
1774  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1775  psrc = page + offset;
1776  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1777 
1778  /*
1779  * Ensure that the data copy and the first verification step are not
1780  * reordered.
1781  */
1782  pg_read_barrier();
1783 
1784  /* data copy */
1785  memcpy(pdst, psrc, npagebytes);
1786 
1787  /*
1788  * Ensure that the data copy and the second verification step are not
1789  * reordered.
1790  */
1791  pg_read_barrier();
1792 
1793  /*
1794  * Second verification step: check that the page we read from wasn't
1795  * evicted while we were copying the data.
1796  */
1797  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1798  if (expectedEndPtr != endptr)
1799  break;
1800 
1801  pdst += npagebytes;
1802  recptr += npagebytes;
1803  nbytes -= npagebytes;
1804  }
1805 
1806  Assert(pdst - dstbuf <= count);
1807 
1808  return pdst - dstbuf;
1809 }
#define pg_read_barrier()
Definition: atomics.h:151
#define Min(x, y)
Definition: c.h:991
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6389

References Assert(), GetWALInsertionTimeLine(), idx(), LogwrtResult, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogwrtResult::Write, XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4166 of file xlog.c.

4167 {
4168  int fd;
4169  char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4170 
4171  /*
4172  * Initialize version and compatibility-check fields
4173  */
4176 
4177  ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4179 
4180  ControlFile->blcksz = BLCKSZ;
4181  ControlFile->relseg_size = RELSEG_SIZE;
4182  ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4184 
4187 
4190 
4192 
4193  /* Contents are protected with a CRC */
4196  (char *) ControlFile,
4197  offsetof(ControlFileData, crc));
4199 
4200  /*
4201  * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4202  * the excess over sizeof(ControlFileData). This reduces the odds of
4203  * premature-EOF errors when reading pg_control. We'll still fail when we
4204  * check the contents of the file, but hopefully with a more specific
4205  * error than "couldn't read pg_control".
4206  */
4207  memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4208  memcpy(buffer, ControlFile, sizeof(ControlFileData));
4209 
4211  O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4212  if (fd < 0)
4213  ereport(PANIC,
4215  errmsg("could not create file \"%s\": %m",
4216  XLOG_CONTROL_FILE)));
4217 
4218  errno = 0;
4219  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4221  {
4222  /* if write didn't set errno, assume problem is no disk space */
4223  if (errno == 0)
4224  errno = ENOSPC;
4225  ereport(PANIC,
4227  errmsg("could not write to file \"%s\": %m",
4228  XLOG_CONTROL_FILE)));
4229  }
4231 
4232  pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4233  if (pg_fsync(fd) != 0)
4234  ereport(PANIC,
4236  errmsg("could not fsync file \"%s\": %m",
4237  XLOG_CONTROL_FILE)));
4239 
4240  if (close(fd) != 0)
4241  ereport(PANIC,
4243  errmsg("could not close file \"%s\": %m",
4244  XLOG_CONTROL_FILE)));
4245 }
#define FLOAT8PASSBYVAL
Definition: c.h:622
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:249

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, FLOAT8PASSBYVAL, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState record)

Definition at line 8069 of file xlog.c.

8070 {
8071  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8072  XLogRecPtr lsn = record->EndRecPtr;
8073 
8074  /*
8075  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8076  * XLOG_FPI_FOR_HINT records.
8077  */
8078  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8079  !XLogRecHasAnyBlockRefs(record));
8080 
8081  if (info == XLOG_NEXTOID)
8082  {
8083  Oid nextOid;
8084 
8085  /*
8086  * We used to try to take the maximum of TransamVariables->nextOid and
8087  * the recorded nextOid, but that fails if the OID counter wraps
8088  * around. Since no OID allocation should be happening during replay
8089  * anyway, better to just believe the record exactly. We still take
8090  * OidGenLock while setting the variable, just in case.
8091  */
8092  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8093  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8094  TransamVariables->nextOid = nextOid;
8096  LWLockRelease(OidGenLock);
8097  }
8098  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8099  {
8100  CheckPoint checkPoint;
8101  TimeLineID replayTLI;
8102 
8103  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8104  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8105  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8106  TransamVariables->nextXid = checkPoint.nextXid;
8107  LWLockRelease(XidGenLock);
8108  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8109  TransamVariables->nextOid = checkPoint.nextOid;
8111  LWLockRelease(OidGenLock);
8112  MultiXactSetNextMXact(checkPoint.nextMulti,
8113  checkPoint.nextMultiOffset);
8114 
8116  checkPoint.oldestMultiDB);
8117 
8118  /*
8119  * No need to set oldestClogXid here as well; it'll be set when we
8120  * redo an xl_clog_truncate if it changed since initialization.
8121  */
8122  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8123 
8124  /*
8125  * If we see a shutdown checkpoint while waiting for an end-of-backup
8126  * record, the backup was canceled and the end-of-backup record will
8127  * never arrive.
8128  */
8132  ereport(PANIC,
8133  (errmsg("online backup was canceled, recovery cannot continue")));
8134 
8135  /*
8136  * If we see a shutdown checkpoint, we know that nothing was running
8137  * on the primary at this point. So fake-up an empty running-xacts
8138  * record and use that here and now. Recover additional standby state
8139  * for prepared transactions.
8140  */
8142  {
8143  TransactionId *xids;
8144  int nxids;
8145  TransactionId oldestActiveXID;
8146  TransactionId latestCompletedXid;
8147  RunningTransactionsData running;
8148 
8149  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8150 
8151  /*
8152  * Construct a RunningTransactions snapshot representing a shut
8153  * down server, with only prepared transactions still alive. We're
8154  * never overflowed at this point because all subxids are listed
8155  * with their parent prepared transactions.
8156  */
8157  running.xcnt = nxids;
8158  running.subxcnt = 0;
8159  running.subxid_overflow = false;
8160  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8161  running.oldestRunningXid = oldestActiveXID;
8162  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8163  TransactionIdRetreat(latestCompletedXid);
8164  Assert(TransactionIdIsNormal(latestCompletedXid));
8165  running.latestCompletedXid = latestCompletedXid;
8166  running.xids = xids;
8167 
8168  ProcArrayApplyRecoveryInfo(&running);
8169 
8171  }
8172 
8173  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8174  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8175  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8176  LWLockRelease(ControlFileLock);
8177 
8178  /* Update shared-memory copy of checkpoint XID/epoch */
8180  XLogCtl->ckptFullXid = checkPoint.nextXid;
8182 
8183  /*
8184  * We should've already switched to the new TLI before replaying this
8185  * record.
8186  */
8187  (void) GetCurrentReplayRecPtr(&replayTLI);
8188  if (checkPoint.ThisTimeLineID != replayTLI)
8189  ereport(PANIC,
8190  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8191  checkPoint.ThisTimeLineID, replayTLI)));
8192 
8193  RecoveryRestartPoint(&checkPoint, record);
8194  }
8195  else if (info == XLOG_CHECKPOINT_ONLINE)
8196  {
8197  CheckPoint checkPoint;
8198  TimeLineID replayTLI;
8199 
8200  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8201  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8202  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8204  checkPoint.nextXid))
8205  TransamVariables->nextXid = checkPoint.nextXid;
8206  LWLockRelease(XidGenLock);
8207 
8208  /*
8209  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8210  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8211  * counter is from the start of the checkpoint and might well be stale
8212  * compared to later XLOG_NEXTOID records. We could try to take the
8213  * maximum of the nextOid counter and our latest value, but since
8214  * there's no particular guarantee about the speed with which the OID
8215  * counter wraps around, that's a risky thing to do. In any case,
8216  * users of the nextOid counter are required to avoid assignment of
8217  * duplicates, so that a somewhat out-of-date value should be safe.
8218  */
8219 
8220  /* Handle multixact */
8222  checkPoint.nextMultiOffset);
8223 
8224  /*
8225  * NB: This may perform multixact truncation when replaying WAL
8226  * generated by an older primary.
8227  */
8229  checkPoint.oldestMultiDB);
8231  checkPoint.oldestXid))
8232  SetTransactionIdLimit(checkPoint.oldestXid,
8233  checkPoint.oldestXidDB);
8234  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8235  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8236  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8237  LWLockRelease(ControlFileLock);
8238 
8239  /* Update shared-memory copy of checkpoint XID/epoch */
8241  XLogCtl->ckptFullXid = checkPoint.nextXid;
8243 
8244  /* TLI should not change in an on-line checkpoint */
8245  (void) GetCurrentReplayRecPtr(&replayTLI);
8246  if (checkPoint.ThisTimeLineID != replayTLI)
8247  ereport(PANIC,
8248  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8249  checkPoint.ThisTimeLineID, replayTLI)));
8250 
8251  RecoveryRestartPoint(&checkPoint, record);
8252  }
8253  else if (info == XLOG_OVERWRITE_CONTRECORD)
8254  {
8255  /* nothing to do here, handled in xlogrecovery_redo() */
8256  }
8257  else if (info == XLOG_END_OF_RECOVERY)
8258  {
8259  xl_end_of_recovery xlrec;
8260  TimeLineID replayTLI;
8261 
8262  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8263 
8264  /*
8265  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8266  * but this case is rarer and harder to test, so the benefit doesn't
8267  * outweigh the potential extra cost of maintenance.
8268  */
8269 
8270  /*
8271  * We should've already switched to the new TLI before replaying this
8272  * record.
8273  */
8274  (void) GetCurrentReplayRecPtr(&replayTLI);
8275  if (xlrec.ThisTimeLineID != replayTLI)
8276  ereport(PANIC,
8277  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8278  xlrec.ThisTimeLineID, replayTLI)));
8279  }
8280  else if (info == XLOG_NOOP)
8281  {
8282  /* nothing to do here */
8283  }
8284  else if (info == XLOG_SWITCH)
8285  {
8286  /* nothing to do here */
8287  }
8288  else if (info == XLOG_RESTORE_POINT)
8289  {
8290  /* nothing to do here, handled in xlogrecovery.c */
8291  }
8292  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8293  {
8294  /*
8295  * XLOG_FPI records contain nothing else but one or more block
8296  * references. Every block reference must include a full-page image
8297  * even if full_page_writes was disabled when the record was generated
8298  * - otherwise there would be no point in this record.
8299  *
8300  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8301  * WAL-logged because of a hint bit update. They are only generated
8302  * when checksums and/or wal_log_hints are enabled. They may include
8303  * no full-page images if full_page_writes was disabled when they were
8304  * generated. In this case there is nothing to do here.
8305  *
8306  * No recovery conflicts are generated by these generic records - if a
8307  * resource manager needs to generate conflicts, it has to define a
8308  * separate WAL record type and redo routine.
8309  */
8310  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8311  {
8312  Buffer buffer;
8313 
8314  if (!XLogRecHasBlockImage(record, block_id))
8315  {
8316  if (info == XLOG_FPI)
8317  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8318  continue;
8319  }
8320 
8321  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8322  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8323  UnlockReleaseBuffer(buffer);
8324  }
8325  }
8326  else if (info == XLOG_BACKUP_END)
8327  {
8328  /* nothing to do here, handled in xlogrecovery_redo() */
8329  }
8330  else if (info == XLOG_PARAMETER_CHANGE)
8331  {
8332  xl_parameter_change xlrec;
8333 
8334  /* Update our copy of the parameters in pg_control */
8335  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8336 
8337  /*
8338  * Invalidate logical slots if we are in hot standby and the primary
8339  * does not have a WAL level sufficient for logical decoding. No need
8340  * to search for potentially conflicting logically slots if standby is
8341  * running with wal_level lower than logical, because in that case, we
8342  * would have either disallowed creation of logical slots or
8343  * invalidated existing ones.
8344  */
8345  if (InRecovery && InHotStandby &&
8346  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8349  0, InvalidOid,
8351 
8352  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8358  ControlFile->wal_level = xlrec.wal_level;
8360 
8361  /*
8362  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8363  * recover back up to this point before allowing hot standby again.
8364  * This is important if the max_* settings are decreased, to ensure
8365  * you don't run queries against the WAL preceding the change. The
8366  * local copies cannot be updated as long as crash recovery is
8367  * happening and we expect all the WAL to be replayed.
8368  */
8369  if (InArchiveRecovery)
8370  {
8373  }
8375  {
8376  TimeLineID replayTLI;
8377 
8378  (void) GetCurrentReplayRecPtr(&replayTLI);
8380  ControlFile->minRecoveryPointTLI = replayTLI;
8381  }
8382 
8386 
8388  LWLockRelease(ControlFileLock);
8389 
8390  /* Check to see if any parameter change gives a problem on recovery */
8392  }
8393  else if (info == XLOG_FPW_CHANGE)
8394  {
8395  bool fpw;
8396 
8397  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8398 
8399  /*
8400  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8401  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8402  * full_page_writes has been disabled during online backup.
8403  */
8404  if (!fpw)
8405  {
8407  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8410  }
8411 
8412  /* Keep track of full_page_writes */
8413  lastFullPageWrites = fpw;
8414  }
8415  else if (info == XLOG_CHECKPOINT_REDO)
8416  {
8417  /* nothing to do here, just for informational purposes */
8418  }
8419 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
unsigned char uint8
Definition: c.h:491
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2461
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2436
#define XLOG_RESTORE_POINT
Definition: pg_control.h:74
#define XLOG_FPI
Definition: pg_control.h:78
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:77
#define XLOG_NEXTOID
Definition: pg_control.h:70
#define XLOG_NOOP
Definition: pg_control.h:69
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:73
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7384
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:74
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:50
#define InHotStandby
Definition: xlogutils.h:57
@ BLK_RESTORED
Definition: xlogutils.h:73

References ArchiveRecoveryRequested, Assert(), ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_overflow, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2918 of file xlog.c.

2919 {
2920  XLogwrtRqst WriteRqst;
2921  bool flexible = true;
2922  static TimestampTz lastflush;
2923  TimestampTz now;
2924  int flushblocks;
2925  TimeLineID insertTLI;
2926 
2927  /* XLOG doesn't need flushing during recovery */
2928  if (RecoveryInProgress())
2929  return false;
2930 
2931  /*
2932  * Since we're not in recovery, InsertTimeLineID is set and can't change,
2933  * so we can read it without a lock.
2934  */
2935  insertTLI = XLogCtl->InsertTimeLineID;
2936 
2937  /* read LogwrtResult and update local state */
2940  WriteRqst = XLogCtl->LogwrtRqst;
2942 
2943  /* back off to last completed page boundary */
2944  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
2945 
2946  /* if we have already flushed that far, consider async commit records */
2947  if (WriteRqst.Write <= LogwrtResult.Flush)
2948  {
2950  WriteRqst.Write = XLogCtl->asyncXactLSN;
2952  flexible = false; /* ensure it all gets written */
2953  }
2954 
2955  /*
2956  * If already known flushed, we're done. Just need to check if we are
2957  * holding an open file handle to a logfile that's no longer in use,
2958  * preventing the file from being deleted.
2959  */
2960  if (WriteRqst.Write <= LogwrtResult.Flush)
2961  {
2962  if (openLogFile >= 0)
2963  {
2966  {
2967  XLogFileClose();
2968  }
2969  }
2970  return false;
2971  }
2972 
2973  /*
2974  * Determine how far to flush WAL, based on the wal_writer_delay and
2975  * wal_writer_flush_after GUCs.
2976  *
2977  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
2978  * wal_writer_flush_after, to decide when to wake us up. Make sure the
2979  * logic is the same in both places if you change this.
2980  */
2982  flushblocks =
2983  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2984 
2985  if (WalWriterFlushAfter == 0 || lastflush == 0)
2986  {
2987  /* first call, or block based limits disabled */
2988  WriteRqst.Flush = WriteRqst.Write;
2989  lastflush = now;
2990  }
2991  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
2992  {
2993  /*
2994  * Flush the writes at least every WalWriterDelay ms. This is
2995  * important to bound the amount of time it takes for an asynchronous
2996  * commit to hit disk.
2997  */
2998  WriteRqst.Flush = WriteRqst.Write;
2999  lastflush = now;
3000  }
3001  else if (flushblocks >= WalWriterFlushAfter)
3002  {
3003  /* exceeded wal_writer_flush_after blocks, flush */
3004  WriteRqst.Flush = WriteRqst.Write;
3005  lastflush = now;
3006  }
3007  else
3008  {
3009  /* no flushing, this time round */
3010  WriteRqst.Flush = 0;
3011  }
3012 
3013 #ifdef WAL_DEBUG
3014  if (XLOG_DEBUG)
3015  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3016  LSN_FORMAT_ARGS(WriteRqst.Write),
3017  LSN_FORMAT_ARGS(WriteRqst.Flush),
3020 #endif
3021 
3023 
3024  /* now wait for any in-progress insertions to finish and get write lock */
3025  WaitXLogInsertionsToFinish(WriteRqst.Write);
3026  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3028  if (WriteRqst.Write > LogwrtResult.Write ||
3029  WriteRqst.Flush > LogwrtResult.Flush)
3030  {
3031  XLogWrite(WriteRqst, insertTLI, flexible);
3032  }
3033  LWLockRelease(WALWriteLock);
3034 
3035  END_CRIT_SECTION();
3036 
3037  /* wake up walsenders now that we've released heavily contended locks */
3039 
3040  /*
3041  * Great, done. To take some work off the critical path, try to initialize
3042  * as many of the no-longer-needed WAL buffers for future use as we can.
3043  */
3044  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3045 
3046  /*
3047  * If we determined that we need to write data, but somebody else
3048  * wrote/flushed already, it should be considered as being active, to
3049  * avoid hibernating too early.
3050  */
3051  return true;
3052 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
XLogRecPtr asyncXactLSN
Definition: xlog.c:464
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:66
int WalWriterFlushAfter
Definition: walwriter.c:72
int WalWriterDelay
Definition: walwriter.c:71
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1857 of file xlog.c.

1858 {
1859  uint64 fullsegs;
1860  uint64 fullpages;
1861  uint64 bytesleft;
1862  uint32 seg_offset;
1863  XLogRecPtr result;
1864 
1865  fullsegs = bytepos / UsableBytesInSegment;
1866  bytesleft = bytepos % UsableBytesInSegment;
1867 
1868  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1869  {
1870  /* fits on first page of segment */
1871  if (bytesleft == 0)
1872  seg_offset = 0;
1873  else
1874  seg_offset = bytesleft + SizeOfXLogLongPHD;
1875  }
1876  else
1877  {
1878  /* account for the first page on segment with long header */
1879  seg_offset = XLOG_BLCKSZ;
1880  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1881 
1882  fullpages = bytesleft / UsableBytesInPage;
1883  bytesleft = bytesleft % UsableBytesInPage;
1884 
1885  if (bytesleft == 0)
1886  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1887  else
1888  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1889  }
1890 
1891  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1892 
1893  return result;
1894 }
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1817 of file xlog.c.

1818 {
1819  uint64 fullsegs;
1820  uint64 fullpages;
1821  uint64 bytesleft;
1822  uint32 seg_offset;
1823  XLogRecPtr result;
1824 
1825  fullsegs = bytepos / UsableBytesInSegment;
1826  bytesleft = bytepos % UsableBytesInSegment;
1827 
1828  if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1829  {
1830  /* fits on first page of segment */
1831  seg_offset = bytesleft + SizeOfXLogLongPHD;
1832  }
1833  else
1834  {
1835  /* account for the first page on segment with long header */
1836  seg_offset = XLOG_BLCKSZ;
1837  bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1838 
1839  fullpages = bytesleft / UsableBytesInPage;
1840  bytesleft = bytesleft % UsableBytesInPage;
1841 
1842  seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1843  }
1844 
1845  XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1846 
1847  return result;
1848 }

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2249 of file xlog.c.

2250 {
2251  XLogSegNo old_segno;
2252 
2254 
2255  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2256  return true;
2257  return false;
2258 }

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4511 of file xlog.c.

4512 {
4513  int xbuffers;
4514 
4515  xbuffers = NBuffers / 32;
4516  if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4517  xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4518  if (xbuffers < 8)
4519  xbuffers = 8;
4520  return xbuffers;
4521 }

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3569 of file xlog.c.

3570 {
3571  Assert(openLogFile >= 0);
3572 
3573  /*
3574  * WAL segment files will not be re-read in normal operation, so we advise
3575  * the OS to release any cached pages. But do not do so if WAL archiving
3576  * or streaming is active, because archiver and walsender process could
3577  * use the cache to read the WAL segment.
3578  */
3579 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3580  if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3581  (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3582 #endif
3583 
3584  if (close(openLogFile) != 0)
3585  {
3586  char xlogfname[MAXFNAMELEN];
3587  int save_errno = errno;
3588 
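3589  XLogFileName(xlogfname, openLogTLI, openLogSegNo, wal_segment_size);
3590  errno = save_errno;
3591  ereport(PANIC,
3592  (errcode_for_file_access(),
3593  errmsg("could not close file \"%s\": %m", xlogfname)));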
3594  }
3595 
3596  openLogFile = -1;
3597  ReleaseExternalFD();
3598 }
void ReleaseExternalFD(void)
Definition: fd.c:1239

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3348 of file xlog.c.

3351 {
3352  char path[MAXPGPATH];
3353  char tmppath[MAXPGPATH];
3354  PGAlignedXLogBlock buffer;
3355  int srcfd;
3356  int fd;
3357  int nbytes;
3358 
3359  /*
3360  * Open the source file
3361  */
3362  XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3363  srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3364  if (srcfd < 0)
3365  ereport(ERROR,
3366  (errcode_for_file_access(),
3367  errmsg("could not open file \"%s\": %m", path)));
3368 
3369  /*
3370  * Copy into a temp file name.
3371  */
3372  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3373 
3374  unlink(tmppath);
3375 
3376  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3377  fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3378  if (fd < 0)
3379  ereport(ERROR,
3380  (errcode_for_file_access(),
3381  errmsg("could not create file \"%s\": %m", tmppath)));
3382 
3383  /*
3384  * Do the data copying.
3385  */
3386  for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3387  {
3388  int nread;
3389 
3390  nread = upto - nbytes;
3391 
3392  /*
3393  * The part that is not read from the source file is filled with
3394  * zeros.
3395  */
3396  if (nread < sizeof(buffer))
3397  memset(buffer.data, 0, sizeof(buffer));
3398 
3399  if (nread > 0)
3400  {
3401  int r;
3402 
3403  if (nread > sizeof(buffer))
3404  nread = sizeof(buffer);
3405  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3406  r = read(srcfd, buffer.data, nread);
3407  if (r != nread)
3408  {
3409  if (r < 0)
3410  ereport(ERROR,
3411  (errcode_for_file_access(),
3412  errmsg("could not read file \"%s\": %m",
3413  path)));
3414  else
3415  ereport(ERROR,
3416  (errcode(ERRCODE_DATA_CORRUPTED),
3417  errmsg("could not read file \"%s\": read %d of %zu",
3418  path, r, (Size) nread)));
3419  }
3420  pgstat_report_wait_end();
3421  }
3422  errno = 0;
3423  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3424  if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3425  {
3426  int save_errno = errno;
3427 
3428  /*
3429  * If we fail to make the file, delete it to release disk space
3430  */
3431  unlink(tmppath);
3432  /* if write didn't set errno, assume problem is no disk space */
3433  errno = save_errno ? save_errno : ENOSPC;
3434 
3435  ereport(ERROR,
3436  (errcode_for_file_access(),
3437  errmsg("could not write to file \"%s\": %m", tmppath)));
3438  }
3439  pgstat_report_wait_end();
3440  }
3441 
3442  pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
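3443  if (pg_fsync(fd) != 0)
3444  ereport(data_sync_elevel(ERROR),
3445  (errcode_for_file_access(),
3446  errmsg("could not fsync file \"%s\": %m", tmppath)));
3447  pgstat_report_wait_end();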
3448 
3449  if (CloseTransientFile(fd) != 0)
3450  ereport(ERROR,
3451  (errcode_for_file_access(),
3452  errmsg("could not close file \"%s\": %m", tmppath)));
3453 
3454  if (CloseTransientFile(srcfd) != 0)
3455  ereport(ERROR,
3456  (errcode_for_file_access(),
3457  errmsg("could not close file \"%s\": %m", path)));
3458 
3459  /*
3460  * Now move the segment into place with its final name.
3461  */
3462  if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3463  elog(ERROR, "InstallXLogFileSegment should not have failed");
3464 }
int CloseTransientFile(int fd)
Definition: fd.c:2809
int data_sync_elevel(int elevel)
Definition: fd.c:3936
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
char data[XLOG_BLCKSZ]
Definition: c.h:1135

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3310 of file xlog.c.

3311 {
3312  bool ignore_added;
3313  char path[MAXPGPATH];
3314  int fd;
3315 
3316  Assert(logtli != 0);
3317 
3318  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3319  if (fd >= 0)
3320  return fd;
3321 
3322  /* Now open original target segment (might not be file I just made) */
3323  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3324  get_sync_bit(wal_sync_method));
3325  if (fd < 0)
3326  ereport(ERROR,
3327  (errcode_for_file_access(),
3328  errmsg("could not open file \"%s\": %m", path)));
3329  return fd;
3330 }
#define O_CLOEXEC
Definition: win32_port.h:359

References Assert(), BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool added,
char *  path 
)
static

Definition at line 3140 of file xlog.c.

3142 {
3143  char tmppath[MAXPGPATH];
3144  XLogSegNo installed_segno;
3145  XLogSegNo max_segno;
3146  int fd;
3147  int save_errno;
3148  int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3149 
3150  Assert(logtli != 0);
3151 
3152  XLogFilePath(path, logtli, logsegno, wal_segment_size);
3153 
3154  /*
3155  * Try to use existent file (checkpoint maker may have created it already)
3156  */
3157  *added = false;
3158  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3159  get_sync_bit(wal_sync_method));
3160  if (fd < 0)
3161  {
3162  if (errno != ENOENT)
3163  ereport(ERROR,
3164  (errcode_for_file_access(),
3165  errmsg("could not open file \"%s\": %m", path)));
3166  }
3167  else
3168  return fd;
3169 
3170  /*
3171  * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3172  * another process is doing the same thing. If so, we will end up
3173  * pre-creating an extra log segment. That seems OK, and better than
3174  * holding the lock throughout this lengthy process.
3175  */
3176  elog(DEBUG2, "creating and filling new WAL file");
3177 
3178  snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3179 
3180  unlink(tmppath);
3181 
3182  if (io_direct_flags & IO_DIRECT_WAL_INIT)
3183  open_flags |= PG_O_DIRECT;
3184 
3185  /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3186  fd = BasicOpenFile(tmppath, open_flags);
3187  if (fd < 0)
3188  ereport(ERROR,
3189  (errcode_for_file_access(),
3190  errmsg("could not create file \"%s\": %m", tmppath)));
3191 
3192  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3193  save_errno = 0;
3194  if (wal_init_zero)
3195  {
3196  ssize_t rc;
3197 
3198  /*
3199  * Zero-fill the file. With this setting, we do this the hard way to
3200  * ensure that all the file space has really been allocated. On
3201  * platforms that allow "holes" in files, just seeking to the end
3202  * doesn't allocate intermediate space. This way, we know that we
3203  * have all the space and (after the fsync below) that all the
3204  * indirect blocks are down on disk. Therefore, fdatasync(2) or
3205  * O_DSYNC will be sufficient to sync future writes to the log file.
3206  */
3207  rc = pg_pwrite_zeros(fd, wal_segment_size, 0);
3208 
3209  if (rc < 0)
3210  save_errno = errno;
3211  }
3212  else
3213  {
3214  /*
3215  * Otherwise, seeking to the end and writing a solitary byte is
3216  * enough.
3217  */
3218  errno = 0;
3219  if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3220  {
3221  /* if write didn't set errno, assume no disk space */
3222  save_errno = errno ? errno : ENOSPC;
3223  }
3224  }
3225  pgstat_report_wait_end();
3226 
3227  if (save_errno)
3228  {
3229  /*
3230  * If we fail to make the file, delete it to release disk space
3231  */
3232  unlink(tmppath);
3233 
3234  close(fd);
3235 
3236  errno = save_errno;
3237 
3238  ereport(ERROR,
3239  (errcode_for_file_access(),
3240  errmsg("could not write to file \"%s\": %m", tmppath)));
3241  }
3242 
3243  pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3244  if (pg_fsync(fd) != 0)
3245  {
3246  save_errno = errno;
3247  close(fd);
3248  errno = save_errno;
3249  ereport(ERROR,
3250  (errcode_for_file_access(),
3251  errmsg("could not fsync file \"%s\": %m", tmppath)));
3252  }
3253  pgstat_report_wait_end();
3254 
3255  if (close(fd) != 0)
3256  ereport(ERROR,
3257  (errcode_for_file_access(),
3258  errmsg("could not close file \"%s\": %m", tmppath)));
3259 
3260  /*
3261  * Now move the segment into place with its final name. Cope with
3262  * possibility that someone else has created the file while we were
3263  * filling ours: if so, use ours to pre-create a future log segment.
3264  */
3265  installed_segno = logsegno;
3266 
3267  /*
3268  * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3269  * that was a constant, but that was always a bit dubious: normally, at a
3270  * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3271  * here, it was the offset from the insert location. We can't do the
3272  * normal XLOGfileslop calculation here because we don't have access to
3273  * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3274  * CheckPointSegments.
3275  */
3276  max_segno = logsegno + CheckPointSegments;
3277  if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3278  logtli))
3279  {
3280  *added = true;
3281  elog(DEBUG2, "done creating and filling new WAL file");
3282  }
3283  else
3284  {
3285  /*
3286  * No need for any more future segments, or InstallXLogFileSegment()
3287  * failed to rename the file into place. If the rename failed, a
3288  * caller opening the file may fail.
3289  */
3290  unlink(tmppath);
3291  elog(DEBUG2, "abandoned new WAL file");
3292  }
3293 
3294  return -1;
3295 }
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset)
Definition: file_utils.c:687
#define pg_pwrite
Definition: port.h:226
bool wal_init_zero
Definition: xlog.c:127

References Assert(), BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3548 of file xlog.c.

3549 {
3550  char path[MAXPGPATH];
3551  int fd;
3552 
3553  XLogFilePath(path, tli, segno, wal_segment_size);
3554 
3555  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3556  get_sync_bit(wal_sync_method));
3557  if (fd < 0)
3558  ereport(PANIC,
3559  (errcode_for_file_access(),
3560  errmsg("could not open file \"%s\": %m", path)));
3561 
3562  return fd;
3563 }

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2199 of file xlog.c.

2200 {
2201  XLogSegNo minSegNo;
2202  XLogSegNo maxSegNo;
2203  double distance;
2204  XLogSegNo recycleSegNo;
2205 
2206  /*
2207  * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2208  * correspond to. Always recycle enough segments to meet the minimum, and
2209  * remove enough segments to stay below the maximum.
2210  */
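2211  minSegNo = lastredoptr / wal_segment_size +
2212  ConvertToXSegs(min_wal_size_mb, wal_segment_size) - 1;
2213  maxSegNo = lastredoptr / wal_segment_size +
2214  ConvertToXSegs(max_wal_size_mb, wal_segment_size) - 1;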
2215 
2216  /*
2217  * Between those limits, recycle enough segments to get us through to the
2218  * estimated end of next checkpoint.
2219  *
2220  * To estimate where the next checkpoint will finish, assume that the
2221  * system runs steadily consuming CheckPointDistanceEstimate bytes between
2222  * every checkpoint.
2223  */
2224  distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
2225  /* add 10% for good measure. */
2226  distance *= 1.10;
2227 
2228  recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2229  wal_segment_size);
2230 
2231  if (recycleSegNo < minSegNo)
2232  recycleSegNo = minSegNo;
2233  if (recycleSegNo > maxSegNo)
2234  recycleSegNo = maxSegNo;
2235 
2236  return recycleSegNo;
2237 }

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2728 of file xlog.c.

2729 {
2730  XLogRecPtr WriteRqstPtr;
2731  XLogwrtRqst WriteRqst;
2732  TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2733 
2734  /*
2735  * During REDO, we are reading not writing WAL. Therefore, instead of
2736  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2737  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2738  * to act this way too, and because when it tries to write the
2739  * end-of-recovery checkpoint, it should indeed flush.
2740  */
2741  if (!XLogInsertAllowed())
2742  {
2743  UpdateMinRecoveryPoint(record, false);
2744  return;
2745  }
2746 
2747  /* Quick exit if already known flushed */
2748  if (record <= LogwrtResult.Flush)
2749  return;
2750 
2751 #ifdef WAL_DEBUG
2752  if (XLOG_DEBUG)
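2753  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2754  LSN_FORMAT_ARGS(record),
2755  LSN_FORMAT_ARGS(LogwrtResult.Write),
2756  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2757 #endif
2758 
2759  START_CRIT_SECTION();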
2760 
2761  /*
2762  * Since fsync is usually a horribly expensive operation, we try to
2763  * piggyback as much data as we can on each fsync: if we see any more data
2764  * entered into the xlog buffer, we'll write and fsync that too, so that
2765  * the final value of LogwrtResult.Flush is as large as possible. This
2766  * gives us some chance of avoiding another fsync immediately after.
2767  */
2768 
2769  /* initialize to given target; may increase below */
2770  WriteRqstPtr = record;
2771 
2772  /*
2773  * Now wait until we get the write lock, or someone else does the flush
2774  * for us.
2775  */
2776  for (;;)
2777  {
2778  XLogRecPtr insertpos;
2779 
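2780  /* read LogwrtResult and update local state */
2781  SpinLockAcquire(&XLogCtl->info_lck);
2782  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2783  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2784  LogwrtResult = XLogCtl->LogwrtResult;
2785  SpinLockRelease(&XLogCtl->info_lck);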
2786 
2787  /* done already? */
2788  if (record <= LogwrtResult.Flush)
2789  break;
2790 
2791  /*
2792  * Before actually performing the write, wait for all in-flight
2793  * insertions to the pages we're about to write to finish.
2794  */
2795  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2796 
2797  /*
2798  * Try to get the write lock. If we can't get it immediately, wait
2799  * until it's released, and recheck if we still need to do the flush
2800  * or if the backend that held the lock did it for us already. This
2801  * helps to maintain a good rate of group committing when the system
2802  * is bottlenecked by the speed of fsyncing.
2803  */
2804  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2805  {
2806  /*
2807  * The lock is now free, but we didn't acquire it yet. Before we
2808  * do, loop back to check if someone else flushed the record for
2809  * us already.
2810  */
2811  continue;
2812  }
2813 
2814  /* Got the lock; recheck whether request is satisfied */
2815  LogwrtResult = XLogCtl->LogwrtResult;
2816  if (record <= LogwrtResult.Flush)
2817  {
2818  LWLockRelease(WALWriteLock);
2819  break;
2820  }
2821 
2822  /*
2823  * Sleep before flush! By adding a delay here, we may give further
2824  * backends the opportunity to join the backlog of group commit
2825  * followers; this can significantly improve transaction throughput,
2826  * at the risk of increasing transaction latency.
2827  *
2828  * We do not sleep if enableFsync is not turned on, nor if there are
2829  * fewer than CommitSiblings other backends with active transactions.
2830  */
2831  if (CommitDelay > 0 && enableFsync &&
2832  MinimumActiveBackends(CommitSiblings))
2833  {
2834  pg_usleep(CommitDelay);
2835 
2836  /*
2837  * Re-check how far we can now flush the WAL. It's generally not
2838  * safe to call WaitXLogInsertionsToFinish while holding
2839  * WALWriteLock, because an in-progress insertion might need to
2840  * also grab WALWriteLock to make progress. But we know that all
2841  * the insertions up to insertpos have already finished, because
2842  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2843  * We're only calling it again to allow insertpos to be moved
2844  * further forward, not to actually wait for anyone.
2845  */
2846  insertpos = WaitXLogInsertionsToFinish(insertpos);
2847  }
2848 
2849  /* try to write/flush later additions to XLOG as well */
2850  WriteRqst.Write = insertpos;
2851  WriteRqst.Flush = insertpos;
2852 
2853  XLogWrite(WriteRqst, insertTLI, false);
2854 
2855  LWLockRelease(WALWriteLock);
2856  /* done */
2857  break;
2858  }
2859 
2860  END_CRIT_SECTION();
2861 
2862  /* wake up walsenders now that we've released heavily contended locks */
2863  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2864 
2865  /*
2866  * If we still haven't flushed to the request point then we have a
2867  * problem; most likely, the requested flush point is past end of XLOG.
2868  * This has been seen to occur when a disk page has a corrupted LSN.
2869  *
2870  * Formerly we treated this as a PANIC condition, but that hurts the
2871  * system's robustness rather than helping it: we do not want to take down
2872  * the whole system due to corruption on one data page. In particular, if
2873  * the bad page is encountered again during recovery then we would be
2874  * unable to restart the database at all! (This scenario actually
2875  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2876  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2877  * the only time we can reach here during recovery is while flushing the
2878  * end-of-recovery checkpoint record, and we don't expect that to have a
2879  * bad LSN.
2880  *
2881  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2882  * since xact.c calls this routine inside a critical section. However,
2883  * calls from bufmgr.c are not within critical sections and so we will not
2884  * force a restart for a bad LSN on a data page.
2885  */
2886  if (LogwrtResult.Flush < record)
2887  elog(ERROR,
2888  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2889  LSN_FORMAT_ARGS(record),
2890  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2891 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1400
bool MinimumActiveBackends(int min)
Definition: procarray.c:3533
int CommitDelay
Definition: xlog.c:132
int CommitSiblings
Definition: xlog.c:133
bool XLogInsertAllowed(void)
Definition: xlog.c:6256

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3688 of file xlog.c.

3689 {
3690  XLogSegNo lastRemovedSegNo;
3691 
3692  SpinLockAcquire(&XLogCtl->info_lck);
3693  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3694  SpinLockRelease(&XLogCtl->info_lck);
3695 
3696  return lastRemovedSegNo;
3697 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3704 of file xlog.c.

3705 {
3706  DIR *xldir;
3707  struct dirent *xlde;
3708  XLogSegNo oldest_segno = 0;
3709 
3710  xldir = AllocateDir(XLOGDIR);
3711  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3712  {
3713  TimeLineID file_tli;
3714  XLogSegNo file_segno;
3715 
3716  /* Ignore files that are not XLOG segments. */
3717  if (!IsXLogFileName(xlde->d_name))
3718  continue;
3719 
3720  /* Parse filename to get TLI and segno. */
3721  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3722  wal_segment_size);
3723 
3724  /* Ignore anything that's not from the TLI of interest. */
3725  if (tli != file_tli)
3726  continue;
3727 
3728  /* If it's the oldest so far, update oldest_segno. */
3729  if (oldest_segno == 0 || file_segno < oldest_segno)
3730  oldest_segno = file_segno;
3731  }
3732 
3733  FreeDir(xldir);
3734  return oldest_segno;
3735 }

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2627 of file xlog.c.

2628 {
2629  XLogRecPtr retval;
2630 
2631  SpinLockAcquire(&XLogCtl->info_lck);
2632  retval = XLogCtl->replicationSlotMinLSN;
2633  SpinLockRelease(&XLogCtl->info_lck);
2634 
2635  return retval;
2636 }
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:465

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5100 of file xlog.c.

5101 {
5102  char xlogfname[MAXFNAMELEN];
5103  XLogSegNo endLogSegNo;
5104  XLogSegNo startLogSegNo;
5105 
5106  /* we always switch to a new timeline after archive recovery */
5107  Assert(endTLI != newTLI);
5108 
5109  /*
5110  * Update min recovery point one last time.
5111  */
5112  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
5113 
5114  /*
5115  * Calculate the last segment on the old timeline, and the first segment
5116  * on the new timeline. If the switch happens in the middle of a segment,
5117  * they are the same, but if the switch happens exactly at a segment
5118  * boundary, startLogSegNo will be endLogSegNo + 1.
5119  */
5120  XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5121  XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5122 
5123  /*
5124  * Initialize the starting WAL segment for the new timeline. If the switch
5125  * happens in the middle of a segment, copy data from the last WAL segment
5126  * of the old timeline up to the switch point, to the starting WAL segment
5127  * on the new timeline.
5128  */
5129  if (endLogSegNo == startLogSegNo)
5130  {
5131  /*
5132  * Make a copy of the file on the new timeline.
5133  *
5134  * Writing WAL isn't allowed yet, so there are no locking
5135  * considerations. But we should be just as tense as XLogFileInit to
5136  * avoid emplacing a bogus file.
5137  */
5138  XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5139  XLogSegmentOffset(endOfLog, wal_segment_size));
5140  }
5141  else
5142  {
5143  /*
5144  * The switch happened at a segment boundary, so just create the next
5145  * segment on the new timeline.
5146  */
5147  int fd;
5148 
5149  fd = XLogFileInit(startLogSegNo, newTLI);
5150 
5151  if (close(fd) != 0)
5152  {
5153  int save_errno = errno;
5154 
5155  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5156  errno = save_errno;
5157  ereport(ERROR,
5158  (errcode_for_file_access(),
5159  errmsg("could not close file \"%s\": %m", xlogfname)));
5160  }
5161  }
5162 
5163  /*
5164  * Let's just make real sure there are not .ready or .done flags posted
5165  * for the new segment.
5166  */
5167  XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5168  XLogArchiveCleanup(xlogfname);
5169 }
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3348

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6256 of file xlog.c.

6257 {
6258  /*
6259  * If value is "unconditionally true" or "unconditionally false", just
6260  * return it. This provides the normal fast path once recovery is known
6261  * done.
6262  */
6263  if (LocalXLogInsertAllowed >= 0)
6264  return (bool) LocalXLogInsertAllowed;
6265 
6266  /*
6267  * Else, must check to see if we're still in recovery.
6268  */
6269  if (RecoveryInProgress())
6270  return false;
6271 
6272  /*
6273  * On exit from recovery, reset to "unconditionally true", since there is
6274  * no need to keep checking.
6275  */
6277  return true;
6278 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 737 of file xlog.c.

742 {
744  pg_crc32c rdata_crc;
745  bool inserted;
746  XLogRecord *rechdr = (XLogRecord *) rdata->data;
747  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
749  XLogRecPtr StartPos;
750  XLogRecPtr EndPos;
751  bool prevDoPageWrites = doPageWrites;
752  TimeLineID insertTLI;
753 
754  /* Does this record type require special handling? */
755  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
756  {
757  if (info == XLOG_SWITCH)
758  class = WALINSERT_SPECIAL_SWITCH;
759  else if (info == XLOG_CHECKPOINT_REDO)
761  }
762 
763  /* we assume that all of the record header is in the first chunk */
764  Assert(rdata->len >= SizeOfXLogRecord);
765 
766  /* cross-check on whether we should be here or not */
767  if (!XLogInsertAllowed())
768  elog(ERROR, "cannot make new WAL entries during recovery");
769 
770  /*
771  * Given that we're not in recovery, InsertTimeLineID is set and can't
772  * change, so we can read it without a lock.
773  */
774  insertTLI = XLogCtl->InsertTimeLineID;
775 
776  /*----------
777  *
778  * We have now done all the preparatory work we can without holding a
779  * lock or modifying shared state. From here on, inserting the new WAL
780  * record to the shared WAL buffer cache is a two-step process:
781  *
782  * 1. Reserve the right amount of space from the WAL. The current head of
783  * reserved space is kept in Insert->CurrBytePos, and is protected by
784  * insertpos_lck.
785  *
786  * 2. Copy the record to the reserved WAL space. This involves finding the
787  * correct WAL buffer containing the reserved space, and copying the
788  * record in place. This can be done concurrently in multiple processes.
789  *
790  * To keep track of which insertions are still in-progress, each concurrent
791  * inserter acquires an insertion lock. In addition to just indicating that
792  * an insertion is in progress, the lock tells others how far the inserter
793  * has progressed. There is a small fixed number of insertion locks,
794  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
795  * boundary, it updates the value stored in the lock to the how far it has
796  * inserted, to allow the previous buffer to be flushed.
797  *
798  * Holding onto an insertion lock also protects RedoRecPtr and
799  * fullPageWrites from changing until the insertion is finished.
800  *
801  * Step 2 can usually be done completely in parallel. If the required WAL
802  * page is not initialized yet, you have to grab WALBufMappingLock to
803  * initialize it, but the WAL writer tries to do that ahead of insertions
804  * to avoid that from happening in the critical path.
805  *
806  *----------
807  */
809 
810  if (likely(class == WALINSERT_NORMAL))
811  {
813 
814  /*
815  * Check to see if my copy of RedoRecPtr is out of date. If so, may
816  * have to go back and have the caller recompute everything. This can
817  * only happen just after a checkpoint, so it's better to be slow in
818  * this case and fast otherwise.
819  *
820  * Also check to see if fullPageWrites was just turned on or there's a
821  * running backup (which forces full-page writes); if we weren't
822  * already doing full-page writes then go back and recompute.
823  *
824  * If we aren't doing full-page writes then RedoRecPtr doesn't
825  * actually affect the contents of the XLOG record, so we'll update
826  * our local copy but not force a recomputation. (If doPageWrites was
827  * just turned off, we could recompute the record without full pages,
828  * but we choose not to bother.)
829  */
830  if (RedoRecPtr != Insert->RedoRecPtr)
831  {
832  Assert(RedoRecPtr < Insert->RedoRecPtr);
833  RedoRecPtr = Insert->RedoRecPtr;
834  }
835  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
836 
837  if (doPageWrites &&
838  (!prevDoPageWrites ||
839  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
840  {
841  /*
842  * Oops, some buffer now needs to be backed up that the caller
843  * didn't back up. Start over.
844  */
847  return InvalidXLogRecPtr;
848  }
849 
850  /*
851  * Reserve space for the record in the WAL. This also sets the xl_prev
852  * pointer.
853  */
854  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
855  &rechdr->xl_prev);
856 
857  /* Normal records are always inserted. */
858  inserted = true;
859  }
860  else if (class == WALINSERT_SPECIAL_SWITCH)
861  {
862  /*
863  * In order to insert an XLOG_SWITCH record, we need to hold all of
864  * the WAL insertion locks, not just one, so that no one else can
865  * begin inserting a record until we've figured out how much space
866  * remains in the current WAL segment and claimed all of it.
867  *
868  * Nonetheless, this case is simpler than the normal cases handled
869  * below, which must check for changes in doPageWrites and RedoRecPtr.
870  * Those checks are only needed for records that can contain buffer
871  * references, and an XLOG_SWITCH record never does.
872  */
873  Assert(fpw_lsn == InvalidXLogRecPtr);
875  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
876  }
877  else
878  {
880 
881  /*
882  * We need to update both the local and shared copies of RedoRecPtr,
883  * which means that we need to hold all the WAL insertion locks.
884  * However, there can't be any buffer references, so as above, we need
885  * not check RedoRecPtr before inserting the record; we just need to
886  * update it afterwards.
887  */
888  Assert(fpw_lsn == InvalidXLogRecPtr);
890  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
891  &rechdr->xl_prev);
892  RedoRecPtr = Insert->RedoRecPtr = StartPos;
893  inserted = true;
894  }
895 
896  if (inserted)
897  {
898  /*
899  * Now that xl_prev has been filled in, calculate CRC of the record
900  * header.
901  */
902  rdata_crc = rechdr->xl_crc;
903  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
904  FIN_CRC32C(rdata_crc);
905  rechdr->xl_crc = rdata_crc;
906 
907  /*
908  * All the record data, including the header, is now ready to be
909  * inserted. Copy the record in the space reserved.
910  */
912  class == WALINSERT_SPECIAL_SWITCH, rdata,
913  StartPos, EndPos, insertTLI);
914 
915  /*
916  * Unless record is flagged as not important, update LSN of last
917  * important record in the current slot. When holding all locks, just
918  * update the first one.
919  */
920  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
921  {
922  int lockno = holdingAllLocks ? 0 : MyLockNo;
923 
924  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
925  }
926  }
927  else
928  {
929  /*
930  * This was an xlog-switch record, but the current insert location was
931  * already exactly at the beginning of a segment, so there was no need
932  * to do anything.
933  */
934  }
935 
936  /*
937  * Done! Let others know that we're finished.
938  */
940 
942 
944 
945  /*
946  * Mark top transaction id is logged (if needed) so that we should not try
947  * to log it again with the next WAL record in the current subtransaction.
948  */
949  if (topxid_included)
951 
952  /*
953  * Update shared LogwrtRqst.Write, if we crossed page boundary.
954  */
955  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
956  {
958  /* advance global request to include new block(s) */
959  if (XLogCtl->LogwrtRqst.Write < EndPos)
960  XLogCtl->LogwrtRqst.Write = EndPos;
961  /* update local result copy while I have the chance */
964  }
965 
966  /*
967  * If this was an XLOG_SWITCH record, flush the record and the empty
968  * padding space that fills the rest of the segment, and perform
969  * end-of-segment actions (eg, notifying archiver).
970  */
971  if (class == WALINSERT_SPECIAL_SWITCH)
972  {
973  TRACE_POSTGRESQL_WAL_SWITCH();
974  XLogFlush(EndPos);
975 
976  /*
977  * Even though we reserved the rest of the segment for us, which is
978  * reflected in EndPos, we return a pointer to just the end of the
979  * xlog-switch record.
980  */
981  if (inserted)
982  {
983  EndPos = StartPos + SizeOfXLogRecord;
984  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
985  {
986  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
987 
988  if (offset == EndPos % XLOG_BLCKSZ)
989  EndPos += SizeOfXLogLongPHD;
990  else
991  EndPos += SizeOfXLogShortPHD;
992  }
993  }
994  }
995 
996 #ifdef WAL_DEBUG
997  if (XLOG_DEBUG)
998  {
999  static XLogReaderState *debug_reader = NULL;
1000  XLogRecord *record;
1001  DecodedXLogRecord *decoded;
1003  StringInfoData recordBuf;
1004  char *errormsg = NULL;
1005  MemoryContext oldCxt;
1006 
1007  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1008 
1009  initStringInfo(&buf);
1010  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1011 
1012  /*
1013  * We have to piece together the WAL record data from the XLogRecData
1014  * entries, so that we can pass it to the rm_desc function as one
1015  * contiguous chunk.
1016  */
1017  initStringInfo(&recordBuf);
1018  for (; rdata != NULL; rdata = rdata->next)
1019  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1020 
1021  /* We also need temporary space to decode the record. */
1022  record = (XLogRecord *) recordBuf.data;
1023  decoded = (DecodedXLogRecord *)
1025 
1026  if (!debug_reader)
1027  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1028  XL_ROUTINE(.page_read = NULL,
1029  .segment_open = NULL,
1030  .segment_close = NULL),
1031  NULL);
1032  if (!debug_reader)
1033  {
1034  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1035  }
1036  else if (!DecodeXLogRecord(debug_reader,
1037  decoded,
1038  record,
1039  EndPos,
1040  &errormsg))
1041  {
1042  appendStringInfo(&buf, "error decoding record: %s",
1043  errormsg ? errormsg : "no error message");
1044  }
1045  else
1046  {
1047  appendStringInfoString(&buf, " - ");
1048 
1049  debug_reader->record = decoded;
1050  xlog_outdesc(&buf, debug_reader);
1051  debug_reader->record = NULL;
1052  }
1053  elog(LOG, "%s", buf.data);
1054 
1055  pfree(decoded);
1056  pfree(buf.data);
1057  pfree(recordBuf.data);
1058  MemoryContextSwitchTo(oldCxt);
1059  }
1060 #endif
1061 
1062  /*
1063  * Update our global variables
1064  */
1065  ProcLastRecPtr = StartPos;
1066  XactLastRecEnd = EndPos;
1067 
1068  /* Report WAL traffic to the instrumentation. */
1069  if (inserted)
1070  {
1071  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1073  pgWalUsage.wal_fpi += num_fpi;
1074  }
1075 
1076  return EndPos;
1077 }
#define likely(x)
Definition: c.h:297
#define unlikely(x)
Definition: c.h:298
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:583
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:533
XLogRecPtr XactLastRecEnd
Definition: xlog.c:254
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1215
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1098
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1154
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert(), buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3061 of file xlog.c.

3062 {
3063  /*
3064  * During recovery, we don't flush WAL but update minRecoveryPoint
3065  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3066  * would need to be updated.
3067  */
3068  if (RecoveryInProgress())
3069  {
3070  /*
3071  * An invalid minRecoveryPoint means that we need to recover all the
3072  * WAL, i.e., we're doing crash recovery. We never modify the control
3073  * file's value in that case, so we can short-circuit future checks
3074  * here too. This triggers a quick exit path for the startup process,
3075  * which cannot update its local copy of minRecoveryPoint as long as
3076  * it has not replayed all WAL available when doing crash recovery.
3077  */
3079  updateMinRecoveryPoint = false;
3080 
3081  /* Quick exit if already known to be updated or cannot be updated */
3083  return false;
3084 
3085  /*
3086  * Update local copy of minRecoveryPoint. But if the lock is busy,
3087  * just return a conservative guess.
3088  */
3089  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3090  return true;
3093  LWLockRelease(ControlFileLock);
3094 
3095  /*
3096  * Check minRecoveryPoint for any other process than the startup
3097  * process doing crash recovery, which should not update the control
3098  * file value if crash recovery is still running.
3099  */
3101  updateMinRecoveryPoint = false;
3102 
3103  /* check again */
3105  return false;
3106  else
3107  return true;
3108  }
3109 
3110  /* Quick exit if already known flushed */
3111  if (record <= LogwrtResult.Flush)
3112  return false;
3113 
3114  /* read LogwrtResult and update local state */
3118 
3119  /* check again */
3120  if (record <= LogwrtResult.Flush)
3121  return false;
3122 
3123  return true;
3124 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1343

References ControlFile, XLogwrtResult::Flush, XLogCtlData::info_lck, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, XLogCtlData::LogwrtResult, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), SpinLockAcquire, SpinLockRelease, updateMinRecoveryPoint, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 7857 of file xlog.c.

7858 {
7859  XLogBeginInsert();
7860  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
7861  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
7862 
7863  /*
7864  * We need not flush the NEXTOID record immediately, because any of the
7865  * just-allocated OIDs could only reach disk as part of a tuple insert or
7866  * update that would have its own XLOG record that must follow the NEXTOID
7867  * record. Therefore, the standard buffer LSN interlock applied to those
7868  * records will ensure no such OID reaches disk before the NEXTOID record
7869  * does.
7870  *
7871  * Note, however, that the above statement only covers state "within" the
7872  * database. When we use a generated OID as a file or directory name, we
7873  * are in a sense violating the basic WAL rule, because that filesystem
7874  * change may reach disk before the NEXTOID WAL record does. The impact
7875  * of this is that if a database crash occurs immediately afterward, we
7876  * might after restart re-generate the same OID and find that it conflicts
7877  * with the leftover file or directory. But since for safety's sake we
7878  * always loop until finding a nonconflicting filename, this poses no real
7879  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
7880  */
7881 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1900 of file xlog.c.

1901 {
1902  uint64 fullsegs;
1903  uint32 fullpages;
1904  uint32 offset;
1905  uint64 result;
1906 
1907  XLByteToSeg(ptr, fullsegs, wal_segment_size);
1908 
1909  fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
1910  offset = ptr % XLOG_BLCKSZ;
1911 
1912  if (fullpages == 0)
1913  {
1914  result = fullsegs * UsableBytesInSegment;
1915  if (offset > 0)
1916  {
1917  Assert(offset >= SizeOfXLogLongPHD);
1918  result += offset - SizeOfXLogLongPHD;
1919  }
1920  }
1921  else
1922  {
1923  result = fullsegs * UsableBytesInSegment +
1924  (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1925  (fullpages - 1) * UsableBytesInPage; /* full pages */
1926  if (offset > 0)
1927  {
1928  Assert(offset >= SizeOfXLogShortPHD);
1929  result += offset - SizeOfXLogShortPHD;
1930  }
1931  }
1932 
1933  return result;
1934 }

References Assert(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 7937 of file xlog.c.

7938 {
7939  if (wal_level != ControlFile->wal_level ||
7947  {
7948  /*
7949  * The change in number of backend slots doesn't need to be WAL-logged
7950  * if archiving is not enabled, as you can't start archive recovery
7951  * with wal_level=minimal anyway. We don't really care about the
7952  * values in pg_control either if wal_level=minimal, but seems better
7953  * to keep them up-to-date to avoid confusion.
7954  */
7956  {
7957  xl_parameter_change xlrec;
7958  XLogRecPtr recptr;
7959 
7965  xlrec.wal_level = wal_level;
7966  xlrec.wal_log_hints = wal_log_hints;
7968 
7969  XLogBeginInsert();
7970  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
7971 
7972  recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
7973  XLogFlush(recptr);
7974  }
7975 
7976  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7977 
7987 
7988  LWLockRelease(ControlFileLock);
7989  }
7990 }

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 7912 of file xlog.c.

7913 {
7914  XLogRecPtr RecPtr;
7915  xl_restore_point xlrec;
7916 
7917  xlrec.rp_time = GetCurrentTimestamp();
7918  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
7919 
7920  XLogBeginInsert();
7921  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
7922 
7923  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
7924 
7925  ereport(LOG,
7926  (errmsg("restore point \"%s\" created at %X/%X",
7927  rpName, LSN_FORMAT_ARGS(RecPtr))));
7928 
7929  return RecPtr;
7930 }
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2564 of file xlog.c.

2565 {
2566  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2567  bool sleeping;
2568  bool wakeup = false;
2569  XLogRecPtr prevAsyncXactLSN;
2570 
2573  sleeping = XLogCtl->WalWriterSleeping;
2574  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2575  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2576  XLogCtl->asyncXactLSN = asyncXactLSN;
2578 
2579  /*
2580  * If somebody else already called this function with a more aggressive
2581  * LSN, they will have done what we needed (and perhaps more).
2582  */
2583  if (asyncXactLSN <= prevAsyncXactLSN)
2584  return;
2585 
2586  /*
2587  * If the WALWriter is sleeping, kick it to make it come out of low-power
2588  * mode, so that this async commit will reach disk within the expected
2589  * amount of time. Otherwise, determine whether it has enough WAL
2590  * available to flush, the same way that XLogBackgroundFlush() does.
2591  */
2592  if (sleeping)
2593  wakeup = true;
2594  else
2595  {
2596  int flushblocks;
2597 
2598  flushblocks =
2599  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2600 
2601  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2602  wakeup = true;
2603  }
2604 
2607 }
void SetLatch(Latch *latch)
Definition: latch.c:632
PROC_HDR * ProcGlobal
Definition: proc.c:78
Latch * walwriterLatch
Definition: proc.h:412
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::LogwrtResult, LogwrtResult, ProcGlobal, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterLatch, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4808 of file xlog.c.

4809 {
4810  bool foundCFile,
4811  foundXLog;
4812  char *allocptr;
4813  int i;
4814  ControlFileData *localControlFile;
4815 
4816 #ifdef WAL_DEBUG
4817 
4818  /*
4819  * Create a memory context for WAL debugging that's exempt from the normal
4820  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4821  * an allocation fails, but wal_debug is not for production use anyway.
4822  */
4823  if (walDebugCxt == NULL)
4824  {
4826  "WAL Debug",
4828  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4829  }
4830 #endif
4831 
4832 
4833  XLogCtl = (XLogCtlData *)
4834  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4835 
4836  localControlFile = ControlFile;
4838  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4839 
4840  if (foundCFile || foundXLog)
4841  {
4842  /* both should be present or neither */
4843  Assert(foundCFile && foundXLog);
4844 
4845  /* Initialize local copy of WALInsertLocks */
4847 
4848  if (localControlFile)
4849  pfree(localControlFile);
4850  return;
4851  }
4852  memset(XLogCtl, 0, sizeof(XLogCtlData));
4853 
4854  /*
4855  * Already have read control file locally, unless in bootstrap mode. Move
4856  * contents into shared memory.
4857  */
4858  if (localControlFile)
4859  {
4860  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4861  pfree(localControlFile);
4862  }
4863 
4864  /*
4865  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4866  * multiple of the alignment for same, so no extra alignment padding is
4867  * needed here.
4868  */
4869  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4870  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4871  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4872 
4873  for (i = 0; i < XLOGbuffers; i++)
4874  {
4876  }
4877 
4878  /* WAL insertion locks. Ensure they're aligned to the full padded size */
4879  allocptr += sizeof(WALInsertLockPadded) -
4880  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4882  (WALInsertLockPadded *) allocptr;
4883  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4884 
4885  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
4886  {
4890  }
4891 
4892  /*
4893  * Align the start of the page buffers to a full xlog block size boundary.
4894  * This simplifies some calculations in XLOG insertion. It is also
4895  * required for O_DIRECT.
4896  */
4897  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
4898  XLogCtl->pages = allocptr;
4899  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
4900 
4901  /*
4902  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
4903  * in additional info.)
4904  */
4908  XLogCtl->WalWriterSleeping = false;
4909 
4913 }
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:448
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:188
MemoryContext TopMemoryContext
Definition: mcxt.c:137
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:682
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:60
int XLogCacheBlck
Definition: xlog.c:501
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:450
slock_t insertpos_lck
Definition: xlog.c:404
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4758
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4758 of file xlog.c.

4759 {
4760  Size size;
4761 
4762  /*
4763  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4764  * This isn't an amazingly clean place to do this, but we must wait till
4765  * NBuffers has received its final value, and must do it before using the
4766  * value of XLOGbuffers to do anything important.
4767  *
4768  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4769  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4770  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4771  * the matter with PGC_S_OVERRIDE.
4772  */
4773  if (XLOGbuffers == -1)
4774  {
4775  char buf[32];
4776 
4777  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4778  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4780  if (XLOGbuffers == -1) /* failed to apply it? */
4781  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4782  PGC_S_OVERRIDE);
4783  }
4784  Assert(XLOGbuffers > 0);
4785 
4786  /* XLogCtl */
4787  size = sizeof(XLogCtlData);
4788 
4789  /* WAL insertion locks, plus alignment */
4791  /* xlblocks array */
4793  /* extra alignment padding for XLOG I/O buffers */
4794  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4795  /* and the buffers themselves */
4796  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4797 
4798  /*
4799  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4800  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4801  * routine again below to compute the actual allocation size.
4802  */
4803 
4804  return size;
4805 }
#define Max(x, y)
Definition: c.h:985
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), Assert(), buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9306 of file xlog.c.

9307 {
9308  ShutdownWalRcv();
9309 
9310  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9312  LWLockRelease(ControlFileLock);
9313 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2273 of file xlog.c.

2274 {
2275  bool ispartialpage;
2276  bool last_iteration;
2277  bool finishing_seg;
2278  int curridx;
2279  int npages;
2280  int startidx;
2281  uint32 startoffset;
2282 
2283  /* We should always be inside a critical section here */
2284  Assert(CritSectionCount > 0);
2285 
2286  /*
2287  * Update local LogwrtResult (caller probably did this already, but...)
2288  */
2289  LogwrtResult = XLogCtl->LogwrtResult;
2290 
2291  /*
2292  * Since successive pages in the xlog cache are consecutively allocated,
2293  * we can usually gather multiple pages together and issue just one
2294  * write() call. npages is the number of pages we have determined can be
2295  * written together; startidx is the cache block index of the first one,
2296  * and startoffset is the file offset at which it should go. The latter
2297  * two variables are only valid when npages > 0, but we must initialize
2298  * all of them to keep the compiler quiet.
2299  */
2300  npages = 0;
2301  startidx = 0;
2302  startoffset = 0;
2303 
2304  /*
2305  * Within the loop, curridx is the cache block index of the page to
2306  * consider writing. Begin at the buffer containing the next unwritten
2307  * page, or last partially written page.
2308  */
2309  curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
2310 
2311  while (LogwrtResult.Write < WriteRqst.Write)
2312  {
2313  /*
2314  * Make sure we're not ahead of the insert process. This could happen
2315  * if we're passed a bogus WriteRqst.Write that is past the end of the
2316  * last page that's been initialized by AdvanceXLInsertBuffer.
2317  */
2318  XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2319 
2320  if (LogwrtResult.Write >= EndPtr)
2321  elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
2322  LSN_FORMAT_ARGS(LogwrtResult.Write),
2323  LSN_FORMAT_ARGS(EndPtr));
2324 
2325  /* Advance LogwrtResult.Write to end of current buffer page */
2326  LogwrtResult.Write = EndPtr;
2327  ispartialpage = WriteRqst.Write < LogwrtResult.Write;
2328 
2329  if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2330  wal_segment_size))
2331  {
2332  /*
2333  * Switch to new logfile segment. We cannot have any pending
2334  * pages here (since we dump what we have at segment end).
2335  */
2336  Assert(npages == 0);
2337  if (openLogFile >= 0)
2338  XLogFileClose();
2339  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2340  wal_segment_size);
2341  openLogTLI = tli;
2342 
2343  /* create/use new log file */
2344  openLogFile = XLogFileInit(openLogSegNo, tli);
2345  ReserveExternalFD();
2346  }
2347 
2348  /* Make sure we have the current logfile open */
2349  if (openLogFile < 0)
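2350  {
2351  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2352  wal_segment_size);
2353  openLogTLI = tli;
2354  openLogFile = XLogFileOpen(openLogSegNo, tli);
2355  ReserveExternalFD();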
2356  }
2357 
2358  /* Add current page to the set of pending pages-to-dump */
2359  if (npages == 0)
2360  {
2361  /* first of group */
2362  startidx = curridx;
2363  startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2364  wal_segment_size);
2365  }
2366  npages++;
2367 
2368  /*
2369  * Dump the set if this will be the last loop iteration, or if we are
2370  * at the last page of the cache area (since the next page won't be
2371  * contiguous in memory), or if we are at the end of the logfile
2372  * segment.
2373  */
2374  last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2375 
2376  finishing_seg = !ispartialpage &&
2377  (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2378 
2379  if (last_iteration ||
2380  curridx == XLogCtl->XLogCacheBlck ||
2381  finishing_seg)
2382  {
2383  char *from;
2384  Size nbytes;
2385  Size nleft;
2386  ssize_t written;
2387  instr_time start;
2388 
2389  /* OK to write the page(s) */
2390  from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2391  nbytes = npages * (Size) XLOG_BLCKSZ;
2392  nleft = nbytes;
2393  do
2394  {
2395  errno = 0;
2396 
2397  /* Measure I/O timing to write WAL data */
2398  if (track_wal_io_timing)
2399  INSTR_TIME_SET_CURRENT(start);
2400  else
2401  INSTR_TIME_SET_ZERO(start);
2402 
2403  pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2404  written = pg_pwrite(openLogFile, from, nleft, startoffset);
2405  pgstat_report_wait_end();
2406 
2407  /*
2408  * Increment the I/O timing and the number of times WAL data
2409  * were written out to disk.
2410  */
2411  if (track_wal_io_timing)
2412  {
2413  instr_time end;
2414 
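2415  INSTR_TIME_SET_CURRENT(end);
2416  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start);
2417  }
2418 
2419  PendingWalStats.wal_write++;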
2420 
2421  if (written <= 0)
2422  {
2423  char xlogfname[MAXFNAMELEN];
2424  int save_errno;
2425 
2426  if (errno == EINTR)
2427  continue;
2428 
2429  save_errno = errno;
2430  XLogFileName(xlogfname, tli, openLogSegNo,
2431  wal_segment_size);
2432  errno = save_errno;
2433  ereport(PANIC,
2434  (errcode_for_file_access(),
2435  errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2436  xlogfname, startoffset, nleft)));
2437  }
2438  nleft -= written;
2439  from += written;
2440  startoffset += written;
2441  } while (nleft > 0);
2442 
2443  npages = 0;
2444 
2445  /*
2446  * If we just wrote the whole last page of a logfile segment,
2447  * fsync the segment immediately. This avoids having to go back
2448  * and re-open prior segments when an fsync request comes along
2449  * later. Doing it here ensures that one and only one backend will
2450  * perform this fsync.
2451  *
2452  * This is also the right place to notify the Archiver that the
2453  * segment is ready to copy to archival storage, and to update the
2454  * timer for archive_timeout, and to signal for a checkpoint if
2455  * too many logfile segments have been used since the last
2456  * checkpoint.
2457  */
2458  if (finishing_seg)
2459  {
2460  issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2461 
2462  /* signal that we need to wakeup walsenders later */
2463  WalSndWakeupRequest();
2464 
2465  LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2466 
2467  if (XLogArchivingActive())
2468  XLogArchiveNotifySeg(openLogSegNo, tli);
2469 
2470  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2471  XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
2472 
2473  /*
2474  * Request a checkpoint if we've consumed too much xlog since
2475  * the last one. For speed, we first check using the local
2476  * copy of RedoRecPtr, which might be out of date; if it looks
2477  * like a checkpoint is needed, forcibly update RedoRecPtr and
2478  * recheck.
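2479  */
2480  if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
2481  {
2482  (void) GetRedoRecPtr();
2483  if (XLogCheckpointNeeded(openLogSegNo))
2484  RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
2485  }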
2486  }
2487  }
2488 
2489  if (ispartialpage)
2490  {
2491  /* Only asked to write a partial page */
2492  LogwrtResult.Write = WriteRqst.Write;
2493  break;
2494  }
2495  curridx = NextBufIdx(curridx);
2496 
2497  /* If flexible, break out of loop as soon as we wrote something */
2498  if (flexible && npages == 0)
2499  break;
2500  }
2501 
2502  Assert(npages == 0);
2503 
2504  /*
2505  * If asked to flush, do so
2506  */
2507  if (LogwrtResult.Flush < WriteRqst.Flush &&
2508  LogwrtResult.Flush < LogwrtResult.Write)
2509  {
2510  /*
2511  * Could get here without iterating above loop, in which case we might
2512  * have no open file or the wrong one. However, we do not need to
2513  * fsync more than one file.
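2514  */
2515  if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
2516  wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)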
2517  {
2518  if (openLogFile >= 0 &&
2519  !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2520  wal_segment_size))
2521  XLogFileClose();
2522  if (openLogFile < 0)
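2523  {
2524  XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2525  wal_segment_size);
2526  openLogTLI = tli;
2527  openLogFile = XLogFileOpen(openLogSegNo, tli);
2528  ReserveExternalFD();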
2529  }
2530 
2531  issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2532  }
2533 
2534  /* signal that we need to wakeup walsenders later */
2535  WalSndWakeupRequest();
2536 
2537  LogwrtResult.Flush = LogwrtResult.Write;
2538  }
2539 
2540  /*
2541  * Update shared-memory status
2542  *
2543  * We make sure that the shared 'request' values do not fall behind the
2544  * 'result' values. This is not absolutely essential, but it saves some
2545  * code in a couple of places.
2546  */
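2547  {
2548  SpinLockAcquire(&XLogCtl->info_lck);
2549  XLogCtl->LogwrtResult = LogwrtResult;
2550  if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
2551  XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
2552  if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
2553  XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
2554  SpinLockRelease(&XLogCtl->info_lck);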
2555  }
2556 }
void ReserveExternalFD(void)
Definition: fd.c:1221
volatile uint32 CritSectionCount
Definition: globals.c:43
PgStat_Counter wal_write
Definition: pgstat.h:453
instr_time wal_write_time
Definition: pgstat.h:455
#define WalSndWakeupRequest()
Definition: walsender.h:59
#define EINTR
Definition: win32_port.h:374
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6304
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3548
#define NextBufIdx(idx)
Definition: xlog.c:586
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8516
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2249
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert(), CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, GetRedoRecPtr(), XLogCtlData::info_lck, INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, PendingWalStats, pg_atomic_read_u64(), pg_pwrite, pgstat_report_wait_end(), pgstat_report_wait_start(), RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_write, PgStat_PendingWalStats::wal_write_time, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:65
@ ARCHIVE_MODE_OFF
Definition: xlog.h:63
@ ARCHIVE_MODE_ON
Definition: xlog.h:64

Definition at line 191 of file xlog.c.

◆ bootstrap_data_checksum_version

uint32 bootstrap_data_checksum_version
extern

Definition at line 44 of file bootstrap.c.

Referenced by BootstrapModeMain(), and InitControlFile().

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 166 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 159 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 122 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 217 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 224 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 236 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 135 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 115 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 625 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 626 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 160 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

bool track_wal_io_timing = false

Definition at line 137 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 637 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 124 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 126 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 125 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 136 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 127 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 116 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 123 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 128 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 143 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 171 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 118 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 117 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl