PostgreSQL Source Code  git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogutils.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/walwriter.h"
#include "postmaster/startup.h"
#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/origin.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/backend_random.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timestamp.h"
#include "pg_trace.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 
struct  XLogPageReadPrivate
 

Macros

#define RECOVERY_COMMAND_FILE   "recovery.conf"
 
#define RECOVERY_COMMAND_DONE   "recovery.done"
 
#define PROMOTE_SIGNAL_FILE   "promote"
 
#define FALLBACK_PROMOTE_SIGNAL_FILE   "fallback_promote"
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)   (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)   (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)   (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   (x / ((segsize) / (1024 * 1024)))
 
#define RecoveryRequiresIntParameter(param_name, currValue, minValue)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef enum ExclusiveBackupState ExclusiveBackupState
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 
typedef struct XLogPageReadPrivate XLogPageReadPrivate
 

Enumerations

enum  ExclusiveBackupState { EXCLUSIVE_BACKUP_NONE = 0, EXCLUSIVE_BACKUP_STARTING, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_STOPPING }
 
enum  XLogSource { XLOG_FROM_ANY = 0, XLOG_FROM_ARCHIVE, XLOG_FROM_PG_WAL, XLOG_FROM_STREAM }
 

Functions

static void readRecoveryCommandFile (void)
 
static void exitArchiveRecovery (TimeLineID endTLI, XLogRecPtr endOfLog)
 
static bool recoveryStopsBefore (XLogReaderState *record)
 
static bool recoveryStopsAfter (XLogReaderState *record)
 
static void recoveryPausesHere (void)
 
static bool recoveryApplyDelay (XLogReaderState *record)
 
static void SetLatestXTime (TimestampTz xtime)
 
static void SetCurrentChunkStartTime (TimestampTz xtime)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static void checkTimeLineSwitch (XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
 
static void LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, bool opportunistic)
 
static bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
static void XLogWrite (XLogwrtRqst WriteRqst, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, bool use_lock)
 
static int XLogFileRead (XLogSegNo segno, int emode, TimeLineID tli, int source, bool notfoundOk)
 
static int XLogFileReadAnyTLI (XLogSegNo segno, int emode, int source)
 
static int XLogPageRead (XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI)
 
static bool WaitForWALToBecomeAvailable (XLogRecPtr RecPtr, bool randAccess, bool fetching_ckpt, XLogRecPtr tliRecPtr)
 
static int emode_for_corrupt_record (int emode, XLogRecPtr RecPtr)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
 
static void RemoveXlogFile (const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static XLogRecord * ReadRecord (XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, bool fetching_ckpt)
 
static void CheckRecoveryConsistency (void)
 
static XLogRecord * ReadCheckpointRecord (XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpti, bool report)
 
static bool rescanLatestTimeLine (void)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static bool CheckForStandbyTrigger (void)
 
static void xlog_outdesc (StringInfo buf, XLogReaderState *record)
 
static void pg_start_backup_callback (int code, Datum arg)
 
static void pg_stop_backup_callback (int code, Datum arg)
 
static bool read_backup_label (XLogRecPtr *checkPointLoc, bool *backupEndRequired, bool *backupFromStandby)
 
static bool read_tablespace_map (List **tablespaces)
 
static void rm_redo_error_callback (void *arg)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void checkXLogConsistency (XLogReaderState *record)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
static XLogSegNo XLOGfileslop (XLogRecPtr RedoRecPtr)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
int XLogFileInit (XLogSegNo logsegno, bool *use_existent, bool use_lock)
 
static void XLogFileCopy (XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
static void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
void UpdateControlFile (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
void LocalProcessControlFile (bool reset)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
static bool getRecordTimestamp (XLogReaderState *record, TimestampTz *recordXtime)
 
bool RecoveryIsPaused (void)
 
void SetRecoveryPause (bool recoveryPause)
 
TimestampTz GetLatestXTime (void)
 
TimestampTz GetCurrentChunkReplayStartTime (void)
 
void GetXLogReceiptTime (TimestampTz *rtime, bool *fromStream)
 
void StartupXLOG (void)
 
bool RecoveryInProgress (void)
 
bool HotStandbyActive (void)
 
bool HotStandbyActiveInReplay (void)
 
bool XLogInsertAllowed (void)
 
void InitXLOGAccess (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void GetNextXidAndEpoch (TransactionId *xid, uint32 *epoch)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
void CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint)
 
bool CreateRestartPoint (int flags)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_xlog_sync_method (int new_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno)
 
char * XLogFileNameP (TimeLineID tli, XLogSegNo segno)
 
XLogRecPtr do_pg_start_backup (const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
XLogRecPtr do_pg_stop_backup (char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 
void do_pg_abort_backup (void)
 
XLogRecPtr GetXLogReplayRecPtr (TimeLineID *replayTLI)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
bool BackupInProgress (void)
 
void CancelBackup (void)
 
void RemovePromoteSignalFiles (void)
 
bool CheckPromoteSignal (void)
 
void WakeupRecovery (void)
 
void SetWalWriterSleeping (bool sleeping)
 
void XLogRequestWalReceiverReply (void)
 

Variables

uint32 bootstrap_data_checksum_version
 
int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_segments = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
bool wal_compression = false
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool log_checkpoints = false
 
int sync_method = DEFAULT_SYNC_METHOD
 
int wal_level = WAL_LEVEL_MINIMAL
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
const struct config_enum_entry sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
TimeLineID ThisTimeLineID = 0
 
bool InRecovery = false
 
HotStandbyState standbyState = STANDBY_DISABLED
 
static XLogRecPtr LastRec
 
static XLogRecPtr receivedUpto = 0
 
static TimeLineID receiveTLI = 0
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static bool LocalHotStandbyActive = false
 
static int LocalXLogInsertAllowed = -1
 
bool ArchiveRecoveryRequested = false
 
bool InArchiveRecovery = false
 
static bool restoredFromArchive = false
 
static char * replay_image_masked = NULL
 
static char * master_image_masked = NULL
 
char * recoveryRestoreCommand = NULL
 
static char * recoveryEndCommand = NULL
 
static char * archiveCleanupCommand = NULL
 
static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET
 
static bool recoveryTargetInclusive = true
 
static RecoveryTargetAction recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE
 
static TransactionId recoveryTargetXid
 
static TimestampTz recoveryTargetTime
 
static char * recoveryTargetName
 
static XLogRecPtr recoveryTargetLSN
 
static int recovery_min_apply_delay = 0
 
static TimestampTz recoveryDelayUntilTime
 
static bool StandbyModeRequested = false
 
static char * PrimaryConnInfo = NULL
 
static char * PrimarySlotName = NULL
 
static char * TriggerFile = NULL
 
bool StandbyMode = false
 
static bool fast_promote = false
 
static TransactionId recoveryStopXid
 
static TimestampTz recoveryStopTime
 
static XLogRecPtr recoveryStopLSN
 
static char recoveryStopName [MAXFNAMELEN]
 
static bool recoveryStopAfter
 
static TimeLineID recoveryTargetTLI
 
static bool recoveryTargetIsLatest = false
 
static List * expectedTLEs
 
static TimeLineID curFileTLI
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static bool doRequestWalReceiverReply
 
static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static const char * xlogSourceNames [] = {"any", "archive", "pg_wal", "stream"}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static uint32 openLogOff = 0
 
static int readFile = -1
 
static XLogSegNo readSegNo = 0
 
static uint32 readOff = 0
 
static uint32 readLen = 0
 
static XLogSource readSource = 0
 
static XLogSource currentSource = 0
 
static bool lastSourceFailed = false
 
static TimestampTz XLogReceiptTime = 0
 
static XLogSource XLogReceiptSource = 0
 
static XLogRecPtr ReadRecPtr
 
static XLogRecPtr EndRecPtr
 
static XLogRecPtr minRecoveryPoint
 
static TimeLineID minRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
bool reachedConsistency = false
 
static bool InRedo = false
 
static bool bgwriterLaunched = false
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    (x / ((segsize) / (1024 * 1024)))

Definition at line 741 of file xlog.c.

Referenced by CalculateCheckpointSegments(), ReadControlFile(), and XLOGfileslop().

◆ FALLBACK_PROMOTE_SIGNAL_FILE

#define FALLBACK_PROMOTE_SIGNAL_FILE   "fallback_promote"

Definition at line 85 of file xlog.c.

Referenced by CheckForStandbyTrigger(), CheckPromoteSignal(), and RemovePromoteSignalFiles().

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 721 of file xlog.c.

Referenced by CopyXLogRecordToWAL(), and CreateCheckPoint().

◆ NextBufIdx

#define NextBufIdx (   idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 725 of file xlog.c.

Referenced by XLogWrite().

◆ NUM_XLOGINSERT_LOCKS

◆ PROMOTE_SIGNAL_FILE

#define PROMOTE_SIGNAL_FILE   "promote"

Definition at line 84 of file xlog.c.

Referenced by CheckForStandbyTrigger(), CheckPromoteSignal(), and RemovePromoteSignalFiles().

◆ RECOVERY_COMMAND_DONE

#define RECOVERY_COMMAND_DONE   "recovery.done"

Definition at line 83 of file xlog.c.

Referenced by exitArchiveRecovery().

◆ RECOVERY_COMMAND_FILE

#define RECOVERY_COMMAND_FILE   "recovery.conf"

Definition at line 82 of file xlog.c.

Referenced by exitArchiveRecovery(), and readRecoveryCommandFile().

◆ RecoveryRequiresIntParameter

#define RecoveryRequiresIntParameter (   param_name,
  currValue,
  minValue 
)
Value:
do { \
if ((currValue) < (minValue)) \
ereport(ERROR, \
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
errmsg("hot standby is not possible because " \
"%s = %d is a lower setting than on the master server " \
"(its value was %d)", \
param_name, \
currValue, \
minValue))); \
} while(0)
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
int errmsg(const char *fmt,...)
Definition: elog.c:797

Definition at line 6301 of file xlog.c.

Referenced by CheckRequiredParameterValues().

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 732 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), GetXLogBuffer(), StartupXLOG(), and XLogWrite().

Typedef Documentation

◆ ExclusiveBackupState

◆ WALInsertLockPadded

◆ XLogCtlData

◆ XLogCtlInsert

◆ XLogPageReadPrivate

◆ XLogwrtResult

◆ XLogwrtRqst

Enumeration Type Documentation

◆ ExclusiveBackupState

Enumerator
EXCLUSIVE_BACKUP_NONE 
EXCLUSIVE_BACKUP_STARTING 
EXCLUSIVE_BACKUP_IN_PROGRESS 
EXCLUSIVE_BACKUP_STOPPING 

Definition at line 500 of file xlog.c.

◆ XLogSource

enum XLogSource
Enumerator
XLOG_FROM_ANY 
XLOG_FROM_ARCHIVE 
XLOG_FROM_PG_WAL 
XLOG_FROM_STREAM 

Definition at line 757 of file xlog.c.

758 {
759  XLOG_FROM_ANY = 0, /* request to read WAL from any source */
760  XLOG_FROM_ARCHIVE, /* restored using restore_command */
761  XLOG_FROM_PG_WAL, /* existing file in pg_wal */
762  XLOG_FROM_STREAM /* streamed from master */
763 } XLogSource;
XLogSource
Definition: xlog.c:757

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
bool  opportunistic 
)
static

Definition at line 2081 of file xlog.c.

References Assert, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlInsert::forcePageWrites, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, Insert(), XLogCtlData::Insert, InvalidXLogRecPtr, XLogCtlData::LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_write_barrier, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, ThisTimeLineID, WaitXLogInsertionsToFinish(), wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_LONG_HEADER, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

2082 {
2083  XLogCtlInsert *Insert = &XLogCtl->Insert;
2084  int nextidx;
2085  XLogRecPtr OldPageRqstPtr;
2086  XLogwrtRqst WriteRqst;
2087  XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
2088  XLogRecPtr NewPageBeginPtr;
2089  XLogPageHeader NewPage;
2090  int npages = 0;
2091 
2092  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2093 
2094  /*
2095  * Now that we have the lock, check if someone initialized the page
2096  * already.
2097  */
2098  while (upto >= XLogCtl->InitializedUpTo || opportunistic)
2099  {
2100  nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo);
2101 
2102  /*
2103  * Get ending-offset of the buffer page we need to replace (this may
2104  * be zero if the buffer hasn't been used yet). Fall through if it's
2105  * already written out.
2106  */
2107  OldPageRqstPtr = XLogCtl->xlblocks[nextidx];
2108  if (LogwrtResult.Write < OldPageRqstPtr)
2109  {
2110  /*
2111  * Nope, got work to do. If we just want to pre-initialize as much
2112  * as we can without flushing, give up now.
2113  */
2114  if (opportunistic)
2115  break;
2116 
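2117  /* Before waiting, get info_lck and update LogwrtResult */
2118  SpinLockAcquire(&XLogCtl->info_lck);
2119  if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2120  XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2121  LogwrtResult = XLogCtl->LogwrtResult;
2122  SpinLockRelease(&XLogCtl->info_lck);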
2123 
2124  /*
2125  * Now that we have an up-to-date LogwrtResult value, see if we
2126  * still need to write it or if someone else already did.
2127  */
2128  if (LogwrtResult.Write < OldPageRqstPtr)
2129  {
2130  /*
2131  * Must acquire write lock. Release WALBufMappingLock first,
2132  * to make sure that all insertions that we need to wait for
2133  * can finish (up to this same position). Otherwise we risk
2134  * deadlock.
2135  */
2136  LWLockRelease(WALBufMappingLock);
2137 
2138  WaitXLogInsertionsToFinish(OldPageRqstPtr);
2139 
2140  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2141 
2142  LogwrtResult = XLogCtl->LogwrtResult;
2143  if (LogwrtResult.Write >= OldPageRqstPtr)
2144  {
2145  /* OK, someone wrote it already */
2146  LWLockRelease(WALWriteLock);
2147  }
2148  else
2149  {
2150  /* Have to write it ourselves */
2151  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2152  WriteRqst.Write = OldPageRqstPtr;
2153  WriteRqst.Flush = 0;
2154  XLogWrite(WriteRqst, false);
2155  LWLockRelease(WALWriteLock);
2156  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2157  }
2158  /* Re-acquire WALBufMappingLock and retry */
2159  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2160  continue;
2161  }
2162  }
2163 
2164  /*
2165  * Now the next buffer slot is free and we can set it up to be the
2166  * next output page.
2167  */
2168  NewPageBeginPtr = XLogCtl->InitializedUpTo;
2169  NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2170 
2171  Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2172 
2173  NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2174 
2175  /*
2176  * Be sure to re-zero the buffer so that bytes beyond what we've
2177  * written will look like zeroes and not valid XLOG records...
2178  */
2179  MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
2180 
2181  /*
2182  * Fill the new page's header
2183  */
2184  NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2185 
2186  /* NewPage->xlp_info = 0; */ /* done by memset */
2187  NewPage->xlp_tli = ThisTimeLineID;
2188  NewPage->xlp_pageaddr = NewPageBeginPtr;
2189 
2190  /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2191 
2192  /*
2193  * If online backup is not in progress, mark the header to indicate
2194  * that WAL records beginning in this page have removable backup
2195  * blocks. This allows the WAL archiver to know whether it is safe to
2196  * compress archived WAL data by transforming full-block records into
2197  * the non-full-block format. It is sufficient to record this at the
2198  * page level because we force a page switch (in fact a segment
2199  * switch) when starting a backup, so the flag will be off before any
2200  * records can be written during the backup. At the end of a backup,
2201  * the last page will be marked as all unsafe when perhaps only part
2202  * is unsafe, but at worst the archiver would miss the opportunity to
2203  * compress a few records.
2204  */
2205  if (!Insert->forcePageWrites)
2206  NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2207 
2208  /*
2209  * If first page of an XLOG segment file, make it a long header.
2210  */
2211  if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2212  {
2213  XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2214 
2215  NewLongPage->xlp_sysid = ControlFile->system_identifier;
2216  NewLongPage->xlp_seg_size = wal_segment_size;
2217  NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2218  NewPage->xlp_info |= XLP_LONG_HEADER;
2219  }
2220 
2221  /*
2222  * Make sure the initialization of the page becomes visible to others
2223  * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2224  * holding a lock.
2225  */
2226  pg_write_barrier();
2227 
2228  *((volatile XLogRecPtr *) &XLogCtl->xlblocks[nextidx]) = NewPageEndPtr;
2229 
2230  XLogCtl->InitializedUpTo = NewPageEndPtr;
2231 
2232  npages++;
2233  }
2234  LWLockRelease(WALBufMappingLock);
2235 
2236 #ifdef WAL_DEBUG
2237  if (XLOG_DEBUG && npages > 0)
2238  {
2239  elog(DEBUG1, "initialized %d pages, up to %X/%X",
2240  npages, (uint32) (NewPageEndPtr >> 32), (uint32) NewPageEndPtr);
2241  }
2242 #endif
2243 }
XLogRecPtr InitializedUpTo
Definition: xlog.c:613
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int wal_segment_size
Definition: xlog.c:113
XLogRecPtr * xlblocks
Definition: xlog.c:621
static XLogwrtResult LogwrtResult
Definition: xlog.c:751
slock_t info_lck
Definition: xlog.c:704
#define MemSet(start, val, len)
Definition: c.h:941
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1744
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
XLogCtlInsert Insert
Definition: xlog.c:577
XLogRecPtr Flush
Definition: xlog.c:420
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:74
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:81
bool forcePageWrites
Definition: xlog.c:551
uint64 system_identifier
Definition: pg_control.h:106
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
XLogwrtResult LogwrtResult
Definition: xlog.c:601
unsigned int uint32
Definition: c.h:358
static void Insert(File file)
Definition: fd.c:1062
TimeLineID xlp_tli
Definition: xlog_internal.h:40
static void XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
Definition: xlog.c:2371
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr Write
Definition: xlog.c:419
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
static ControlFileData * ControlFile
Definition: xlog.c:715
XLogwrtRqst LogwrtRqst
Definition: xlog.c:580
TimeLineID ThisTimeLineID
Definition: xlog.c:181
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
#define XLP_LONG_HEADER
Definition: xlog_internal.h:79
size_t Size
Definition: c.h:466
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:732
XLogRecPtr Write
Definition: xlog.c:425
#define elog(elevel,...)
Definition: elog.h:226
#define pg_write_barrier()
Definition: atomics.h:162
char * pages
Definition: xlog.c:620

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2286 of file xlog.c.

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

2287 {
2288  CheckPointCompletionTarget = newval;
2289  CalculateCheckpointSegments();
2290 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2250
#define newval
double CheckPointCompletionTarget
Definition: checkpointer.c:147

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2279 of file xlog.c.

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

2280 {
2281  max_wal_size_mb = newval;
2282  CalculateCheckpointSegments();
2283 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2250
int max_wal_size_mb
Definition: xlog.c:89
#define newval

◆ assign_xlog_sync_method()

void assign_xlog_sync_method ( int  new_sync_method,
void *  extra 
)

Definition at line 10265 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), openLogFile, openLogSegNo, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), sync_method, ThisTimeLineID, WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN, XLogFileClose(), and XLogFileNameP().

10266 {
10267  if (sync_method != new_sync_method)
10268  {
10269  /*
10270  * To ensure that no blocks escape unsynced, force an fsync on the
10271  * currently open log segment (if any). Also, if the open flag is
10272  * changing, close the log file so it will be reopened (with new flag
10273  * bit) at next use.
10274  */
10275  if (openLogFile >= 0)
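10276  {
10277  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
10278  if (pg_fsync(openLogFile) != 0)
10279  ereport(PANIC,
10280  (errcode_for_file_access(),
10281  errmsg("could not fsync file \"%s\": %m",
10282  XLogFileNameP(ThisTimeLineID, openLogSegNo))));
10283  pgstat_report_wait_end();
10284  if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))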
10285  XLogFileClose();
10286  }
10287  }
10288 }
static int get_sync_bit(int method)
Definition: xlog.c:10209
#define PANIC
Definition: elog.h:53
static XLogSegNo openLogSegNo
Definition: xlog.c:775
static void XLogFileClose(void)
Definition: xlog.c:3764
char * XLogFileNameP(TimeLineID tli, XLogSegNo segno)
Definition: xlog.c:10343
int errcode_for_file_access(void)
Definition: elog.c:598
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1261
#define ereport(elevel, rest)
Definition: elog.h:141
static int openLogFile
Definition: xlog.c:774
TimeLineID ThisTimeLineID
Definition: xlog.c:181
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1237
int sync_method
Definition: xlog.c:103
int errmsg(const char *fmt,...)
Definition: elog.c:797
int pg_fsync(int fd)
Definition: fd.c:341

◆ BackupInProgress()

bool BackupInProgress ( void  )

Definition at line 11636 of file xlog.c.

References BACKUP_LABEL_FILE, and stat.

Referenced by pg_is_in_backup(), and PostmasterStateMachine().

11637 {
11638  struct stat stat_buf;
11639 
11640  return (stat(BACKUP_LABEL_FILE, &stat_buf) == 0);
11641 }
struct stat stat_buf
Definition: pg_standby.c:102
#define stat(a, b)
Definition: win32_port.h:266
#define BACKUP_LABEL_FILE
Definition: xlog.h:322

◆ BootStrapXLOG()

void BootStrapXLOG ( void  )

Definition at line 5106 of file xlog.c.

References AdvanceOldestClogXid(), Assert, bootstrap_data_checksum_version, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), buffer, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstBootstrapObjectId, FirstMultiXactId, FirstNormalTransactionId, CheckPoint::fullPageWrites, fullPageWrites, gettimeofday(), INIT_CRC32C, InvalidTransactionId, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, offsetof, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, palloc(), PANIC, pfree(), pg_backend_random(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetMultiXactIdLimit(), SetTransactionIdLimit(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, ControlFileData::state, ControlFileData::system_identifier, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, track_commit_timestamp, ControlFileData::track_commit_timestamp, TYPEALIGN, ControlFileData::unloggedLSN, WAIT_EVENT_WAL_BOOTSTRAP_SYNC, WAIT_EVENT_WAL_BOOTSTRAP_WRITE, wal_level, ControlFileData::wal_level, wal_log_hints, 
ControlFileData::wal_log_hints, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by AuxiliaryProcessMain().

5107 {
5108  CheckPoint checkPoint;
5109  char *buffer;
5110  XLogPageHeader page;
5111  XLogLongPageHeader longpage;
5112  XLogRecord *record;
5113  char *recptr;
5114  bool use_existent;
5115  uint64 sysidentifier;
5116  char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
5117  struct timeval tv;
5118  pg_crc32c crc;
5119 
5120  /*
5121  * Select a hopefully-unique system identifier code for this installation.
5122  * We use the result of gettimeofday(), including the fractional seconds
5123  * field, as being about as unique as we can easily get. (Think not to
5124  * use random(), since it hasn't been seeded and there's no portable way
5125  * to seed it other than the system clock value...) The upper half of the
5126  * uint64 value is just the tv_sec part, while the lower half contains the
5127  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5128  * PID for a little extra uniqueness. A person knowing this encoding can
5129  * determine the initialization time of the installation, which could
5130  * perhaps be useful sometimes.
5131  */
5132  gettimeofday(&tv, NULL);
5133  sysidentifier = ((uint64) tv.tv_sec) << 32;
5134  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5135  sysidentifier |= getpid() & 0xFFF;
5136 
5137  /*
5138  * Generate a random nonce. This is used for authentication requests that
5139  * will fail because the user does not exist. The nonce is used to create
5140  * a genuine-looking password challenge for the non-existent user, in lieu
5141  * of an actual stored password.
5142  */
5143  if (!pg_backend_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
5144  ereport(PANIC,
5145  (errcode(ERRCODE_INTERNAL_ERROR),
5146  errmsg("could not generate secret authorization token")));
5147 
5148  /* First timeline ID is always 1 */
5149  ThisTimeLineID = 1;
5150 
5151  /* page buffer must be aligned suitably for O_DIRECT */
5152  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5153  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5154  memset(page, 0, XLOG_BLCKSZ);
5155 
5156  /*
5157  * Set up information for the initial checkpoint record
5158  *
5159  * The initial checkpoint record is written to the beginning of the WAL
5160  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5161  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5162  */
5163  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5164  checkPoint.ThisTimeLineID = ThisTimeLineID;
5165  checkPoint.PrevTimeLineID = ThisTimeLineID;
5166  checkPoint.fullPageWrites = fullPageWrites;
5167  checkPoint.nextXidEpoch = 0;
5168  checkPoint.nextXid = FirstNormalTransactionId;
5169  checkPoint.nextOid = FirstBootstrapObjectId;
5170  checkPoint.nextMulti = FirstMultiXactId;
5171  checkPoint.nextMultiOffset = 0;
5172  checkPoint.oldestXid = FirstNormalTransactionId;
5173  checkPoint.oldestXidDB = TemplateDbOid;
5174  checkPoint.oldestMulti = FirstMultiXactId;
5175  checkPoint.oldestMultiDB = TemplateDbOid;
5178  checkPoint.time = (pg_time_t) time(NULL);
5180 
5181  ShmemVariableCache->nextXid = checkPoint.nextXid;
5182  ShmemVariableCache->nextOid = checkPoint.nextOid;
5184  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5185  AdvanceOldestClogXid(checkPoint.oldestXid);
5186  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5187  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5189 
5190  /* Set up the XLOG page header */
5191  page->xlp_magic = XLOG_PAGE_MAGIC;
5192  page->xlp_info = XLP_LONG_HEADER;
5193  page->xlp_tli = ThisTimeLineID;
5195  longpage = (XLogLongPageHeader) page;
5196  longpage->xlp_sysid = sysidentifier;
5197  longpage->xlp_seg_size = wal_segment_size;
5198  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5199 
5200  /* Insert the initial checkpoint record */
5201  recptr = ((char *) page + SizeOfXLogLongPHD);
5202  record = (XLogRecord *) recptr;
5203  record->xl_prev = 0;
5204  record->xl_xid = InvalidTransactionId;
5205  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5207  record->xl_rmid = RM_XLOG_ID;
5208  recptr += SizeOfXLogRecord;
5209  /* fill the XLogRecordDataHeaderShort struct */
5210  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5211  *(recptr++) = sizeof(checkPoint);
5212  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5213  recptr += sizeof(checkPoint);
5214  Assert(recptr - (char *) record == record->xl_tot_len);
5215 
5216  INIT_CRC32C(crc);
5217  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5218  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5219  FIN_CRC32C(crc);
5220  record->xl_crc = crc;
5221 
5222  /* Create first XLOG segment file */
5223  use_existent = false;
5224  openLogFile = XLogFileInit(1, &use_existent, false);
5225 
5226  /* Write the first page with the initial record */
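5227  errno = 0;
5228  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5229  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5230  {
5231  /* if write didn't set errno, assume problem is no disk space */
5232  if (errno == 0)
5233  errno = ENOSPC;
5234  ereport(PANIC,
5235  (errcode_for_file_access(),
5236  errmsg("could not write bootstrap write-ahead log file: %m")));
5237  }
5238  pgstat_report_wait_end();
5239 
5240  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5241  if (pg_fsync(openLogFile) != 0)
5242  ereport(PANIC,
5243  (errcode_for_file_access(),
5244  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5245  pgstat_report_wait_end();
5246 
5247  if (close(openLogFile))
5248  ereport(PANIC,
5249  (errcode_for_file_access(),
5250  errmsg("could not close bootstrap write-ahead log file: %m")));
5251 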
5252  openLogFile = -1;
5253 
5254  /* Now create pg_control */
5255 
5256  memset(ControlFile, 0, sizeof(ControlFileData));
5257  /* Initialize pg_control status fields */
5258  ControlFile->system_identifier = sysidentifier;
5259  memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
5261  ControlFile->time = checkPoint.time;
5262  ControlFile->checkPoint = checkPoint.redo;
5263  ControlFile->checkPointCopy = checkPoint;
5264  ControlFile->unloggedLSN = 1;
5265 
5266  /* Set important parameter values for use when replaying WAL */
5275 
5276  /* some additional ControlFile fields are set in WriteControlFile() */
5277 
5278  WriteControlFile();
5279 
5280  /* Bootstrap the commit log, too */
5281  BootStrapCLOG();
5285 
5286  pfree(buffer);
5287 
5288  /*
5289  * Force control file to be read - in contrast to normal processing we'd
5290  * otherwise never run the checks and GUC related initializations therein.
5291  */
5292  ReadControlFile();
5293 }
static void WriteControlFile(void)
Definition: xlog.c:4467
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
int max_locks_per_xact
Definition: pg_control.h:181
int gettimeofday(struct timeval *tp, struct timezone *tzp)
Definition: gettimeofday.c:105
int max_prepared_xacts
Definition: pg_control.h:180
int64 pg_time_t
Definition: pgtime.h:23
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:848
uint32 oidCount
Definition: transam.h:112
#define write(a, b, c)
Definition: win32.h:14
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:201
int max_worker_processes
Definition: pg_control.h:179
uint32 pg_crc32c
Definition: pg_crc32c.h:38
TransactionId oldestActiveXid
Definition: pg_control.h:63
int wal_level
Definition: xlog.c:104
int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
Definition: xlog.c:3216
void BootStrapMultiXact(void)
Definition: multixact.c:1866
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: pg_control.h:39
int errcode(int sqlerrcode)
Definition: elog.c:575
RmgrId xl_rmid
Definition: xlogrecord.h:47
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
CheckPoint checkPointCopy
Definition: pg_control.h:131
TransactionId oldestXid
Definition: pg_control.h:47
TransactionId nextXid
Definition: pg_control.h:43
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:53
uint32 bootstrap_data_checksum_version
Definition: bootstrap.c:51
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:74
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:27
bool fullPageWrites
Definition: xlog.c:97
void BootStrapSUBTRANS(void)
Definition: subtrans.c:212
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:271
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:1031
#define FirstNormalTransactionId
Definition: transam.h:34
int max_prepared_xacts
Definition: twophase.c:117
uint64 system_identifier
Definition: pg_control.h:106
uint32 xl_tot_len
Definition: xlogrecord.h:43
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
TransactionId nextXid
Definition: transam.h:117
static void ReadControlFile(void)
Definition: xlog.c:4559
uint32 nextXidEpoch
Definition: pg_control.h:42
bool track_commit_timestamp
Definition: commit_ts.c:103
uint32 data_checksum_version
Definition: pg_control.h:221
bool pg_backend_random(char *dst, int len)
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
XLogRecPtr unloggedLSN
Definition: pg_control.h:133
int errcode_for_file_access(void)
Definition: elog.c:598
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
#define FirstBootstrapObjectId
Definition: transam.h:93
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1261
#define FirstMultiXactId
Definition: multixact.h:24
#define ereport(elevel, rest)
Definition: elog.h:141
int max_locks_per_xact
Definition: lock.c:54
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
TransactionId newestCommitTsXid
Definition: pg_control.h:54
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:228
int MaxConnections
Definition: globals.c:131
Oid oldestMultiDB
Definition: pg_control.h:50
static int openLogFile
Definition: xlog.c:774
static ControlFileData * ControlFile
Definition: xlog.c:715
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2194
TimeLineID ThisTimeLineID
Definition: xlog.c:181
Oid nextOid
Definition: pg_control.h:44
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:678
bool fullPageWrites
Definition: pg_control.h:41
bool wal_log_hints
Definition: xlog.c:98
void BootStrapCLOG(void)
Definition: clog.c:712
bool track_commit_timestamp
Definition: pg_control.h:182
#define Assert(condition)
Definition: c.h:732
#define XLP_LONG_HEADER
Definition: xlog_internal.h:79
Oid oldestXidDB
Definition: pg_control.h:48
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:217
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1237
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:288
uint8 xl_info
Definition: xlogrecord.h:46
MultiXactId nextMulti
Definition: pg_control.h:45
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:224
TransactionId xl_xid
Definition: xlogrecord.h:44
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:797
int max_worker_processes
Definition: globals.c:132
int pg_fsync(int fd)
Definition: fd.c:341
#define close(a)
Definition: win32.h:12
void BootStrapCommitTs(void)
Definition: commit_ts.c:523
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
#define offsetof(type, field)
Definition: c.h:655
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2160

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2250 of file xlog.c.

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

2251 {
2252  double target;
2253 
2254  /*-------
2255  * Calculate the distance at which to trigger a checkpoint, to avoid
2256  * exceeding max_wal_size_mb. This is based on two assumptions:
2257  *
2258  * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2259  * WAL for two checkpoint cycles to allow us to recover from the
2260  * secondary checkpoint if the first checkpoint failed, though we
2261  * only did this on the master anyway, not on standby. Keeping just
2262  * one checkpoint simplifies processing and reduces disk space in
2263  * many smaller databases.)
2264  * b) during checkpoint, we consume checkpoint_completion_target *
2265  * number of segments consumed between checkpoints.
2266  *-------
2267  */
2268  target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2269  (1.0 + CheckPointCompletionTarget);
2270 
2271  /* round down */
2272  CheckPointSegments = (int) target;
2273 
2274  if (CheckPointSegments < 1)
2275  CheckPointSegments = 1;
2276 }
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:741
int wal_segment_size
Definition: xlog.c:113
int max_wal_size_mb
Definition: xlog.c:89
int CheckPointSegments
Definition: xlog.c:126
double CheckPointCompletionTarget
Definition: checkpointer.c:147

◆ CancelBackup()

void CancelBackup ( void  )

Definition at line 11656 of file xlog.c.

References BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1, durable_rename(), ereport, errcode_for_file_access(), errdetail(), errmsg(), LOG, stat, TABLESPACE_MAP, TABLESPACE_MAP_OLD, and WARNING.

Referenced by PostmasterStateMachine().

11657 {
11658  struct stat stat_buf;
11659 
11660  /* if the backup_label file is not there, return */
11661  if (stat(BACKUP_LABEL_FILE, &stat_buf) < 0)
11662  return;
11663 
11664  /* remove leftover file from previously canceled backup if it exists */
11665  unlink(BACKUP_LABEL_OLD);
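11666 
11667  if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) != 0)
11668  {
11669  ereport(WARNING,
11670  (errcode_for_file_access(),
11671  errmsg("online backup mode was not canceled"),
11672  errdetail("File \"%s\" could not be renamed to \"%s\": %m.",
11673  BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
11674  return;
11675  }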
11676 
11677  /* if the tablespace_map file is not there, return */
11678  if (stat(TABLESPACE_MAP, &stat_buf) < 0)
11679  {
11680  ereport(LOG,
11681  (errmsg("online backup mode canceled"),
11682  errdetail("File \"%s\" was renamed to \"%s\".",
11683  BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
11684  return;
11685  }
11686 
11687  /* remove leftover file from previously canceled backup if it exists */
11688  unlink(TABLESPACE_MAP_OLD);
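11689 
11690  if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
11691  {
11692  ereport(LOG,
11693  (errmsg("online backup mode canceled"),
11694  errdetail("Files \"%s\" and \"%s\" were renamed to "
11695  "\"%s\" and \"%s\", respectively.",
11696  BACKUP_LABEL_FILE, TABLESPACE_MAP,
11697  BACKUP_LABEL_OLD, TABLESPACE_MAP_OLD)));
11698  }
11699  else
11700  {
11701  ereport(WARNING,
11702  (errcode_for_file_access(),
11703  errmsg("online backup mode canceled"),
11704  errdetail("File \"%s\" was renamed to \"%s\", but "
11705  "file \"%s\" could not be renamed to \"%s\": %m.",
11706  BACKUP_LABEL_FILE, BACKUP_LABEL_OLD,
11707  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
11708  }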
11709 }
#define DEBUG1
Definition: elog.h:25
#define LOG
Definition: elog.h:26
#define BACKUP_LABEL_OLD
Definition: xlog.h:323
#define TABLESPACE_MAP
Definition: xlog.h:325
struct stat stat_buf
Definition: pg_standby.c:102
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:141
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:601
#define WARNING
Definition: elog.h:40
#define stat(a, b)
Definition: win32_port.h:266
#define TABLESPACE_MAP_OLD
Definition: xlog.h:326
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define BACKUP_LABEL_FILE
Definition: xlog.h:322

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4897 of file xlog.c.

References XLOGbuffers, and XLOGChooseNumBuffers().

4898 {
4899  /*
4900  * -1 indicates a request for auto-tune.
4901  */
4902  if (*newval == -1)
4903  {
4904  /*
4905  * If we haven't yet changed the boot_val default of -1, just let it
4906  * be. We'll fix it when XLOGShmemSize is called.
4907  */
4908  if (XLOGbuffers == -1)
4909  return true;
4910 
4911  /* Otherwise, substitute the auto-tune value */
4912  *newval = XLOGChooseNumBuffers();
4913  }
4914 
4915  /*
4916  * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4917  * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4918  * the case, we just silently treat such values as a request for the
4919  * minimum. (We could throw an error instead, but that doesn't seem very
4920  * helpful.)
4921  */
4922  if (*newval < 4)
4923  *newval = 4;
4924 
4925  return true;
4926 }
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4881
#define newval
int XLOGbuffers
Definition: xlog.c:92

◆ CheckForStandbyTrigger()

static bool CheckForStandbyTrigger ( void  )
static

Definition at line 12356 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, FALLBACK_PROMOTE_SIGNAL_FILE, fast_promote, IsPromoteTriggered(), LOG, PROMOTE_SIGNAL_FILE, ResetPromoteTriggered(), stat, and TriggerFile.

Referenced by ReadRecord(), recoveryApplyDelay(), and WaitForWALToBecomeAvailable().

12357 {
12358  struct stat stat_buf;
12359  static bool triggered = false;
12360 
12361  if (triggered)
12362  return true;
12363 
12364  if (IsPromoteTriggered())
12365  {
12366  /*
12367  * In 9.1 and 9.2 the postmaster unlinked the promote file inside the
12368  * signal handler. It now leaves the file in place and lets the
12369  * Startup process do the unlink. This allows Startup to know whether
12370  * it should create a full checkpoint before starting up (fallback
12371  * mode). Fast promotion takes precedence.
12372  */
12373  if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
12374  {
12375  unlink(PROMOTE_SIGNAL_FILE);
12376  unlink(FALLBACK_PROMOTE_SIGNAL_FILE);
12377  fast_promote = true;
12378  }
12379  else if (stat(FALLBACK_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
12380  {
12381  unlink(FALLBACK_PROMOTE_SIGNAL_FILE);
12382  fast_promote = false;
12383  }
12384 
12385  ereport(LOG, (errmsg("received promote request")));
12386 
12387  ResetPromoteTriggered();
12388  triggered = true;
12389  return true;
12390  }
12391 
12392  if (TriggerFile == NULL)
12393  return false;
12394 
12395  if (stat(TriggerFile, &stat_buf) == 0)
12396  {
12397  ereport(LOG,
12398  (errmsg("trigger file found: %s", TriggerFile)));
12399  unlink(TriggerFile);
12400  triggered = true;
12401  fast_promote = true;
12402  return true;
12403  }
12404  else if (errno != ENOENT)
12405  ereport(ERROR,
12406  (errcode_for_file_access(),
12407  errmsg("could not stat trigger file \"%s\": %m",
12408  TriggerFile)));
12409 
12410  return false;
12411 }
void ResetPromoteTriggered(void)
Definition: startup.c:247
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:85
#define LOG
Definition: elog.h:26
static char * TriggerFile
Definition: xlog.c:276
#define ERROR
Definition: elog.h:43
struct stat stat_buf
Definition: pg_standby.c:102
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
int errcode_for_file_access(void)
Definition: elog.c:598
bool IsPromoteTriggered(void)
Definition: startup.c:241
#define ereport(elevel, rest)
Definition: elog.h:141
#define stat(a, b)
Definition: win32_port.h:266
int errmsg(const char *fmt,...)
Definition: elog.c:797
static bool fast_promote
Definition: xlog.c:282

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 9205 of file xlog.c.

References CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckPointSUBTRANS(), and CheckPointTwoPhase().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

9206 {
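9207  CheckPointCLOG();
9208  CheckPointCommitTs();
9209  CheckPointSUBTRANS();
9210  CheckPointMultiXact();
9211  CheckPointPredicate();
9212  CheckPointRelationMap();
9213  CheckPointReplicationSlots();
9214  CheckPointSnapBuild();
9215  CheckPointLogicalRewriteHeap();
9216  CheckPointBuffers(flags); /* performs all required fsyncs */
9217  CheckPointReplicationOrigin();
9218  /* We deliberately delay 2PC checkpointing as long as possible */
9219  CheckPointTwoPhase(checkPointRedo);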
9220 }
void CheckPointBuffers(int flags)
Definition: bufmgr.c:2574
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1208
void CheckPointReplicationOrigin(void)
Definition: origin.c:545
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1922
void CheckPointCLOG(void)
Definition: clog.c:842
void CheckPointMultiXact(void)
Definition: multixact.c:2140
void CheckPointCommitTs(void)
Definition: commit_ts.c:760
void CheckPointSUBTRANS(void)
Definition: subtrans.c:300
void CheckPointRelationMap(void)
Definition: relmapper.c:546
void CheckPointPredicate(void)
Definition: predicate.c:997
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1735
void CheckPointReplicationSlots(void)
Definition: slot.c:1069

◆ CheckPromoteSignal()

bool CheckPromoteSignal ( void  )

Definition at line 12428 of file xlog.c.

References FALLBACK_PROMOTE_SIGNAL_FILE, PROMOTE_SIGNAL_FILE, and stat.

Referenced by sigusr1_handler().

12429 {
12430  struct stat stat_buf;
12431 
12432  if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0 ||
12433  stat(FALLBACK_PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
12434  return true;
12435 
12436  return false;
12437 }
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:85
struct stat stat_buf
Definition: pg_standby.c:102
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
#define stat(a, b)
Definition: win32_port.h:266

◆ CheckRecoveryConsistency()

static void CheckRecoveryConsistency ( void  )
static

Definition at line 7981 of file xlog.c.

References Assert, ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, DEBUG1, elog, ereport, errmsg(), InArchiveRecovery, XLogCtlData::info_lck, InvalidXLogRecPtr, IsUnderPostmaster, XLogCtlData::lastReplayedEndRecPtr, LocalHotStandbyActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, minRecoveryPoint, PMSIGNAL_BEGIN_HOT_STANDBY, reachedConsistency, SendPostmasterSignal(), XLogCtlData::SharedHotStandbyActive, SpinLockAcquire, SpinLockRelease, STANDBY_SNAPSHOT_READY, standbyState, UpdateControlFile(), XLogCheckInvalidPages(), and XLogRecPtrIsInvalid.

Referenced by ReadRecord(), and StartupXLOG().

7982 {
7983  XLogRecPtr lastReplayedEndRecPtr;
7984 
7985  /*
7986  * During crash recovery, we don't reach a consistent state until we've
7987  * replayed all the WAL.
7988  */
7990  return;
7991 
7993 
7994  /*
7995  * assume that we are called in the startup process, and hence don't need
7996  * a lock to read lastReplayedEndRecPtr
7997  */
7998  lastReplayedEndRecPtr = XLogCtl->lastReplayedEndRecPtr;
7999 
8000  /*
8001  * Have we reached the point where our base backup was completed?
8002  */
8004  ControlFile->backupEndPoint <= lastReplayedEndRecPtr)
8005  {
8006  /*
8007  * We have reached the end of base backup, as indicated by pg_control.
8008  * The data on disk is now consistent. Reset backupStartPoint and
8009  * backupEndPoint, and update minRecoveryPoint to make sure we don't
8010  * allow starting up at an earlier point even if recovery is stopped
8011  * and restarted soon after this.
8012  */
8013  elog(DEBUG1, "end of backup reached");
8014 
8015  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8016 
8017  if (ControlFile->minRecoveryPoint < lastReplayedEndRecPtr)
8018  ControlFile->minRecoveryPoint = lastReplayedEndRecPtr;
8019 
8022  ControlFile->backupEndRequired = false;
8024 
8025  LWLockRelease(ControlFileLock);
8026  }
8027 
8028  /*
8029  * Have we passed our safe starting point? Note that minRecoveryPoint is
8030  * known to be incorrectly set if ControlFile->backupEndRequired, until
8031  * the XLOG_BACKUP_END arrives to advise us of the correct
8032  * minRecoveryPoint. All we know prior to that is that we're not
8033  * consistent yet.
8034  */
8036  minRecoveryPoint <= lastReplayedEndRecPtr &&
8038  {
8039  /*
8040  * Check to see if the XLOG sequence contained any unresolved
8041  * references to uninitialized pages.
8042  */
8044 
8045  reachedConsistency = true;
8046  ereport(LOG,
8047  (errmsg("consistent recovery state reached at %X/%X",
8048  (uint32) (lastReplayedEndRecPtr >> 32),
8049  (uint32) lastReplayedEndRecPtr)));
8050  }
8051 
8052  /*
8053  * Have we got a valid starting snapshot that will allow queries to be
8054  * run? If so, we can tell postmaster that the database is consistent now,
8055  * enabling connections.
8056  */
8061  {
8065 
8066  LocalHotStandbyActive = true;
8067 
8069  }
8070 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
void XLogCheckInvalidPages(void)
Definition: xlogutils.c:220
bool SharedHotStandbyActive
Definition: xlog.c:649
slock_t info_lck
Definition: xlog.c:704
#define LOG
Definition: elog.h:26
bool InArchiveRecovery
Definition: xlog.c:249
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
bool backupEndRequired
Definition: pg_control.h:170
#define SpinLockAcquire(lock)
Definition: spin.h:62
static bool LocalHotStandbyActive
Definition: xlog.c:223
void UpdateControlFile(void)
Definition: xlog.c:4771
bool IsUnderPostmaster
Definition: globals.c:108
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static ControlFileData * ControlFile
Definition: xlog.c:715
XLogRecPtr backupEndPoint
Definition: pg_control.h:169
bool reachedConsistency
Definition: xlog.c:840
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog(elevel,...)
Definition: elog.h:226
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:146
HotStandbyState standbyState
Definition: xlog.c:197
XLogRecPtr backupStartPoint
Definition: pg_control.h:168
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:831
XLogRecPtr lastReplayedEndRecPtr
Definition: xlog.c:683

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 6323 of file xlog.c.

References ArchiveRecoveryRequested, EnableHotStandby, ereport, errhint(), errmsg(), ERROR, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter, ControlFileData::wal_level, WAL_LEVEL_MINIMAL, WAL_LEVEL_REPLICA, and WARNING.

Referenced by StartupXLOG(), and xlog_redo().

6324 {
6325  /*
6326  * For archive recovery, the WAL must be generated with at least 'replica'
6327  * wal_level.
6328  */
6330  {
6331  ereport(WARNING,
6332  (errmsg("WAL was generated with wal_level=minimal, data may be missing"),
6333  errhint("This happens if you temporarily set wal_level=minimal without taking a new base backup.")));
6334  }
6335 
6336  /*
6337  * For Hot Standby, the WAL must be generated with 'replica' mode, and we
6338  * must have at least as many backend slots as the primary.
6339  */
6341  {
6343  ereport(ERROR,
6344  (errmsg("hot standby is not possible because wal_level was not set to \"replica\" or higher on the master server"),
6345  errhint("Either set wal_level to \"replica\" on the master, or turn off hot_standby here.")));
6346 
6347  /* We ignore autovacuum_max_workers when we make this test. */
6348  RecoveryRequiresIntParameter("max_connections",
6351  RecoveryRequiresIntParameter("max_worker_processes",
6354  RecoveryRequiresIntParameter("max_prepared_transactions",
6357  RecoveryRequiresIntParameter("max_locks_per_transaction",
6360  }
6361 }
bool ArchiveRecoveryRequested
Definition: xlog.c:248
int max_locks_per_xact
Definition: pg_control.h:181
int errhint(const char *fmt,...)
Definition: elog.c:987
int max_prepared_xacts
Definition: pg_control.h:180
int max_worker_processes
Definition: pg_control.h:179
#define ERROR
Definition: elog.h:43
int max_prepared_xacts
Definition: twophase.c:117
#define ereport(elevel, rest)
Definition: elog.h:141
int max_locks_per_xact
Definition: lock.c:54
#define WARNING
Definition: elog.h:40
int MaxConnections
Definition: globals.c:131
static ControlFileData * ControlFile
Definition: xlog.c:715
bool EnableHotStandby
Definition: xlog.c:96
int errmsg(const char *fmt,...)
Definition: elog.c:797
int max_worker_processes
Definition: globals.c:132
#define RecoveryRequiresIntParameter(param_name, currValue, minValue)
Definition: xlog.c:6301

◆ checkTimeLineSwitch()

static void checkTimeLineSwitch ( XLogRecPtr  lsn,
TimeLineID  newTLI,
TimeLineID  prevTLI 
)
static

Definition at line 9759 of file xlog.c.

References ereport, errmsg(), minRecoveryPoint, minRecoveryPointTLI, PANIC, ThisTimeLineID, tliInHistory(), and XLogRecPtrIsInvalid.

Referenced by StartupXLOG().

/*
 * checkTimeLineSwitch: sanity checks for a timeline change announced by a
 * checkpoint record (all three error messages refer to a "checkpoint record").
 *
 * lsn     - compared against minRecoveryPoint in the third check; presumably
 *           the location of the record being examined (confirm with caller).
 * newTLI  - timeline ID the record switches to.
 * prevTLI - timeline ID the record claims was current before the switch.
 *
 * Every failed check is reported with ereport(PANIC, ...):
 *   1. prevTLI differs from ThisTimeLineID;
 *   2. newTLI is lower than ThisTimeLineID, or tliInHistory() does not find
 *      it in expectedTLEs;
 *   3. lsn is before minRecoveryPoint while newTLI is greater than
 *      minRecoveryPointTLI.
 *
 * NOTE(review): this listing skips source lines 9785, 9792 and 9793 (the
 * numbering jumps), so the opening of the third condition and the last
 * arguments of its errmsg() call are not visible here.  The References list
 * names XLogRecPtrIsInvalid, which is presumably used on one of them.
 */
9760 {
9761  /* Check that the record agrees on what the current (old) timeline is */
9762  if (prevTLI != ThisTimeLineID)
9763  ereport(PANIC,
9764  (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
9765  prevTLI, ThisTimeLineID)));
9766 
9767  /*
9768  * The new timeline better be in the list of timelines we expect to see,
9769  * according to the timeline history. It should also not decrease.
9770  */
9771  if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs))
9772  ereport(PANIC,
9773  (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
9774  newTLI, ThisTimeLineID)));
9775 
9776  /*
9777  * If we have not yet reached min recovery point, and we're about to
9778  * switch to a timeline greater than the timeline of the min recovery
9779  * point: trouble. After switching to the new timeline, we could not
9780  * possibly visit the min recovery point on the correct timeline anymore.
9781  * This can happen if there is a newer timeline in the archive that
9782  * branched before the timeline the min recovery point is on, and you
9783  * attempt to do PITR to the new timeline.
9784  */
9786  lsn < minRecoveryPoint &&
9787  newTLI > minRecoveryPointTLI)
9788  ereport(PANIC,
9789  (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
9790  newTLI,
9791  (uint32) (minRecoveryPoint >> 32),
9794 
9795  /* Looks good */
9796 }
static List * expectedTLEs
Definition: xlog.c:320
#define PANIC
Definition: elog.h:53
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:832
TimeLineID ThisTimeLineID
Definition: xlog.c:181
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool tliInHistory(TimeLineID tli, List *expectedTLEs)
Definition: timeline.c:515
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:831

◆ checkXLogConsistency()

static void checkXLogConsistency ( XLogReaderState *  record)
static

Definition at line 1363 of file xlog.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetPage, BufferIsValid, RelFileNode::dbNode, elog, XLogReaderState::EndRecPtr, ERROR, FATAL, LockBuffer(), master_image_masked, XLogReaderState::max_block_id, PageGetLSN, RBM_NORMAL_NO_LOG, RelFileNode::relNode, replay_image_masked, RestoreBlockImage(), RmgrData::rm_mask, RmgrTable, RelFileNode::spcNode, UnlockReleaseBuffer(), XLogReadBufferExtended(), XLogRecBlockImageApply, XLogRecGetBlockTag(), XLogRecGetInfo, XLogRecGetRmid, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, and XLR_CHECK_CONSISTENCY.

Referenced by StartupXLOG().

/*
 * checkXLogConsistency: for every block reference in a WAL record, compare
 * the page as it currently exists in the buffer with the full-page image
 * carried by the record, and elog(FATAL) if they differ.
 *
 * record - the decoded WAL record; its XLR_CHECK_CONSISTENCY info bit is
 *          asserted to be set once it is known to have block references.
 *
 * A block is skipped when:
 *   - XLogRecGetBlockTag() reports no reference for that block_id;
 *   - XLogRecBlockImageApply() is true (the image was itself applied, so
 *     the comparison would be the image against itself);
 *   - XLogReadBufferExtended() returns an invalid buffer;
 *   - the page LSN is already past record->EndRecPtr.
 *
 * Both copies are passed through the resource manager's rm_mask callback,
 * when one is defined, before the memcmp() over BLCKSZ bytes.  The copies
 * live in the file-level buffers replay_image_masked / master_image_masked.
 *
 * NOTE(review): this listing skips source lines 1408 and 1412.  Line 1408
 * must hold the final (mode) argument of the XLogReadBufferExtended() call;
 * line 1412 presumably takes the buffer lock that UnlockReleaseBuffer()
 * later drops (the References list names RBM_NORMAL_NO_LOG, LockBuffer and
 * BUFFER_LOCK_EXCLUSIVE) -- confirm against the real source.
 */
1364 {
1365  RmgrId rmid = XLogRecGetRmid(record);
1366  RelFileNode rnode;
1367  ForkNumber forknum;
1368  BlockNumber blkno;
1369  int block_id;
1370 
1371  /* Records with no backup blocks have no need for consistency checks. */
1372  if (!XLogRecHasAnyBlockRefs(record))
1373  return;
1374 
1375  Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
1376 
1377  for (block_id = 0; block_id <= record->max_block_id; block_id++)
1378  {
1379  Buffer buf;
1380  Page page;
1381 
1382  if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
1383  {
1384  /*
1385  * WAL record doesn't contain a block reference with the given id.
1386  * Do nothing.
1387  */
1388  continue;
1389  }
1390 
1391  Assert(XLogRecHasBlockImage(record, block_id));
1392 
1393  if (XLogRecBlockImageApply(record, block_id))
1394  {
1395  /*
1396  * WAL record has already applied the page, so bypass the
1397  * consistency check as that would result in comparing the full
1398  * page stored in the record with itself.
1399  */
1400  continue;
1401  }
1402 
1403  /*
1404  * Read the contents from the current buffer and store it in a
1405  * temporary page.
1406  */
1407  buf = XLogReadBufferExtended(rnode, forknum, blkno,
1409  if (!BufferIsValid(buf))
1410  continue;
1411 
1413  page = BufferGetPage(buf);
1414 
1415  /*
1416  * Take a copy of the local page where WAL has been applied to have a
1417  * comparison base before masking it...
1418  */
1419  memcpy(replay_image_masked, page, BLCKSZ);
1420 
1421  /* No need for this page anymore now that a copy is in. */
1422  UnlockReleaseBuffer(buf);
1423 
1424  /*
1425  * If the block LSN is already ahead of this WAL record, we can't
1426  * expect contents to match. This can happen if recovery is
1427  * restarted.
1428  */
1429  if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
1430  continue;
1431 
1432  /*
1433  * Read the contents from the backup copy, stored in WAL record and
1434  * store it in a temporary page. There is no need to allocate a new
1435  * page here, a local buffer is fine to hold its contents and a mask
1436  * can be directly applied on it.
1437  */
1438  if (!RestoreBlockImage(record, block_id, master_image_masked))
1439  elog(ERROR, "failed to restore block image");
1440 
1441  /*
1442  * If masking function is defined, mask both the master and replay
1443  * images
1444  */
1445  if (RmgrTable[rmid].rm_mask != NULL)
1446  {
1447  RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
1448  RmgrTable[rmid].rm_mask(master_image_masked, blkno);
1449  }
1450 
1451  /* Time to compare the master and replay images. */
1452  if (memcmp(replay_image_masked, master_image_masked, BLCKSZ) != 0)
1453  {
1454  elog(FATAL,
1455  "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
1456  rnode.spcNode, rnode.dbNode, rnode.relNode,
1457  forknum, blkno);
1458  }
1459  }
1460 }
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:235
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:437
const RmgrData RmgrTable[RM_MAX_ID+1]
Definition: rmgr.c:36
uint32 BlockNumber
Definition: block.h:31
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
#define XLR_CHECK_CONSISTENCY
Definition: xlogrecord.h:80
XLogRecPtr EndRecPtr
Definition: xlogreader.h:120
static char * replay_image_masked
Definition: xlog.c:255
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
#define FATAL
Definition: elog.h:52
static char * buf
Definition: pg_test_fsync.c:67
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:226
ForkNumber
Definition: relpath.h:40
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1358
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
uint8 RmgrId
Definition: rmgr.h:11
#define Assert(condition)
Definition: c.h:732
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:1411
#define PageGetLSN(page)
Definition: bufpage.h:362
#define elog(elevel,...)
Definition: elog.h:226
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:232
#define XLogRecBlockImageApply(decoder, block_id)
Definition: xlogreader.h:237
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
#define XLogRecGetRmid(decoder)
Definition: xlogreader.h:227
static char * master_image_masked
Definition: xlog.c:256

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3831 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, and XLogFileName.

Referenced by perform_base_backup(), and XLogRead().

/*
 * CheckXLogRemoved: raise an ERROR if the requested WAL segment is at or
 * before the last segment recorded as removed.
 *
 * segno - segment number being requested.
 * tli   - timeline ID, used only to build the file name for the message.
 *
 * The value of errno on entry is saved and put back both before the
 * ereport() call and on the normal return path, so the caller's errno
 * survives this check (and is what the error report sees).
 *
 * NOTE(review): this listing skips source lines 3836, 3838 and 3847.  The
 * References list names SpinLockAcquire, SpinLockRelease, info_lck and
 * errcode_for_file_access, so presumably the read of
 * XLogCtl->lastRemovedSegNo is bracketed by the info_lck spinlock and the
 * ereport() starts with errcode_for_file_access() -- confirm against the
 * real source.
 */
3832 {
3833  int save_errno = errno;
3834  XLogSegNo lastRemovedSegNo;
3835 
3837  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3839 
3840  if (segno <= lastRemovedSegNo)
3841  {
3842  char filename[MAXFNAMELEN];
3843 
3844  XLogFileName(filename, tli, segno, wal_segment_size);
3845  errno = save_errno;
3846  ereport(ERROR,
3848  errmsg("requested WAL segment %s has already been removed",
3849  filename)));
3850  }
3851  errno = save_errno;
3852 }
int wal_segment_size
Definition: xlog.c:113
slock_t info_lck
Definition: xlog.c:704
XLogSegNo lastRemovedSegNo
Definition: xlog.c:587
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define ERROR
Definition: elog.h:43
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:141
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static XLogCtlData * XLogCtl
Definition: xlog.c:707
static char * filename
Definition: pg_dumpall.c:88
int errmsg(const char *fmt,...)
Definition: elog.c:797

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4188 of file xlog.c.

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName, MAXPGPATH, ReadDir(), snprintf(), XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_stop_backup().

/*
 * CleanupBackupHistory: remove backup history files from the WAL directory
 * once XLogArchiveCheckDone() reports true for them.
 *
 * Walks every entry of XLOGDIR.  An entry is removed only when its name
 * matches IsBackupHistoryFileName() and XLogArchiveCheckDone() returns true
 * for it; the file is then unlinked and XLogArchiveCleanup() is called for
 * the same name.
 *
 * The return value of unlink() is not examined, so a failed removal is
 * silently ignored here.  AllocateDir()'s result is passed straight to
 * ReadDir() without a NULL check in this function; presumably ReadDir()
 * reports a failed open itself -- confirm in fd.c.
 */
4189 {
4190  DIR *xldir;
4191  struct dirent *xlde;
4192  char path[MAXPGPATH + sizeof(XLOGDIR)];
4193 
4194  xldir = AllocateDir(XLOGDIR);
4195 
4196  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4197  {
4198  if (IsBackupHistoryFileName(xlde->d_name))
4199  {
4200  if (XLogArchiveCheckDone(xlde->d_name))
4201  {
4202  elog(DEBUG2, "removing WAL backup history file \"%s\"",
4203  xlde->d_name);
4204  snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4205  unlink(path);
4206  XLogArchiveCleanup(xlde->d_name);
4207  }
4208  }
4209  }
4210 
4211  FreeDir(xldir);
4212 }
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
Definition: dirent.h:9
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:752
Definition: dirent.c:25
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:619
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2590
#define IsBackupHistoryFileName(fname)
#define XLOGDIR
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2656
#define elog(elevel,...)
Definition: elog.h:226
char d_name[MAX_PATH]
Definition: dirent.h:14
int FreeDir(DIR *dir)
Definition: fd.c:2708

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData *  rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos 
)
static

Definition at line 1467 of file xlog.c.

References Assert, XLogRecData::data, elog, GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

/*
 * CopyXLogRecordToWAL: copy the chain of record data chunks into the WAL
 * buffers, starting at StartPos, and verify that exactly the reserved
 * range [StartPos, EndPos) was consumed.
 *
 * write_len   - total number of payload bytes in the rdata chain (asserted
 *               to equal the bytes actually copied).
 * isLogSwitch - true for an xlog-switch record; the remainder of the
 *               segment is then consumed as well.
 * rdata       - linked list of chunks (data, len, next) to copy.
 * StartPos    - WAL position where the record begins.
 * EndPos      - WAL position where the reserved space ends.
 *
 * When a chunk does not fit in the space left on the current page, the part
 * that fits is copied, the next page is fetched with GetXLogBuffer(), its
 * header gets xlp_rem_len (bytes still to come) and the
 * XLP_FIRST_IS_CONTRECORD flag, and copying resumes after the long or short
 * page header.
 *
 * For a log switch that does not already end on a segment boundary, every
 * remaining page of the segment is fetched and its short page header is
 * zeroed.  Otherwise the end position is rounded up with MAXALIGN64.
 *
 * Raises PANIC if the final position differs from EndPos.
 */
1469 {
1470  char *currpos;
1471  int freespace;
1472  int written;
1473  XLogRecPtr CurrPos;
1474  XLogPageHeader pagehdr;
1475 
1476  /*
1477  * Get a pointer to the right place in the right WAL buffer to start
1478  * inserting to.
1479  */
1480  CurrPos = StartPos;
1481  currpos = GetXLogBuffer(CurrPos);
1482  freespace = INSERT_FREESPACE(CurrPos);
1483 
1484  /*
1485  * there should be enough space for at least the first field (xl_tot_len)
1486  * on this page.
1487  */
1488  Assert(freespace >= sizeof(uint32));
1489 
1490  /* Copy record data */
1491  written = 0;
1492  while (rdata != NULL)
1493  {
1494  char *rdata_data = rdata->data;
1495  int rdata_len = rdata->len;
1496 
1497  while (rdata_len > freespace)
1498  {
1499  /*
1500  * Write what fits on this page, and continue on the next page.
1501  */
1502  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1503  memcpy(currpos, rdata_data, freespace);
1504  rdata_data += freespace;
1505  rdata_len -= freespace;
1506  written += freespace;
1507  CurrPos += freespace;
1508 
1509  /*
1510  * Get pointer to beginning of next page, and set the xlp_rem_len
1511  * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1512  *
1513  * It's safe to set the contrecord flag and xlp_rem_len without a
1514  * lock on the page. All the other flags were already set when the
1515  * page was initialized, in AdvanceXLInsertBuffer, and we're the
1516  * only backend that needs to set the contrecord flag.
1517  */
1518  currpos = GetXLogBuffer(CurrPos);
1519  pagehdr = (XLogPageHeader) currpos;
1520  pagehdr->xlp_rem_len = write_len - written;
1521  pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1522 
1523  /* skip over the page header */
1524  if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1525  {
1526  CurrPos += SizeOfXLogLongPHD;
1527  currpos += SizeOfXLogLongPHD;
1528  }
1529  else
1530  {
1531  CurrPos += SizeOfXLogShortPHD;
1532  currpos += SizeOfXLogShortPHD;
1533  }
1534  freespace = INSERT_FREESPACE(CurrPos);
1535  }
1536 
1537  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1538  memcpy(currpos, rdata_data, rdata_len);
1539  currpos += rdata_len;
1540  CurrPos += rdata_len;
1541  freespace -= rdata_len;
1542  written += rdata_len;
1543 
1544  rdata = rdata->next;
1545  }
1546  Assert(written == write_len);
1547 
1548  /*
1549  * If this was an xlog-switch, it's not enough to write the switch record,
1550  * we also have to consume all the remaining space in the WAL segment. We
1551  * have already reserved that space, but we need to actually fill it.
1552  */
1553  if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1554  {
1555  /* An xlog-switch record doesn't contain any data besides the header */
1556  Assert(write_len == SizeOfXLogRecord);
1557 
1558  /* Assert that we did reserve the right amount of space */
1559  Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1560 
1561  /* Use up all the remaining space on the current page */
1562  CurrPos += freespace;
1563 
1564  /*
1565  * Cause all remaining pages in the segment to be flushed, leaving the
1566  * XLog position where it should be, at the start of the next segment.
1567  * We do this one page at a time, to make sure we don't deadlock
1568  * against ourselves if wal_buffers < wal_segment_size.
1569  */
1570  while (CurrPos < EndPos)
1571  {
1572  /*
1573  * The minimal action to flush the page would be to call
1574  * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1575  * AdvanceXLInsertBuffer(...). The page would be left initialized
1576  * mostly to zeros, except for the page header (always the short
1577  * variant, as this is never a segment's first page).
1578  *
1579  * The large vistas of zeros are good for compressibility, but the
1580  * headers interrupting them every XLOG_BLCKSZ (with values that
1581  * differ from page to page) are not. The effect varies with
1582  * compression tool, but bzip2 for instance compresses about an
1583  * order of magnitude worse if those headers are left in place.
1584  *
1585  * Rather than complicating AdvanceXLInsertBuffer itself (which is
1586  * called in heavily-loaded circumstances as well as this lightly-
1587  * loaded one) with variant behavior, we just use GetXLogBuffer
1588  * (which itself calls the two methods we need) to get the pointer
1589  * and zero most of the page. Then we just zero the page header.
1590  */
1591  currpos = GetXLogBuffer(CurrPos);
1592  MemSet(currpos, 0, SizeOfXLogShortPHD);
1593 
1594  CurrPos += XLOG_BLCKSZ;
1595  }
1596  }
1597  else
1598  {
1599  /* Align the end position, so that the next record starts aligned */
1600  CurrPos = MAXALIGN64(CurrPos);
1601  }
1602 
1603  if (CurrPos != EndPos)
1604  elog(PANIC, "space reserved for WAL record does not match what was written");
1605 }
int wal_segment_size
Definition: xlog.c:113
#define MemSet(start, val, len)
Definition: c.h:941
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
#define PANIC
Definition: elog.h:53
static char * GetXLogBuffer(XLogRecPtr ptr)
Definition: xlog.c:1843
#define MAXALIGN64(LEN)
Definition: c.h:710
unsigned int uint32
Definition: c.h:358
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:721
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:55
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:77
struct XLogRecData * next
#define elog(elevel,...)
Definition: elog.h:226
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 8729 of file xlog.c.

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, XLogCtlInsert::CurrBytePos, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, elog, END_CRIT_SECTION, ereport, errmsg(), ERROR, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestXmin(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, InitXLogInsert(), Insert(), XLogCtlData::Insert, INSERT_FREESPACE, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, CheckPoint::newestCommitTsXid, VariableCacheData::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, VariableCacheData::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, VariableCacheData::oldestXid, CheckPoint::oldestXidDB, VariableCacheData::oldestXidDB, PANIC, pfree(), pg_usleep(), PreallocXlogFiles(), CheckPoint::PrevTimeLineID, XLogCtlData::PrevTimeLineID, PROCARRAY_FLAGS_DEFAULT, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, 
RemoveOldXlogFiles(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogShortPHD, smgrpostckpt(), smgrpreckpt(), SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, TruncateSUBTRANS(), XLogCtlData::ulsn_lck, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), ShutdownXLOG(), and StartupXLOG().

/*
 * CreateCheckPoint: perform a checkpoint and write its record to WAL.
 *
 * flags - bitmask of CHECKPOINT_* options.  The bits tested in the visible
 *         code are CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN and
 *         CHECKPOINT_FORCE; the whole value is also passed on to
 *         LogCheckpointStart() and CheckPointGuts().
 *
 * Outline of the visible steps, in order:
 *   - refuse (elog ERROR) to run during recovery unless this is an
 *     end-of-recovery checkpoint;
 *   - take CheckpointLock and reset CheckpointStats;
 *   - call smgrpreckpt() and start filling in the CheckPoint struct;
 *   - read the current insert position and, unless the checkpoint is a
 *     shutdown, end-of-recovery or forced one, return early when
 *     GetLastImportantRecPtr() equals ControlFile->checkPoint;
 *   - compute the redo pointer (stepping over a page header when the insert
 *     position sits exactly on a page boundary) and publish it in RedoRecPtr
 *     and XLogCtl;
 *   - collect next/oldest XID, OID and multixact values under their locks;
 *   - wait, polling every 10 ms, for the virtual XIDs returned by
 *     GetVirtualXIDsDelayingChkpt();
 *   - run CheckPointGuts(), insert the checkpoint record and XLogFlush() it;
 *   - update the control file and the shared ckptXid / ckptXidEpoch;
 *   - call smgrpostckpt(), remove old WAL segments, preallocate new ones
 *     (not on shutdown), log the end of the checkpoint and release
 *     CheckpointLock.
 *
 * Raises PANIC if, for a shutdown checkpoint, the redo pointer differs from
 * ProcLastRecPtr (WAL was written concurrently with the shutdown).
 *
 * NOTE(review): this listing is incomplete.  The following source lines are
 * absent (the numbering jumps over them): 8735, 8747, 8782, 8787, 8792,
 * 8794, 8815, 8817, 8829, 8842, 8858, 8903, 8906, 8908, 8930, 8934-8935,
 * 8939, 9015, 9017, 9026, 9064-9065, 9069-9070, 9077-9079, 9081, 9085,
 * 9088, 9106, 9112, 9132 and 9139-9141.  Among other things they include
 * the declaration of the local Insert, the condition that sets shutdown,
 * and the bodies of several one-line if/else statements.  The matching
 * START_CRIT_SECTION() calls and lock acquire/release calls are also
 * presumably on those lines, since only their END_CRIT_SECTION()/release
 * halves or neighbours are visible -- do not read the statements that
 * follow a gap as belonging to the preceding if/else without checking the
 * real source.
 */
8730 {
8731  bool shutdown;
8732  CheckPoint checkPoint;
8733  XLogRecPtr recptr;
8734  XLogSegNo _logSegNo;
8736  uint32 freespace;
8737  XLogRecPtr PriorRedoPtr;
8738  XLogRecPtr curInsert;
8739  XLogRecPtr last_important_lsn;
8740  VirtualTransactionId *vxids;
8741  int nvxids;
8742 
8743  /*
8744  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
8745  * issued at a different time.
8746  */
8748  shutdown = true;
8749  else
8750  shutdown = false;
8751 
8752  /* sanity check */
8753  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
8754  elog(ERROR, "can't create a checkpoint during recovery");
8755 
8756  /*
8757  * Initialize InitXLogInsert working areas before entering the critical
8758  * section. Normally, this is done by the first call to
8759  * RecoveryInProgress() or LocalSetXLogInsertAllowed(), but when creating
8760  * an end-of-recovery checkpoint, the LocalSetXLogInsertAllowed call is
8761  * done below in a critical section, and InitXLogInsert cannot be called
8762  * in a critical section.
8763  */
8764  InitXLogInsert();
8765 
8766  /*
8767  * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
8768  * (This is just pro forma, since in the present system structure there is
8769  * only one process that is allowed to issue checkpoints at any given
8770  * time.)
8771  */
8772  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
8773 
8774  /*
8775  * Prepare to accumulate statistics.
8776  *
8777  * Note: because it is possible for log_checkpoints to change while a
8778  * checkpoint proceeds, we always accumulate stats, even if
8779  * log_checkpoints is currently off.
8780  */
8781  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
8783 
8784  /*
8785  * Use a critical section to force system panic if we have trouble.
8786  */
8788 
8789  if (shutdown)
8790  {
8791  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8793  ControlFile->time = (pg_time_t) time(NULL);
8795  LWLockRelease(ControlFileLock);
8796  }
8797 
8798  /*
8799  * Let smgr prepare for checkpoint; this has to happen before we determine
8800  * the REDO pointer. Note that smgr must not do anything that'd have to
8801  * be undone if we decide no checkpoint is needed.
8802  */
8803  smgrpreckpt();
8804 
8805  /* Begin filling in the checkpoint WAL record */
8806  MemSet(&checkPoint, 0, sizeof(checkPoint));
8807  checkPoint.time = (pg_time_t) time(NULL);
8808 
8809  /*
8810  * For Hot Standby, derive the oldestActiveXid before we fix the redo
8811  * pointer. This allows us to begin accumulating changes to assemble our
8812  * starting snapshot of locks and transactions.
8813  */
8814  if (!shutdown && XLogStandbyInfoActive())
8816  else
8818 
8819  /*
8820  * Get location of last important record before acquiring insert locks (as
8821  * GetLastImportantRecPtr() also locks WAL locks).
8822  */
8823  last_important_lsn = GetLastImportantRecPtr();
8824 
8825  /*
8826  * We must block concurrent insertions while examining insert state to
8827  * determine the checkpoint REDO pointer.
8828  */
8830  curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
8831 
8832  /*
8833  * If this isn't a shutdown or forced checkpoint, and if there has been no
8834  * WAL activity requiring a checkpoint, skip it. The idea here is to
8835  * avoid inserting duplicate checkpoints when the system is idle.
8836  */
8837  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
8838  CHECKPOINT_FORCE)) == 0)
8839  {
8840  if (last_important_lsn == ControlFile->checkPoint)
8841  {
8843  LWLockRelease(CheckpointLock);
8844  END_CRIT_SECTION();
8845  ereport(DEBUG1,
8846  (errmsg("checkpoint skipped because system is idle")));
8847  return;
8848  }
8849  }
8850 
8851  /*
8852  * An end-of-recovery checkpoint is created before anyone is allowed to
8853  * write WAL. To allow us to write the checkpoint record, temporarily
8854  * enable XLogInsertAllowed. (This also ensures ThisTimeLineID is
8855  * initialized, which we need here and in AdvanceXLInsertBuffer.)
8856  */
8857  if (flags & CHECKPOINT_END_OF_RECOVERY)
8859 
8860  checkPoint.ThisTimeLineID = ThisTimeLineID;
8861  if (flags & CHECKPOINT_END_OF_RECOVERY)
8862  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
8863  else
8864  checkPoint.PrevTimeLineID = ThisTimeLineID;
8865 
8866  checkPoint.fullPageWrites = Insert->fullPageWrites;
8867 
8868  /*
8869  * Compute new REDO record ptr = location of next XLOG record.
8870  *
8871  * NB: this is NOT necessarily where the checkpoint record itself will be,
8872  * since other backends may insert more XLOG records while we're off doing
8873  * the buffer flush work. Those XLOG records are logically after the
8874  * checkpoint, even though physically before it. Got that?
8875  */
8876  freespace = INSERT_FREESPACE(curInsert);
8877  if (freespace == 0)
8878  {
8879  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
8880  curInsert += SizeOfXLogLongPHD;
8881  else
8882  curInsert += SizeOfXLogShortPHD;
8883  }
8884  checkPoint.redo = curInsert;
8885 
8886  /*
8887  * Here we update the shared RedoRecPtr for future XLogInsert calls; this
8888  * must be done while holding all the insertion locks.
8889  *
8890  * Note: if we fail to complete the checkpoint, RedoRecPtr will be left
8891  * pointing past where it really needs to point. This is okay; the only
8892  * consequence is that XLogInsert might back up whole buffers that it
8893  * didn't really need to. We can't postpone advancing RedoRecPtr because
8894  * XLogInserts that happen while we are dumping buffers must assume that
8895  * their buffer changes are not included in the checkpoint.
8896  */
8897  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
8898 
8899  /*
8900  * Now we can release the WAL insertion locks, allowing other xacts to
8901  * proceed while we are flushing disk buffers.
8902  */
8904 
8905  /* Update the info_lck-protected copy of RedoRecPtr as well */
8907  XLogCtl->RedoRecPtr = checkPoint.redo;
8909 
8910  /*
8911  * If enabled, log checkpoint start. We postpone this until now so as not
8912  * to log anything if we decided to skip the checkpoint.
8913  */
8914  if (log_checkpoints)
8915  LogCheckpointStart(flags, false);
8916 
8917  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
8918 
8919  /*
8920  * Get the other info we need for the checkpoint record.
8921  *
8922  * We don't need to save oldestClogXid in the checkpoint, it only matters
8923  * for the short period in which clog is being truncated, and if we crash
8924  * during that we'll redo the clog truncation and fix up oldestClogXid
8925  * there.
8926  */
8927  LWLockAcquire(XidGenLock, LW_SHARED);
8928  checkPoint.nextXid = ShmemVariableCache->nextXid;
8929  checkPoint.oldestXid = ShmemVariableCache->oldestXid;
8931  LWLockRelease(XidGenLock);
8932 
8933  LWLockAcquire(CommitTsLock, LW_SHARED);
8936  LWLockRelease(CommitTsLock);
8937 
8938  /* Increase XID epoch if we've wrapped around since last checkpoint */
8940  if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
8941  checkPoint.nextXidEpoch++;
8942 
8943  LWLockAcquire(OidGenLock, LW_SHARED);
8944  checkPoint.nextOid = ShmemVariableCache->nextOid;
8945  if (!shutdown)
8946  checkPoint.nextOid += ShmemVariableCache->oidCount;
8947  LWLockRelease(OidGenLock);
8948 
8949  MultiXactGetCheckptMulti(shutdown,
8950  &checkPoint.nextMulti,
8951  &checkPoint.nextMultiOffset,
8952  &checkPoint.oldestMulti,
8953  &checkPoint.oldestMultiDB);
8954 
8955  /*
8956  * Having constructed the checkpoint record, ensure all shmem disk buffers
8957  * and commit-log buffers are flushed to disk.
8958  *
8959  * This I/O could fail for various reasons. If so, we will fail to
8960  * complete the checkpoint, but there is no reason to force a system
8961  * panic. Accordingly, exit critical section while doing it.
8962  */
8963  END_CRIT_SECTION();
8964 
8965  /*
8966  * In some cases there are groups of actions that must all occur on one
8967  * side or the other of a checkpoint record. Before flushing the
8968  * checkpoint record we must explicitly wait for any backend currently
8969  * performing those groups of actions.
8970  *
8971  * One example is end of transaction, so we must wait for any transactions
8972  * that are currently in commit critical sections. If an xact inserted
8973  * its commit record into XLOG just before the REDO point, then a crash
8974  * restart from the REDO point would not replay that record, which means
8975  * that our flushing had better include the xact's update of pg_xact. So
8976  * we wait till he's out of his commit critical section before proceeding.
8977  * See notes in RecordTransactionCommit().
8978  *
8979  * Because we've already released the insertion locks, this test is a bit
8980  * fuzzy: it is possible that we will wait for xacts we didn't really need
8981  * to wait for. But the delay should be short and it seems better to make
8982  * checkpoint take a bit longer than to hold off insertions longer than
8983  * necessary. (In fact, the whole reason we have this issue is that xact.c
8984  * does commit record XLOG insertion and clog update as two separate steps
8985  * protected by different locks, but again that seems best on grounds of
8986  * minimizing lock contention.)
8987  *
8988  * A transaction that has not yet set delayChkpt when we look cannot be at
8989  * risk, since he's not inserted his commit record yet; and one that's
8990  * already cleared it is not at risk either, since he's done fixing clog
8991  * and we will correctly flush the update below. So we cannot miss any
8992  * xacts we need to wait for.
8993  */
8994  vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
8995  if (nvxids > 0)
8996  {
8997  do
8998  {
8999  pg_usleep(10000L); /* wait for 10 msec */
9000  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
9001  }
9002  pfree(vxids);
9003 
9004  CheckPointGuts(checkPoint.redo, flags);
9005 
9006  /*
9007  * Take a snapshot of running transactions and write this to WAL. This
9008  * allows us to reconstruct the state of running transactions during
9009  * archive recovery, if required. Skip, if this info disabled.
9010  *
9011  * If we are shutting down, or Startup process is completing crash
9012  * recovery we don't need to write running xact data.
9013  */
9014  if (!shutdown && XLogStandbyInfoActive())
9016 
9018 
9019  /*
9020  * Now insert the checkpoint record into XLOG.
9021  */
9022  XLogBeginInsert();
9023  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
9024  recptr = XLogInsert(RM_XLOG_ID,
9025  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
9027 
9028  XLogFlush(recptr);
9029 
9030  /*
9031  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
9032  * overwritten at next startup. No-one should even try, this just allows
9033  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
9034  * to just temporarily disable writing until the system has exited
9035  * recovery.
9036  */
9037  if (shutdown)
9038  {
9039  if (flags & CHECKPOINT_END_OF_RECOVERY)
9040  LocalXLogInsertAllowed = -1; /* return to "check" state */
9041  else
9042  LocalXLogInsertAllowed = 0; /* never again write WAL */
9043  }
9044 
9045  /*
9046  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
9047  * = end of actual checkpoint record.
9048  */
9049  if (shutdown && checkPoint.redo != ProcLastRecPtr)
9050  ereport(PANIC,
9051  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
9052 
9053  /*
9054  * Remember the prior checkpoint's redo ptr for
9055  * UpdateCheckPointDistanceEstimate()
9056  */
9057  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9058 
9059  /*
9060  * Update the control file.
9061  */
9062  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9063  if (shutdown)
9066  ControlFile->checkPointCopy = checkPoint;
9067  ControlFile->time = (pg_time_t) time(NULL);
9068  /* crash recovery should always recover to the end of WAL */
9071 
9072  /*
9073  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
9074  * unused on non-shutdown checkpoints, but seems useful to store it always
9075  * for debugging purposes.
9076  */
9080 
9082  LWLockRelease(ControlFileLock);
9083 
9084  /* Update shared-memory copy of checkpoint XID/epoch */
9086  XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
9087  XLogCtl->ckptXid = checkPoint.nextXid;
9089 
9090  /*
9091  * We are now done with critical updates; no need for system panic if we
9092  * have trouble while fooling with old log segments.
9093  */
9094  END_CRIT_SECTION();
9095 
9096  /*
9097  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
9098  */
9099  smgrpostckpt();
9100 
9101  /*
9102  * Update the average distance between checkpoints if the prior checkpoint
9103  * exists.
9104  */
9105  if (PriorRedoPtr != InvalidXLogRecPtr)
9107 
9108  /*
9109  * Delete old log files, those no longer needed for last checkpoint to
9110  * prevent the disk holding the xlog from growing full.
9111  */
9113  KeepLogSeg(recptr, &_logSegNo);
9114  _logSegNo--;
9115  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
9116 
9117  /*
9118  * Make more log segments if needed. (Do this after recycling old log
9119  * segments, since that may supply some of the needed files.)
9120  */
9121  if (!shutdown)
9122  PreallocXlogFiles(recptr);
9123 
9124  /*
9125  * Truncate pg_subtrans if possible. We can throw away all data before
9126  * the oldest XMIN of any running transaction. No future transaction will
9127  * attempt to reference any pg_subtrans entry older than that (see Asserts
9128  * in subtrans.c). During recovery, though, we mustn't do this because
9129  * StartupSUBTRANS hasn't been called yet.
9130  */
9131  if (!RecoveryInProgress())
9133 
9134  /* Real work is done, but log and update stats before releasing lock. */
9135  LogCheckpointEnd(false);
9136 
9137  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
9138  NBuffers,
9142 
9143  LWLockRelease(CheckpointLock);
9144 }
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:8432
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8667
static int LocalXLogInsertAllowed
Definition: xlog.c:235
bool log_checkpoints
Definition: xlog.c:102
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int64 pg_time_t
Definition: pgtime.h:23
TransactionId ckptXid
Definition: xlog.c:583
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:68
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
uint32 oidCount
Definition: transam.h:112
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1954
XLogRecPtr unloggedLSN
Definition: xlog.c:590
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:338
TransactionId oldestActiveXid
Definition: pg_control.h:63
void InitXLogInsert(void)
Definition: xloginsert.c:1028
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:704
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2256
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: xlog.c:631
TimeLineID PrevTimeLineID
Definition: pg_control.h:39
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
int ckpt_segs_recycled
Definition: xlog.h:209
TransactionId oldestXid
Definition: transam.h:119
#define MemSet(start, val, len)
Definition: c.h:941
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2118
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9205
CheckPoint checkPointCopy
Definition: pg_control.h:131
XLogCtlInsert Insert
Definition: xlog.c:577
TransactionId oldestXid
Definition: pg_control.h:47
bool RecoveryInProgress(void)
Definition: xlog.c:8082
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:354
uint32 ckptXidEpoch
Definition: xlog.c:582
TransactionId nextXid
Definition: pg_control.h:43
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:53
bool fullPageWrites
Definition: xlog.c:552
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2809
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void UpdateControlFile(void)
Definition: xlog.c:4771
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:1031
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:901
#define ERROR
Definition: elog.h:43
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8582
TransactionId nextXid
Definition: transam.h:117
uint32 nextXidEpoch
Definition: pg_control.h:42
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
XLogRecPtr unloggedLSN
Definition: pg_control.h:133
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3798
uint64 XLogSegNo
Definition: xlogdefs.h:34
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:177
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
uint64 CurrBytePos
Definition: xlog.c:528
unsigned int uint32
Definition: c.h:358
XLogRecPtr RedoRecPtr
Definition: xlog.c:581
int ckpt_segs_removed
Definition: xlog.h:208
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:721
#define ereport(elevel, rest)
Definition: elog.h:141
TransactionId oldestCommitTsXid
Definition: transam.h:129
static void Insert(File file)
Definition: fd.c:1062
int ckpt_bufs_written
Definition: xlog.h:205
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:8208
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
TransactionId newestCommitTsXid
Definition: pg_control.h:54
#define PROCARRAY_FLAGS_DEFAULT
Definition: procarray.h:50
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9525
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
Oid oldestMultiDB
Definition: pg_control.h:50
#define XLogStandbyInfoActive()
Definition: xlog.h:160
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
Oid nextOid
Definition: pg_control.h:44
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
Definition: xlog.c:3928
bool fullPageWrites
Definition: pg_control.h:41
TransactionId GetOldestXmin(Relation rel, int flags)
Definition: procarray.c:1306
void smgrpreckpt(void)
Definition: smgr.c:759
uint64 XLogRecPtr
Definition: xlogdefs.h:21
Oid oldestXidDB
Definition: pg_control.h:48
TransactionId newestCommitTsXid
Definition: transam.h:130
CheckpointStatsData CheckpointStats
Definition: xlog.c:175
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:55
MultiXactId nextMulti
Definition: pg_control.h:45
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
int ckpt_segs_added
Definition: xlog.h:207
slock_t ulsn_lck
Definition: xlog.c:591
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog(elevel,...)
Definition: elog.h:226
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2094
int NBuffers
Definition: globals.c:130
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2301
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr RedoRecPtr
Definition: xlog.c:550
void smgrpostckpt(void)
Definition: smgr.c:789
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8564
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:176
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 9156 of file xlog.c.

References elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, xl_end_of_recovery::PrevTimeLineID, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, ThisTimeLineID, xl_end_of_recovery::ThisTimeLineID, ControlFileData::time, UpdateControlFile(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by StartupXLOG().

9157 {
      /*
       * Insert an XLOG_END_OF_RECOVERY record stamped with the current
       * timestamp, flush WAL up to it, and store its location in the control
       * file data as the new minRecoveryPoint.  Raises ERROR when
       * RecoveryInProgress() reports that recovery is not in progress.
       *
       * NOTE(review): this listing skips source lines 9167-9170, 9172, 9174
       * and 9189-9190.  The page's References list for this function names
       * WALInsertLockAcquireExclusive(), WALInsertLockRelease(),
       * LocalSetXLogInsertAllowed(), START_CRIT_SECTION, UpdateControlFile(),
       * the ThisTimeLineID/PrevTimeLineID fields and minRecoveryPointTLI,
       * none of which appear in the visible lines -- presumably they live on
       * the skipped lines.  Confirm against xlog.c before relying on this
       * listing.
       */
9158  xl_end_of_recovery xlrec;
9159  XLogRecPtr recptr;
9160 
9161  /* sanity check */
9162  if (!RecoveryInProgress())
9163  elog(ERROR, "can only be used to end recovery");
9164 
9165  xlrec.end_time = GetCurrentTimestamp();
9166 
9171 
9173 
9175 
      /* Register the whole xlrec struct as the record payload and insert it. */
9176  XLogBeginInsert();
9177  XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
9178  recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
9179 
      /* Flush WAL up to the value returned by XLogInsert(). */
9180  XLogFlush(recptr);
9181 
9182  /*
9183  * Update the control file so that crash recovery can follow the timeline
9184  * changes to this point.
9185  */
9186  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9187  ControlFile->time = (pg_time_t) time(NULL);
9188  ControlFile->minRecoveryPoint = recptr;
      /*
       * NOTE(review): source lines 9189-9190 are absent here; presumably they
       * set minRecoveryPointTLI and call UpdateControlFile() (both named in
       * the References list) -- confirm.
       */
9191  LWLockRelease(ControlFileLock);
9192 
      /*
       * NOTE(review): the matching START_CRIT_SECTION is not visible in this
       * listing (source line 9174 is skipped) -- confirm.
       */
9193  END_CRIT_SECTION();
9194 
9195  LocalXLogInsertAllowed = -1; /* return to "check" state */
9196 }
static int LocalXLogInsertAllowed
Definition: xlog.c:235
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
pg_time_t time
Definition: pg_control.h:128
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
TimeLineID PrevTimeLineID
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
TimeLineID PrevTimeLineID
Definition: xlog.c:631
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
bool RecoveryInProgress(void)
Definition: xlog.c:8082
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:76
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2809
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
void UpdateControlFile(void)
Definition: xlog.c:4771
#define ERROR
Definition: elog.h:43
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:8208
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
TimestampTz end_time
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
TimeLineID ThisTimeLineID
#define elog(elevel,...)
Definition: elog.h:226
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 9275 of file xlog.c.

References XLogCtlData::archiveCleanupCommand, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_start_t, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestXmin(), GetWalRcvWriteRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, XLogCtlData::Insert, InvalidXLogRecPtr, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, PreallocXlogFiles(), PROCARRAY_FLAGS_DEFAULT, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, ControlFileData::time, timestamptz_to_str(), TruncateSUBTRANS(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

9276 {
      /*
       * Attempt a restartpoint based on the last checkpoint record saved in
       * XLogCtl.
       *
       * flags: tested for CHECKPOINT_IS_SHUTDOWN and passed on to
       * LogCheckpointStart() and CheckPointGuts().
       *
       * Returns false, after releasing CheckpointLock, when recovery has
       * already ended, or when no checkpoint record is available or its redo
       * pointer is not past the one already held in the control file data.
       * Returns true once the restartpoint has been carried out and logged.
       *
       * NOTE(review): this listing skips a number of source lines (9295,
       * 9299, 9335, 9339, 9341, 9357, 9359, 9362, 9364, 9374, 9394, 9415,
       * 9418-9419, 9422-9423, 9432, 9438, 9491, 9497, 9508-9509).  Several
       * `if` statements below therefore appear without their governed
       * statement, and locks named in the comments appear without their
       * acquire/release calls.  The References list for this function names
       * the calls that are presumably on those lines; confirm against xlog.c.
       */
9277  XLogRecPtr lastCheckPointRecPtr;
9278  XLogRecPtr lastCheckPointEndPtr;
9279  CheckPoint lastCheckPoint;
9280  XLogRecPtr PriorRedoPtr;
9281  XLogRecPtr receivePtr;
9282  XLogRecPtr replayPtr;
9283  TimeLineID replayTLI;
9284  XLogRecPtr endptr;
9285  XLogSegNo _logSegNo;
9286  TimestampTz xtime;
9287 
9288  /*
9289  * Acquire CheckpointLock to ensure only one restartpoint or checkpoint
9290  * happens at a time.
9291  */
9292  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
9293 
9294  /* Get a local copy of the last safe checkpoint record. */
      /*
       * NOTE(review): source lines 9295 and 9299 are skipped; presumably the
       * SpinLockAcquire/SpinLockRelease pair on info_lck named in the
       * References list brackets these three copies -- confirm.
       */
9296  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
9297  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
9298  lastCheckPoint = XLogCtl->lastCheckPoint;
9300 
9301  /*
9302  * Check that we're still in recovery mode. It's ok if we exit recovery
9303  * mode after this check, the restart point is valid anyway.
9304  */
9305  if (!RecoveryInProgress())
9306  {
9307  ereport(DEBUG2,
9308  (errmsg("skipping restartpoint, recovery has already ended")));
9309  LWLockRelease(CheckpointLock);
9310  return false;
9311  }
9312 
9313  /*
9314  * If the last checkpoint record we've replayed is already our last
9315  * restartpoint, we can't perform a new restart point. We still update
9316  * minRecoveryPoint in that case, so that if this is a shutdown restart
9317  * point, we won't start up earlier than before. That's not strictly
9318  * necessary, but when hot standby is enabled, it would be rather weird if
9319  * the database opened up for read-only connections at a point-in-time
9320  * before the last shutdown. Such time travel is still possible in case of
9321  * immediate shutdown, though.
9322  *
9323  * We don't explicitly advance minRecoveryPoint when we do create a
9324  * restartpoint. It's assumed that flushing the buffers will do that as a
9325  * side-effect.
9326  */
9327  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
9328  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
9329  {
9330  ereport(DEBUG2,
9331  (errmsg("skipping restartpoint, already performed at %X/%X",
9332  (uint32) (lastCheckPoint.redo >> 32),
9333  (uint32) lastCheckPoint.redo)));
9334 
      /*
       * NOTE(review): source line 9335 is skipped; the comment above says
       * minRecoveryPoint is still updated on this path, presumably via the
       * UpdateMinRecoveryPoint() call named in the References list.  Lines
       * 9339 and 9341 inside the block below are skipped as well -- confirm.
       */
9336  if (flags & CHECKPOINT_IS_SHUTDOWN)
9337  {
9338  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9340  ControlFile->time = (pg_time_t) time(NULL);
9342  LWLockRelease(ControlFileLock);
9343  }
9344  LWLockRelease(CheckpointLock);
9345  return false;
9346  }
9347 
9348  /*
9349  * Update the shared RedoRecPtr so that the startup process can calculate
9350  * the number of segments replayed since last restartpoint, and request a
9351  * restartpoint if it exceeds CheckPointSegments.
9352  *
9353  * Like in CreateCheckPoint(), hold off insertions to update it, although
9354  * during recovery this is just pro forma, because no WAL insertions are
9355  * happening.
9356  */
      /*
       * NOTE(review): source lines 9357 and 9359 are skipped; presumably the
       * WALInsertLockAcquireExclusive()/WALInsertLockRelease() calls named in
       * the References list -- confirm.
       */
9358  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
9360 
9361  /* Also update the info_lck-protected copy */
      /*
       * NOTE(review): the lock calls around this store (source lines 9362 and
       * 9364) are not visible in this listing -- confirm.
       */
9363  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
9365 
9366  /*
9367  * Prepare to accumulate statistics.
9368  *
9369  * Note: because it is possible for log_checkpoints to change while a
9370  * checkpoint proceeds, we always accumulate stats, even if
9371  * log_checkpoints is currently off.
9372  */
9373  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
9375 
9376  if (log_checkpoints)
9377  LogCheckpointStart(flags, true);
9378 
9379  CheckPointGuts(lastCheckPoint.redo, flags);
9380 
9381  /*
9382  * Remember the prior checkpoint's redo ptr for
9383  * UpdateCheckPointDistanceEstimate()
9384  */
9385  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9386 
9387  /*
9388  * Update pg_control, using current time. Check that it still shows
9389  * IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
9390  * this is a quick hack to make sure nothing really bad happens if somehow
9391  * we get here after the end-of-recovery checkpoint.
9392  */
9393  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
      /*
       * NOTE(review): source line 9394, which opens the condition continued
       * on the next line, is skipped; per the comment above it presumably
       * tests the control file state against DB_IN_ARCHIVE_RECOVERY --
       * confirm.
       */
9395  ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
9396  {
9397  ControlFile->checkPoint = lastCheckPointRecPtr;
9398  ControlFile->checkPointCopy = lastCheckPoint;
9399  ControlFile->time = (pg_time_t) time(NULL);
9400 
9401  /*
9402  * Ensure minRecoveryPoint is past the checkpoint record. Normally,
9403  * this will have happened already while writing out dirty buffers,
9404  * but not necessarily - e.g. because no buffers were dirtied. We do
9405  * this because a non-exclusive base backup uses minRecoveryPoint to
9406  * determine which WAL files must be included in the backup, and the
9407  * file (or files) containing the checkpoint record must be included,
9408  * at a minimum. Note that for an ordinary restart of recovery there's
9409  * no value in having the minimum recovery point any earlier than this
9410  * anyway, because redo will begin just after the checkpoint record.
9411  */
9412  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
9413  {
9414  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
9416 
9417  /* update local copy */
      /*
       * NOTE(review): source lines 9415 and 9418-9419 are skipped, so the
       * statements that perform the "update local copy" step are not
       * visible here -- confirm.
       */
9420  }
9421  if (flags & CHECKPOINT_IS_SHUTDOWN)
      /*
       * NOTE(review): the statement governed by this `if` and the line after
       * it (source lines 9422-9423) are skipped; the closing brace below
       * belongs to the block opened at source line 9396, not to this `if`.
       */
9424  }
9425  LWLockRelease(ControlFileLock);
9426 
9427  /*
9428  * Update the average distance between checkpoints/restartpoints if the
9429  * prior checkpoint exists.
9430  */
9431  if (PriorRedoPtr != InvalidXLogRecPtr)
      /*
       * NOTE(review): the governed statement (source line 9432) is skipped;
       * presumably the UpdateCheckPointDistanceEstimate() call named in the
       * References list -- confirm.
       */
9433 
9434  /*
9435  * Delete old log files, those no longer needed for last restartpoint to
9436  * prevent the disk holding the xlog from growing full.
9437  */
      /*
       * NOTE(review): source line 9438 is skipped, so the initial assignment
       * of _logSegNo is not visible; presumably the XLByteToSeg macro named
       * in the References list -- confirm.
       */
9439 
9440  /*
9441  * Retreat _logSegNo using the current end of xlog replayed or received,
9442  * whichever is later.
9443  */
9444  receivePtr = GetWalRcvWriteRecPtr(NULL, NULL);
9445  replayPtr = GetXLogReplayRecPtr(&replayTLI);
9446  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
9447  KeepLogSeg(endptr, &_logSegNo);
9448  _logSegNo--;
9449 
9450  /*
9451  * Try to recycle segments on a useful timeline. If we've been promoted
9452  * since the beginning of this restartpoint, use the new timeline chosen
9453  * at end of recovery (RecoveryInProgress() sets ThisTimeLineID in that
9454  * case). If we're still in recovery, use the timeline we're currently
9455  * replaying.
9456  *
9457  * There is no guarantee that the WAL segments will be useful on the
9458  * current timeline; if recovery proceeds to a new timeline right after
9459  * this, the pre-allocated WAL segments on this timeline will not be used,
9460  * and will go wasted until recycled on the next restartpoint. We'll live
9461  * with that.
9462  */
9463  if (RecoveryInProgress())
9464  ThisTimeLineID = replayTLI;
9465 
9466  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr);
9467 
9468  /*
9469  * Make more log segments if needed. (Do this after recycling old log
9470  * segments, since that may supply some of the needed files.)
9471  */
9472  PreallocXlogFiles(endptr);
9473 
9474  /*
9475  * ThisTimeLineID is normally not set when we're still in recovery.
9476  * However, recycling/preallocating segments above needed ThisTimeLineID
9477  * to determine which timeline to install the segments on. Reset it now,
9478  * to restore the normal state of affairs for debugging purposes.
9479  */
9480  if (RecoveryInProgress())
9481  ThisTimeLineID = 0;
9482 
9483  /*
9484  * Truncate pg_subtrans if possible. We can throw away all data before
9485  * the oldest XMIN of any running transaction. No future transaction will
9486  * attempt to reference any pg_subtrans entry older than that (see Asserts
9487  * in subtrans.c). When hot standby is disabled, though, we mustn't do
9488  * this because StartupSUBTRANS hasn't been called yet.
9489  */
9490  if (EnableHotStandby)
      /*
       * NOTE(review): the governed statement (source line 9491) is skipped;
       * presumably the TruncateSUBTRANS()/GetOldestXmin() call named in the
       * References list -- confirm.
       */
9492 
9493  /* Real work is done, but log and update before releasing lock. */
9494  LogCheckpointEnd(true);
9495 
9496  xtime = GetLatestXTime();
      /*
       * NOTE(review): source line 9497, which opens the reporting call whose
       * arguments follow, is skipped; the log level used is therefore not
       * visible here -- confirm.
       */
9498  (errmsg("recovery restart point at %X/%X",
9499  (uint32) (lastCheckPoint.redo >> 32), (uint32) lastCheckPoint.redo),
9500  xtime ? errdetail("Last completed transaction was at log time %s.",
9501  timestamptz_to_str(xtime)) : 0));
9502 
9503  LWLockRelease(CheckpointLock);
9504 
9505  /*
9506  * Finally, execute archive_cleanup_command, if any.
9507  */
      /*
       * NOTE(review): source lines 9508-9509 are skipped; the two lines below
       * are the trailing arguments of a call that is presumably
       * ExecuteRecoveryCommand() on XLogCtl->archiveCleanupCommand (both
       * named in the References list) -- confirm.
       */
9510  "archive_cleanup_command",
9511  false);
9512 
9513  return true;
9514 }
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8667
bool log_checkpoints
Definition: xlog.c:102
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal)
Definition: xlogarchive.c:330
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
int64 TimestampTz
Definition: timestamp.h:39
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2722
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:704
#define MemSet(start, val, len)
Definition: c.h:941
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9205
TimestampTz GetLatestXTime(void)
Definition: xlog.c:6239
CheckPoint checkPointCopy
Definition: pg_control.h:131
XLogCtlInsert Insert
Definition: xlog.c:577
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:8082
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:354
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:673
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define SpinLockAcquire(lock)
Definition: spin.h:62
void UpdateControlFile(void)
Definition: xlog.c:4771
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8582
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlog.c:11340
#define DEBUG2
Definition: elog.h:24
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3798
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errdetail(const char *fmt,...)
Definition: elog.c:873
unsigned int uint32
Definition: c.h:358
XLogRecPtr RedoRecPtr
Definition: xlog.c:581
#define ereport(elevel, rest)
Definition: elog.h:141
CheckPoint lastCheckPoint
Definition: xlog.c:675
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:832
#define PROCARRAY_FLAGS_DEFAULT
Definition: procarray.h:50
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9525
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
Definition: xlog.c:3928
TransactionId GetOldestXmin(Relation rel, int flags)
Definition: procarray.c:1306
uint64 XLogRecPtr
Definition: xlogdefs.h:21
CheckpointStatsData CheckpointStats
Definition: xlog.c:175
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
char archiveCleanupCommand[MAXPGPATH]
Definition: xlog.c:637
bool EnableHotStandby
Definition: xlog.c:96
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
XLogRecPtr RedoRecPtr
Definition: xlog.c:550
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:674
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8564
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:176
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:831
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1710
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ DataChecksumsEnabled()

bool DataChecksumsEnabled ( void  )

Definition at line 4841 of file xlog.c.

References Assert, and ControlFileData::data_checksum_version.

Referenced by PageIsVerified(), PageSetChecksumCopy(), PageSetChecksumInplace(), ReadControlFile(), and sendFile().

4842 {
      /*
       * Report whether data checksums are enabled: true when the control
       * file's data_checksum_version is greater than zero.  ControlFile must
       * already be set; this is asserted rather than handled.
       */
4843  Assert(ControlFile != NULL);
4844  return (ControlFile->data_checksum_version > 0);
4845 }
uint32 data_checksum_version
Definition: pg_control.h:221
static ControlFileData * ControlFile
Definition: xlog.c:715
#define Assert(condition)
Definition: c.h:732

◆ do_pg_abort_backup()

void do_pg_abort_backup ( void  )

Definition at line 11312 of file xlog.c.

References Assert, EXCLUSIVE_BACKUP_NONE, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, XLogCtlData::Insert, XLogCtlInsert::nonExclusiveBackups, SESSION_BACKUP_NON_EXCLUSIVE, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), and WALInsertLockRelease().

Referenced by base_backup_cleanup(), and nonexclusive_base_backup_cleanup().

11313 {
       /*
        * Abort handling for a backup started by this session; the only state
        * change visible in this listing is clearing
        * XLogCtl->Insert.forcePageWrites.
        *
        * NOTE(review): this listing skips source lines 11318, 11321-11324,
        * 11326-11327 and 11331, so the condition guarding the early return,
        * the condition guarding the brace block below, and any locking or
        * counter updates are not visible.  The References list for this
        * function names sessionBackupState, SESSION_BACKUP_NON_EXCLUSIVE,
        * SESSION_BACKUP_NONE, nonExclusiveBackups, exclusiveBackupState,
        * EXCLUSIVE_BACKUP_NONE, Assert, WALInsertLockAcquireExclusive() and
        * WALInsertLockRelease(); presumably they appear on the skipped
        * lines.  Confirm against xlog.c.
        */
11314  /*
11315  * Quick exit if session is not keeping around a non-exclusive backup
11316  * already started.
11317  */
       /* NOTE(review): the `if` guarding this return (line 11318) is skipped. */
11319  return;
11320 
11325 
       /* NOTE(review): the condition for this block (lines 11326-11327) is skipped. */
11328  {
11329  XLogCtl->Insert.forcePageWrites = false;
11330  }
11332 }
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
static SessionBackupState sessionBackupState
Definition: xlog.c:512
XLogCtlInsert Insert
Definition: xlog.c:577
bool forcePageWrites
Definition: xlog.c:551
int nonExclusiveBackups
Definition: xlog.c:563
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
#define Assert(condition)
Definition: c.h:732
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707

◆ do_pg_start_backup()

XLogRecPtr do_pg_start_backup ( const char *  backupidstr,
bool  fast,
TimeLineID starttli_p,
StringInfo  labelfile,
List **  tablespaces,
StringInfo  tblspcmapfile,
bool  infotbssize,
bool  needtblspcmapfile 
)

Definition at line 10390 of file xlog.c.

References AllocateDir(), AllocateFile(), appendStringInfo(), appendStringInfoChar(), BACKUP_LABEL_FILE, backup_started_in_recovery, BoolGetDatum, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, dirent::d_name, StringInfoData::data, DataDir, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STARTING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeDir(), FreeFile(), CheckPoint::fullPageWrites, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, StringInfoData::len, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), makeStringInfo(), MAXFNAMELEN, MAXPGPATH, XLogCtlInsert::nonExclusiveBackups, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_fsync(), pg_localtime(), pg_start_backup_callback(), pg_strftime(), pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, sendTablespace(), SESSION_BACKUP_EXCLUSIVE, SESSION_BACKUP_NON_EXCLUSIVE, sessionBackupState, tablespaceinfo::size, snprintf(), SpinLockAcquire, SpinLockRelease, stat, TABLESPACE_MAP, CheckPoint::ThisTimeLineID, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToSeg, XLogFileName, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_start_backup().

10394 {
10395  bool exclusive = (labelfile == NULL);
10396  bool backup_started_in_recovery = false;
10397  XLogRecPtr checkpointloc;
10398  XLogRecPtr startpoint;
10399  TimeLineID starttli;
10400  pg_time_t stamp_time;
10401  char strfbuf[128];
10402  char xlogfilename[MAXFNAMELEN];
10403  XLogSegNo _logSegNo;
10404  struct stat stat_buf;
10405  FILE *fp;
10406 
10407  backup_started_in_recovery = RecoveryInProgress();
10408 
10409  /*
10410  * Currently only non-exclusive backup can be taken during recovery.
10411  */
10412  if (backup_started_in_recovery && exclusive)
10413  ereport(ERROR,
10414  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10415  errmsg("recovery is in progress"),
10416  errhint("WAL control functions cannot be executed during recovery.")));
10417 
10418  /*
10419  * During recovery, we don't need to check WAL level. Because, if WAL
10420  * level is not sufficient, it's impossible to get here during recovery.
10421  */
10422  if (!backup_started_in_recovery && !XLogIsNeeded())
10423  ereport(ERROR,
10424  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10425  errmsg("WAL level not sufficient for making an online backup"),
10426  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10427 
10428  if (strlen(backupidstr) > MAXPGPATH)
10429  ereport(ERROR,
10430  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
10431  errmsg("backup label too long (max %d bytes)",
10432  MAXPGPATH)));
10433 
10434  /*
10435  * Mark backup active in shared memory. We must do full-page WAL writes
10436  * during an on-line backup even if not doing so at other times, because
10437  * it's quite possible for the backup dump to obtain a "torn" (partially
10438  * written) copy of a database page if it reads the page concurrently with
10439  * our write to the same page. This can be fixed as long as the first
10440  * write to the page in the WAL sequence is a full-page write. Hence, we
10441  * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
10442  * are no dirty pages in shared memory that might get dumped while the
10443  * backup is in progress without having a corresponding WAL record. (Once
10444  * the backup is complete, we need not force full-page writes anymore,
10445  * since we expect that any pages not modified during the backup interval
10446  * must have been correctly captured by the backup.)
10447  *
10448  * Note that forcePageWrites has no effect during an online backup from
10449  * the standby.
10450  *
10451  * We must hold all the insertion locks to change the value of
10452  * forcePageWrites, to ensure adequate interlocking against
10453  * XLogInsertRecord().
10454  */
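10455  WALInsertLockAcquireExclusive();
10456  if (exclusive)
10457  {
10458  /*
10459  * At first, mark that we're now starting an exclusive backup, to
10460  * ensure that there are no other sessions currently running
10461  * pg_start_backup() or pg_stop_backup().
10462  */
10463  if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE)
10464  {
10465  WALInsertLockRelease();
10466  ereport(ERROR,
10467  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10468  errmsg("a backup is already in progress"),
10469  errhint("Run pg_stop_backup() and try again.")));
10470  }
10471  XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING;
10472  }
10473  else
10474  XLogCtl->Insert.nonExclusiveBackups++;
10475  XLogCtl->Insert.forcePageWrites = true;
10476  WALInsertLockRelease();
10477 
10478  /* Ensure we release forcePageWrites if fail below */
10479  PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
10480  {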
10481  bool gotUniqueStartpoint = false;
10482  DIR *tblspcdir;
10483  struct dirent *de;
10484  tablespaceinfo *ti;
10485  int datadirpathlen;
10486 
10487  /*
10488  * Force an XLOG file switch before the checkpoint, to ensure that the
10489  * WAL segment the checkpoint is written to doesn't contain pages with
10490  * old timeline IDs. That would otherwise happen if you called
10491  * pg_start_backup() right after restoring from a PITR archive: the
10492  * first WAL segment containing the startup checkpoint has pages in
10493  * the beginning with the old timeline ID. That can cause trouble at
10494  * recovery: we won't have a history file covering the old timeline if
10495  * pg_wal directory was not included in the base backup and the WAL
10496  * archive was cleared too before starting the backup.
10497  *
10498  * This also ensures that we have emitted a WAL page header that has
10499  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
10500  * Therefore, if a WAL archiver (such as pglesslog) is trying to
10501  * compress out removable backup blocks, it won't remove any that
10502  * occur after this point.
10503  *
10504  * During recovery, we skip forcing XLOG file switch, which means that
10505  * the backup taken during recovery is not available for the special
10506  * recovery case described above.
10507  */
10508  if (!backup_started_in_recovery)
10509  RequestXLogSwitch(false);
10510 
10511  do
10512  {
10513  bool checkpointfpw;
10514 
10515  /*
10516  * Force a CHECKPOINT. Aside from being necessary to prevent torn
10517  * page problems, this guarantees that two successive backup runs
10518  * will have different checkpoint positions and hence different
10519  * history file names, even if nothing happened in between.
10520  *
10521  * During recovery, establish a restartpoint if possible. We use
10522  * the last restartpoint as the backup starting checkpoint. This
10523  * means that two successive backup runs can have same checkpoint
10524  * positions.
10525  *
10526  * Since the fact that we are executing do_pg_start_backup()
10527  * during recovery means that checkpointer is running, we can use
10528  * RequestCheckpoint() to establish a restartpoint.
10529  *
10530  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
10531  * passing fast = true). Otherwise this can take awhile.
10532  */
10533  RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
10534  (fast ? CHECKPOINT_IMMEDIATE : 0));
10535 
10536  /*
10537  * Now we need to fetch the checkpoint record location, and also
10538  * its REDO pointer. The oldest point in WAL that would be needed
10539  * to restore starting from the checkpoint is precisely the REDO
10540  * pointer.
10541  */
10542  LWLockAcquire(ControlFileLock, LW_SHARED);
10543  checkpointloc = ControlFile->checkPoint;
10544  startpoint = ControlFile->checkPointCopy.redo;
10545  starttli = ControlFile->checkPointCopy.ThisTimeLineID;
10546  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
10547  LWLockRelease(ControlFileLock);
10548 
10549  if (backup_started_in_recovery)
10550  {
10551  XLogRecPtr recptr;
10552 
10553  /*
10554  * Check to see if all WAL replayed during online backup
10555  * (i.e., since last restartpoint used as backup starting
10556  * checkpoint) contain full-page writes.
10557  */
10558  SpinLockAcquire(&XLogCtl->info_lck);
10559  recptr = XLogCtl->lastFpwDisableRecPtr;
10560  SpinLockRelease(&XLogCtl->info_lck);
10561 
10562  if (!checkpointfpw || startpoint <= recptr)
10563  ereport(ERROR,
10564  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10565  errmsg("WAL generated with full_page_writes=off was replayed "
10566  "since last restartpoint"),
10567  errhint("This means that the backup being taken on the standby "
10568  "is corrupt and should not be used. "
10569  "Enable full_page_writes and run CHECKPOINT on the master, "
10570  "and then try an online backup again.")));
10571 
10572  /*
10573  * During recovery, since we don't use the end-of-backup WAL
10574  * record and don't write the backup history file, the
10575  * starting WAL location doesn't need to be unique. This means
10576  * that two base backups started at the same time might use
10577  * the same checkpoint as starting locations.
10578  */
10579  gotUniqueStartpoint = true;
10580  }
10581 
10582  /*
10583  * If two base backups are started at the same time (in WAL sender
10584  * processes), we need to make sure that they use different
10585  * checkpoints as starting locations, because we use the starting
10586  * WAL location as a unique identifier for the base backup in the
10587  * end-of-backup WAL record and when we write the backup history
10588  * file. Perhaps it would be better to generate a separate unique ID
10589  * for each backup instead of forcing another checkpoint, but
10590  * taking a checkpoint right after another is not that expensive
10591  * either because only few buffers have been dirtied yet.
10592  */
10593  WALInsertLockAcquireExclusive();
10594  if (XLogCtl->Insert.lastBackupStart < startpoint)
10595  {
10596  XLogCtl->Insert.lastBackupStart = startpoint;
10597  gotUniqueStartpoint = true;
10598  }
10599  WALInsertLockRelease();
10600  } while (!gotUniqueStartpoint);
10601 
10602  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
10603  XLogFileName(xlogfilename, starttli, _logSegNo, wal_segment_size);
10604 
10605  /*
10606  * Construct tablespace_map file
10607  */
10608  if (exclusive)
10609  tblspcmapfile = makeStringInfo();
10610 
10611  datadirpathlen = strlen(DataDir);
10612 
10613  /* Collect information about all tablespaces */
10614  tblspcdir = AllocateDir("pg_tblspc");
10615  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
10616  {
10617  char fullpath[MAXPGPATH + 10];
10618  char linkpath[MAXPGPATH];
10619  char *relpath = NULL;
10620  int rllen;
10621  StringInfoData buflinkpath;
10622  char *s = linkpath;
10623 
10624  /* Skip special stuff */
10625  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
10626  continue;
10627 
10628  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
10629 
10630 #if defined(HAVE_READLINK) || defined(WIN32)
10631  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
10632  if (rllen < 0)
10633  {
10634  ereport(WARNING,
10635  (errmsg("could not read symbolic link \"%s\": %m",
10636  fullpath)));
10637  continue;
10638  }
10639  else if (rllen >= sizeof(linkpath))
10640  {
10641  ereport(WARNING,
10642  (errmsg("symbolic link \"%s\" target is too long",
10643  fullpath)));
10644  continue;
10645  }
10646  linkpath[rllen] = '\0';
10647 
10648  /*
10649  * Add the escape character '\\' before newline in a string to
10650  * ensure that we can distinguish between the newline in the
10651  * tablespace path and end of line while reading tablespace_map
10652  * file during archive recovery.
10653  */
10654  initStringInfo(&buflinkpath);
10655 
10656  while (*s)
10657  {
10658  if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
10659  appendStringInfoChar(&buflinkpath, '\\');
10660  appendStringInfoChar(&buflinkpath, *s++);
10661  }
10662 
10663  /*
10664  * Relpath holds the relative path of the tablespace directory
10665  * when it's located within PGDATA, or NULL if it's located
10666  * elsewhere.
10667  */
10668  if (rllen > datadirpathlen &&
10669  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
10670  IS_DIR_SEP(linkpath[datadirpathlen]))
10671  relpath = linkpath + datadirpathlen + 1;
10672 
10673  ti = palloc(sizeof(tablespaceinfo));
10674  ti->oid = pstrdup(de->d_name);
10675  ti->path = pstrdup(buflinkpath.data);
10676  ti->rpath = relpath ? pstrdup(relpath) : NULL;
10677  ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
10678 
10679  if (tablespaces)
10680  *tablespaces = lappend(*tablespaces, ti);
10681 
10682  appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
10683 
10684  pfree(buflinkpath.data);
10685 #else
10686 
10687  /*
10688  * If the platform does not have symbolic links, it should not be
10689  * possible to have tablespaces - clearly somebody else created
10690  * them. Warn about it and ignore.
10691  */
10692  ereport(WARNING,
10693  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
10694  errmsg("tablespaces are not supported on this platform")));
10695 #endif
10696  }
10697  FreeDir(tblspcdir);
10698 
10699  /*
10700  * Construct backup label file
10701  */
10702  if (exclusive)
10703  labelfile = makeStringInfo();
10704 
10705  /* Use the log timezone here, not the session timezone */
10706  stamp_time = (pg_time_t) time(NULL);
10707  pg_strftime(strfbuf, sizeof(strfbuf),
10708  "%Y-%m-%d %H:%M:%S %Z",
10709  pg_localtime(&stamp_time, log_timezone));
10710  appendStringInfo(labelfile, "START WAL LOCATION: %X/%X (file %s)\n",
10711  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename);
10712  appendStringInfo(labelfile, "CHECKPOINT LOCATION: %X/%X\n",
10713  (uint32) (checkpointloc >> 32), (uint32) checkpointloc);
10714  appendStringInfo(labelfile, "BACKUP METHOD: %s\n",
10715  exclusive ? "pg_start_backup" : "streamed");
10716  appendStringInfo(labelfile, "BACKUP FROM: %s\n",
10717  backup_started_in_recovery ? "standby" : "master");
10718  appendStringInfo(labelfile, "START TIME: %s\n", strfbuf);
10719  appendStringInfo(labelfile, "LABEL: %s\n", backupidstr);
10720  appendStringInfo(labelfile, "START TIMELINE: %u\n", starttli);
10721 
10722  /*
10723  * Okay, write the file, or return its contents to caller.
10724  */
10725  if (exclusive)
10726  {
10727  /*
10728  * Check for existing backup label --- implies a backup is already
10729  * running. (XXX given that we checked exclusiveBackupState
10730  * above, maybe it would be OK to just unlink any such label
10731  * file?)
10732  */
10733  if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
10734  {
10735  if (errno != ENOENT)
10736  ereport(ERROR,
10737  (errcode_for_file_access(),
10738  errmsg("could not stat file \"%s\": %m",
10739  BACKUP_LABEL_FILE)));
10740  }
10741  else
10742  ereport(ERROR,
10743  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10744  errmsg("a backup is already in progress"),
10745  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10746  BACKUP_LABEL_FILE)));
10747 
10748  fp = AllocateFile(BACKUP_LABEL_FILE, "w");
10749 
10750  if (!fp)
10751  ereport(ERROR,
10752  (errcode_for_file_access(),
10753  errmsg("could not create file \"%s\": %m",
10754  BACKUP_LABEL_FILE)));
10755  if (fwrite(labelfile->data, labelfile->len, 1, fp) != 1 ||
10756  fflush(fp) != 0 ||
10757  pg_fsync(fileno(fp)) != 0 ||
10758  ferror(fp) ||
10759  FreeFile(fp))
10760  ereport(ERROR,
10761  (errcode_for_file_access(),
10762  errmsg("could not write file \"%s\": %m",
10763  BACKUP_LABEL_FILE)));
10764  /* Allocated locally for exclusive backups, so free separately */
10765  pfree(labelfile->data);
10766  pfree(labelfile);
10767 
10768  /* Write backup tablespace_map file. */
10769  if (tblspcmapfile->len > 0)
10770  {
10771  if (stat(TABLESPACE_MAP, &stat_buf) != 0)
10772  {
10773  if (errno != ENOENT)
10774  ereport(ERROR,
10775  (errcode_for_file_access(),
10776  errmsg("could not stat file \"%s\": %m",
10777  TABLESPACE_MAP)));
10778  }
10779  else
10780  ereport(ERROR,
10781  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10782  errmsg("a backup is already in progress"),
10783  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10784  TABLESPACE_MAP)));
10785 
10786  fp = AllocateFile(TABLESPACE_MAP, "w");
10787 
10788  if (!fp)
10789  ereport(ERROR,
10790  (errcode_for_file_access(),
10791  errmsg("could not create file \"%s\": %m",
10792  TABLESPACE_MAP)));
10793  if (fwrite(tblspcmapfile->data, tblspcmapfile->len, 1, fp) != 1 ||
10794  fflush(fp) != 0 ||
10795  pg_fsync(fileno(fp)) != 0 ||
10796  ferror(fp) ||
10797  FreeFile(fp))
10798  ereport(ERROR,
10799  (errcode_for_file_access(),
10800  errmsg("could not write file \"%s\": %m",
10801  TABLESPACE_MAP)));
10802  }
10803 
10804  /* Allocated locally for exclusive backups, so free separately */
10805  pfree(tblspcmapfile->data);
10806  pfree(tblspcmapfile);
10807  }
10808  }
10809  PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
10810 
10811  /*
10812  * Mark that start phase has correctly finished for an exclusive backup.
10813  * Session-level locks are updated as well to reflect that state.
10814  *
10815  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating backup
10816  * counters and session-level lock. Otherwise they can be updated
10817  * inconsistently, which might cause do_pg_abort_backup() to fail.
10818  */
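10819  if (exclusive)
10820  {
10821  WALInsertLockAcquireExclusive();
10822  XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
10823 
10824  /* Set session-level lock */
10825  sessionBackupState = SESSION_BACKUP_EXCLUSIVE;
10826  WALInsertLockRelease();
10827  }
10828  else
10829  sessionBackupState = SESSION_BACKUP_NON_EXCLUSIVE;
10830 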
10831  /*
10832  * We're done. As a convenience, return the starting WAL location.
10833  */
10834  if (starttli_p)
10835  *starttli_p = starttli;
10836  return startpoint;
10837 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:122
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9602
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
int wal_segment_size
Definition: xlog.c:113
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:702
static SessionBackupState sessionBackupState
Definition: xlog.c:512
XLogRecPtr lastBackupStart
Definition: xlog.c:564
char * pstrdup(const char *in)
Definition: mcxt.c:1161
#define XLogIsNeeded()
Definition: xlog.h:146
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
slock_t info_lck
Definition: xlog.c:704
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3, 4)
CheckPoint checkPointCopy
Definition: pg_control.h:131
XLogCtlInsert Insert
Definition: xlog.c:577
bool RecoveryInProgress(void)
Definition: xlog.c:8082
static bool backup_started_in_recovery
Definition: basebackup.c:78
Definition: dirent.h:9
#define IS_DIR_SEP(ch)
Definition: port.h:84
pg_tz * log_timezone
Definition: pgtz.c:31
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define TABLESPACE_MAP
Definition: xlog.h:325
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
void pfree(void *pointer)
Definition: mcxt.c:1031
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
bool forcePageWrites
Definition: xlog.c:551
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
struct stat stat_buf
Definition: pg_standby.c:102
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
#define readlink(path, buf, size)
Definition: win32_port.h:233
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2336
unsigned int uint32
Definition: c.h:358
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:962
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2590
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define ereport(elevel, rest)
Definition: elog.h:141
List * lappend(List *list, void *datum)
Definition: list.c:128
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:563
#define stat(a, b)
Definition: win32_port.h:266
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
static void pg_start_backup_callback(int code, Datum arg)
Definition: xlog.c:10841
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
uintptr_t Datum
Definition: postgres.h:367
static ControlFileData * ControlFile
Definition: xlog.c:715
#define BoolGetDatum(X)
Definition: postgres.h:387
bool fullPageWrites
Definition: pg_control.h:41
#define CHECKPOINT_WAIT
Definition: xlog.h:184
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2656
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1314
int FreeFile(FILE *file)
Definition: fd.c:2528
void * palloc(Size size)
Definition: mcxt.c:924
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:179
#define relpath(rnode, forknum)
Definition: relpath.h:87
char * DataDir
Definition: globals.c:61
#define BACKUP_LABEL_FILE
Definition: xlog.h:322
int pg_fsync(int fd)
Definition: fd.c:341
char d_name[MAX_PATH]
Definition: dirent.h:14
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
int FreeDir(DIR *dir)
Definition: fd.c:2708
void RequestCheckpoint(int flags)
Definition: checkpointer.c:951
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ do_pg_stop_backup()

XLogRecPtr do_pg_stop_backup ( char *  labelfile,
bool  waitforarchive,
TimeLineID stoptli_p 
)

Definition at line 10907 of file xlog.c.

References AllocateFile(), Assert, BACKUP_LABEL_FILE, backup_started_in_recovery, BackupHistoryFileName, BackupHistoryFilePath, BoolGetDatum, CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), DEBUG1, durable_unlink(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STOPPING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::lastFpwDisableRecPtr, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlInsert::nonExclusiveBackups, NOTICE, palloc(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_localtime(), pg_stop_backup_callback(), pg_strftime(), pg_usleep(), RecoveryInProgress(), remaining, RequestXLogSwitch(), SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, stat, TABLESPACE_MAP, ThisTimeLineID, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogFileName, XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), pg_stop_backup(), and pg_stop_backup_v2().

10908 {
10909  bool exclusive = (labelfile == NULL);
10910  bool backup_started_in_recovery = false;
10911  XLogRecPtr startpoint;
10912  XLogRecPtr stoppoint;
10913  TimeLineID stoptli;
10914  pg_time_t stamp_time;
10915  char strfbuf[128];
10916  char histfilepath[MAXPGPATH];
10917  char startxlogfilename[MAXFNAMELEN];
10918  char stopxlogfilename[MAXFNAMELEN];
10919  char lastxlogfilename[MAXFNAMELEN];
10920  char histfilename[MAXFNAMELEN];
10921  char backupfrom[20];
10922  XLogSegNo _logSegNo;
10923  FILE *lfp;
10924  FILE *fp;
10925  char ch;
10926  int seconds_before_warning;
10927  int waits = 0;
10928  bool reported_waiting = false;
10929  char *remaining;
10930  char *ptr;
10931  uint32 hi,
10932  lo;
10933 
10934  backup_started_in_recovery = RecoveryInProgress();
10935 
10936  /*
10937  * Currently only non-exclusive backup can be taken during recovery.
10938  */
10939  if (backup_started_in_recovery && exclusive)
10940  ereport(ERROR,
10941  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10942  errmsg("recovery is in progress"),
10943  errhint("WAL control functions cannot be executed during recovery.")));
10944 
10945  /*
10946  * During recovery, we don't need to check WAL level. Because, if WAL
10947  * level is not sufficient, it's impossible to get here during recovery.
10948  */
10949  if (!backup_started_in_recovery && !XLogIsNeeded())
10950  ereport(ERROR,
10951  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10952  errmsg("WAL level not sufficient for making an online backup"),
10953  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10954 
10955  if (exclusive)
10956  {
10957  /*
10958  * At first, mark that we're now stopping an exclusive backup, to
10959  * ensure that there are no other sessions currently running
10960  * pg_start_backup() or pg_stop_backup().
10961  */
10964  {
10966  ereport(ERROR,
10967  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10968  errmsg("exclusive backup not in progress")));
10969  }
10972 
10973  /*
10974  * Remove backup_label. In case of failure, the state for an exclusive
10975  * backup is switched back to in-progress.
10976  */
10978  {
10979  /*
10980  * Read the existing label file into memory.
10981  */
10982  struct stat statbuf;
10983  int r;
10984 
10985  if (stat(BACKUP_LABEL_FILE, &statbuf))
10986  {
10987  /* should not happen per the upper checks */
10988  if (errno != ENOENT)
10989  ereport(ERROR,
10991  errmsg("could not stat file \"%s\": %m",
10992  BACKUP_LABEL_FILE)));
10993  ereport(ERROR,
10994  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10995  errmsg("a backup is not in progress")));
10996  }
10997 
10998  lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
10999  if (!lfp)
11000  {
11001  ereport(ERROR,
11003  errmsg("could not read file \"%s\": %m",
11004  BACKUP_LABEL_FILE)));
11005  }
11006  labelfile = palloc(statbuf.st_size + 1);
11007  r = fread(labelfile, statbuf.st_size, 1, lfp);
11008  labelfile[statbuf.st_size] = '\0';
11009 
11010  /*
11011  * Close and remove the backup label file
11012  */
11013  if (r != 1 || ferror(lfp) || FreeFile(lfp))
11014  ereport(ERROR,
11016  errmsg("could not read file \"%s\": %m",
11017  BACKUP_LABEL_FILE)));
11019 
11020  /*
11021  * Remove tablespace_map file if present, it is created only if
11022  * there are tablespaces.
11023  */
11025  }
11027  }
11028 
11029  /*
11030  * OK to update backup counters, forcePageWrites and session-level lock.
11031  *
11032  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them.
11033  * Otherwise they can be updated inconsistently, and which might cause
11034  * do_pg_abort_backup() to fail.
11035  */
11037  if (exclusive)
11038  {
11040  }
11041  else
11042  {
11043  /*
11044  * The user-visible pg_start/stop_backup() functions that operate on
11045  * exclusive backups can be called at any time, but for non-exclusive
11046  * backups, it is expected that each do_pg_start_backup() call is
11047  * matched by exactly one do_pg_stop_backup() call.
11048  */
11051  }
11052 
11055  {
11056  XLogCtl->Insert.forcePageWrites = false;
11057  }
11058 
11059  /*
11060  * Clean up session-level lock.
11061  *
11062  * You might think that WALInsertLockRelease() can be called before
11063  * cleaning up session-level lock because session-level lock doesn't need
11064  * to be protected with WAL insertion lock. But since
11065  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
11066  * cleaned up before it.
11067  */
11069 
11071 
11072  /*
11073  * Read and parse the START WAL LOCATION line (this code is pretty crude,
11074  * but we are not expecting any variability in the file format).
11075  */
11076  if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
11077  &hi, &lo, startxlogfilename,
11078  &ch) != 4 || ch != '\n')
11079  ereport(ERROR,
11080  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11081  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
11082  startpoint = ((uint64) hi) << 32 | lo;
11083  remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
11084 
11085  /*
11086  * Parse the BACKUP FROM line. If we are taking an online backup from the
11087  * standby, we confirm that the standby has not been promoted during the
11088  * backup.
11089  */
11090  ptr = strstr(remaining, "BACKUP FROM:");
11091  if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
11092  ereport(ERROR,
11093  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11094  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
11095  if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
11096  ereport(ERROR,
11097  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11098  errmsg("the standby was promoted during online backup"),
11099  errhint("This means that the backup being taken is corrupt "
11100  "and should not be used. "
11101  "Try taking another online backup.")));
11102 
11103  /*
11104  * During recovery, we don't write an end-of-backup record. We assume that
11105  * pg_control was backed up last and its minimum recovery point can be
11106  * available as the backup end location. Since we don't have an
11107  * end-of-backup record, we use the pg_control value to check whether
11108  * we've reached the end of backup when starting recovery from this
11109  * backup. We have no way of checking if pg_control wasn't backed up last
11110  * however.
11111  *
11112  * We don't force a switch to new WAL file but it is still possible to
11113  * wait for all the required files to be archived if waitforarchive is
11114  * true. This is okay if we use the backup to start a standby and fetch
11115  * the missing WAL using streaming replication. But in the case of an
11116  * archive recovery, a user should set waitforarchive to true and wait for
11117  * them to be archived to ensure that all the required files are
11118  * available.
11119  *
11120  * We return the current minimum recovery point as the backup end
11121  * location. Note that it can be greater than the exact backup end
11122  * location if the minimum recovery point is updated after the backup of
11123  * pg_control. This is harmless for current uses.
11124  *
11125  * XXX currently a backup history file is for informational and debug
11126  * purposes only. It's not essential for an online backup. Furthermore,
11127  * even if it's created, it will not be archived during recovery because
11128  * an archiver is not invoked. So it doesn't seem worthwhile to write a
11129  * backup history file during recovery.
11130  */
11131  if (backup_started_in_recovery)
11132  {
11133  XLogRecPtr recptr;
11134 
11135  /*
11136  * Check to see if all WAL replayed during online backup contain
11137  * full-page writes.
11138  */
11140  recptr = XLogCtl->lastFpwDisableRecPtr;
11142 
11143  if (startpoint <= recptr)
11144  ereport(ERROR,
11145  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11146  errmsg("WAL generated with full_page_writes=off was replayed "
11147  "during online backup"),
11148  errhint("This means that the backup being taken on the standby "
11149  "is corrupt and should not be used. "
11150  "Enable full_page_writes and run CHECKPOINT on the master, "
11151  "and then try an online backup again.")));
11152 
11153 
11154  LWLockAcquire(ControlFileLock, LW_SHARED);
11155  stoppoint = ControlFile->minRecoveryPoint;
11156  stoptli = ControlFile->minRecoveryPointTLI;
11157  LWLockRelease(ControlFileLock);
11158  }
11159  else
11160  {
11161  /*
11162  * Write the backup-end xlog record
11163  */
11164  XLogBeginInsert();
11165  XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
11166  stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
11167  stoptli = ThisTimeLineID;
11168 
11169  /*
11170  * Force a switch to a new xlog segment file, so that the backup is
11171  * valid as soon as archiver moves out the current segment file.
11172  */
11173  RequestXLogSwitch(false);
11174 
11175  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11176  XLogFileName(stopxlogfilename, stoptli, _logSegNo, wal_segment_size);
11177 
11178  /* Use the log timezone here, not the session timezone */
11179  stamp_time = (pg_time_t) time(NULL);
11180  pg_strftime(strfbuf, sizeof(strfbuf),
11181  "%Y-%m-%d %H:%M:%S %Z",
11182  pg_localtime(&stamp_time, log_timezone));
11183 
11184  /*
11185  * Write the backup history file
11186  */
11187  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11188  BackupHistoryFilePath(histfilepath, stoptli, _logSegNo,
11189  startpoint, wal_segment_size);
11190  fp = AllocateFile(histfilepath, "w");
11191  if (!fp)
11192  ereport(ERROR,
11194  errmsg("could not create file \"%s\": %m",
11195  histfilepath)));
11196  fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
11197  (uint32) (startpoint >> 32), (uint32) startpoint, startxlogfilename);
11198  fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
11199  (uint32) (stoppoint >> 32), (uint32) stoppoint, stopxlogfilename);
11200 
11201  /*
11202  * Transfer remaining lines including label and start timeline to
11203  * history file.
11204  */
11205  fprintf(fp, "%s", remaining);
11206  fprintf(fp, "STOP TIME: %s\n", strfbuf);
11207  fprintf(fp, "STOP TIMELINE: %u\n", stoptli);
11208  if (fflush(fp) || ferror(fp) || FreeFile(fp))
11209  ereport(ERROR,
11211  errmsg("could not write file \"%s\": %m",
11212  histfilepath)));
11213 
11214  /*
11215  * Clean out any no-longer-needed history files. As a side effect,
11216  * this will post a .ready file for the newly created history file,
11217  * notifying the archiver that history file may be archived
11218  * immediately.
11219  */
11221  }
11222 
11223  /*
11224  * If archiving is enabled, wait for all the required WAL files to be
11225  * archived before returning. If archiving isn't enabled, the required WAL
11226  * needs to be transported via streaming replication (hopefully with
11227  * wal_keep_segments set high enough), or some more exotic mechanism like
11228  * polling and copying files from pg_wal with script. We have no knowledge
11229  * of those mechanisms, so it's up to the user to ensure that he gets all
11230  * the required WAL.
11231  *
11232  * We wait until both the last WAL file filled during backup and the
11233  * history file have been archived, and assume that the alphabetic sorting
11234  * property of the WAL files ensures any earlier WAL files are safely
11235  * archived as well.
11236  *
11237  * We wait forever, since archive_command is supposed to work and we
11238  * assume the admin wanted his backup to work completely. If you don't
11239  * wish to wait, then either waitforarchive should be passed in as false,
11240  * or you can set statement_timeout. Also, some notices are issued to
11241  * clue in anyone who might be doing this interactively.
11242  */
11243 
11244  if (waitforarchive &&
11245  ((!backup_started_in_recovery && XLogArchivingActive()) ||
11246  (backup_started_in_recovery && XLogArchivingAlways())))
11247  {
11248  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11249  XLogFileName(lastxlogfilename, stoptli, _logSegNo, wal_segment_size);
11250 
11251  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11252  BackupHistoryFileName(histfilename, stoptli, _logSegNo,
11253  startpoint, wal_segment_size);
11254 
11255  seconds_before_warning = 60;
11256  waits = 0;
11257 
11258  while (XLogArchiveIsBusy(lastxlogfilename) ||
11259  XLogArchiveIsBusy(histfilename))
11260  {
11262 
11263  if (!reported_waiting && waits > 5)
11264  {
11265  ereport(NOTICE,
11266  (errmsg("pg_stop_backup cleanup done, waiting for required WAL segments to be archived")));
11267  reported_waiting = true;
11268  }
11269 
11270  pg_usleep(1000000L);
11271 
11272  if (++waits >= seconds_before_warning)
11273  {
11274  seconds_before_warning *= 2; /* This wraps in >10 years... */
11275  ereport(WARNING,
11276  (errmsg("pg_stop_backup still waiting for all required WAL segments to be archived (%d seconds elapsed)",
11277  waits),
11278  errhint("Check that your archive_command is executing properly. "
11279  "pg_stop_backup can be canceled safely, "
11280  "but the database backup will not be usable without all the WAL segments.")));
11281  }
11282  }
11283 
11284  ereport(NOTICE,
11285  (errmsg("pg_stop_backup complete, all required WAL segments have been archived")));
11286  }
11287  else if (waitforarchive)
11288  ereport(NOTICE,
11289  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
11290 
11291  /*
11292  * We're done. As a convenience, return the ending WAL location.
11293  */
11294  if (stoptli_p)
11295  *stoptli_p = stoptli;
11296  return stoppoint;
11297 }
int remaining
Definition: informix.c:692
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:122
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9602
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1685
int wal_segment_size
Definition: xlog.c:113
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:702
static SessionBackupState sessionBackupState
Definition: xlog.c:512
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogIsNeeded()
Definition: xlog.h:146
slock_t info_lck
Definition: xlog.c:704
int errcode(int sqlerrcode)
Definition: elog.c:575
XLogCtlInsert Insert
Definition: xlog.c:577
#define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes)
bool RecoveryInProgress(void)
Definition: xlog.c:8082
static bool backup_started_in_recovery
Definition: basebackup.c:78
pg_tz * log_timezone
Definition: pgtz.c:31
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1725
#define TABLESPACE_MAP
Definition: xlog.h:325
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define XLogArchivingAlways()
Definition: xlog.h:138
bool forcePageWrites
Definition: xlog.c:551
#define ERROR
Definition: elog.h:43
static void CleanupBackupHistory(void)
Definition: xlog.c:4188
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2336
unsigned int uint32
Definition: c.h:358
#define ereport(elevel, rest)
Definition: elog.h:141
#define XLOG_BACKUP_END
Definition: pg_control.h:72
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:563
#define stat(a, b)
Definition: win32_port.h:266
#define MAXFNAMELEN
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
#define BackupHistoryFilePath(path, tli, logSegNo, startpoint, wal_segsz_bytes)
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
uintptr_t Datum
Definition: postgres.h:367
static ControlFileData * ControlFile
Definition: xlog.c:715
#define BoolGetDatum(X)
Definition: postgres.h:387
TimeLineID ThisTimeLineID
Definition: xlog.c:181
#define NOTICE
Definition: elog.h:37
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:659
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
#define XLogArchivingActive()
Definition: xlog.h:135
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1121
static void pg_stop_backup_callback(int code, Datum arg)
Definition: xlog.c:10870
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1314
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:684
int FreeFile(FILE *file)
Definition: fd.c:2528
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define BACKUP_LABEL_FILE
Definition: xlog.h:322
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ emode_for_corrupt_record()

static int emode_for_corrupt_record ( int  emode,
XLogRecPtr  RecPtr 
)
static

Definition at line 12337 of file xlog.c.

References DEBUG1, LOG, readSource, and XLOG_FROM_PG_WAL.

Referenced by ReadRecord(), and XLogPageRead().

/*
 * emode_for_corrupt_record: pick the log level to use for a report about
 * the record at RecPtr.
 *
 * Returns emode unchanged, except in one case: when readSource is
 * XLOG_FROM_PG_WAL and emode is LOG, a call with the same RecPtr as the
 * one remembered from an earlier such call gets DEBUG1 instead.
 * NOTE(review): presumably this keeps the same complaint from being
 * repeated at LOG level on retries -- confirm against the callers
 * (ReadRecord() and XLogPageRead()).
 */
12338 {
 /* RecPtr of the last LOG-level call seen below; static, so it persists across calls */
12339  static XLogRecPtr lastComplaint = 0;
12340 
 /* Any other read source or requested level is passed through untouched */
12341  if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
12342  {
 /* Same position as last time: lower the level rather than report at LOG again */
12343  if (RecPtr == lastComplaint)
12344  emode = DEBUG1;
12345  else
 /* New position: keep LOG and remember it for the next call */
12346  lastComplaint = RecPtr;
12347  }
12348  return emode;
12349 }
#define DEBUG1
Definition: elog.h:25
static XLogSource readSource
Definition: xlog.c:790
#define LOG
Definition: elog.h:26
uint64 XLogRecPtr
Definition: xlogdefs.h:21

◆ exitArchiveRecovery()

static void exitArchiveRecovery ( TimeLineID  endTLI,
XLogRecPtr  endOfLog 
)
static

Definition at line 5643 of file xlog.c.

References Assert, close, durable_rename(), ereport, errcode_for_file_access(), errmsg(), ERROR, FATAL, fd(), InArchiveRecovery, InvalidXLogRecPtr, LOG, MAXFNAMELEN, MAXPGPATH, readFile, RECOVERY_COMMAND_DONE, RECOVERY_COMMAND_FILE, snprintf(), ThisTimeLineID, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLOGDIR, XLogFileCopy(), XLogFileInit(), XLogFileName, XLogFileNameP(), and XLogSegmentOffset.

Referenced by StartupXLOG().

5644 {
5645  char recoveryPath[MAXPGPATH];
5646  char xlogfname[MAXFNAMELEN];
5647  XLogSegNo endLogSegNo;
5648  XLogSegNo startLogSegNo;
5649 
5650  /* we always switch to a new timeline after archive recovery */
5651  Assert(endTLI != ThisTimeLineID);
5652 
5653  /*
5654  * We are no longer in archive recovery state.
5655  */
5656  InArchiveRecovery = false;
5657 
5658  /*
5659  * Update min recovery point one last time.
5660  */
5662 
5663  /*
5664  * If the ending log segment is still open, close it (to avoid problems on
5665  * Windows with trying to rename or delete an open file).
5666  */
5667  if (readFile >= 0)
5668  {
5669  close(readFile);
5670  readFile = -1;
5671  }
5672 
5673  /*
5674  * Calculate the last segment on the old timeline, and the first segment
5675  * on the new timeline. If the switch happens in the middle of a segment,
5676  * they are the same, but if the switch happens exactly at a segment
5677  * boundary, startLogSegNo will be endLogSegNo + 1.
5678  */
5679  XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5680  XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5681 
5682  /*
5683  * Initialize the starting WAL segment for the new timeline. If the switch
5684  * happens in the middle of a segment, copy data from the last WAL segment
5685  * of the old timeline up to the switch point, to the starting WAL segment
5686  * on the new timeline.
5687  */
5688  if (endLogSegNo == startLogSegNo)
5689  {
5690  /*
5691  * Make a copy of the file on the new timeline.
5692  *
5693  * Writing WAL isn't allowed yet, so there are no locking
5694  * considerations. But we should be just as tense as XLogFileInit to
5695  * avoid emplacing a bogus file.
5696  */
5697  XLogFileCopy(endLogSegNo, endTLI, endLogSegNo,