PostgreSQL Source Code  git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogutils.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "commands/progress.h"
#include "commands/tablespace.h"
#include "common/controldata_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_iovec.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walwriter.h"
#include "replication/basebackup.h"
#include "replication/logical.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/pg_rusage.h"
#include "utils/snapmgr.h"
#include "utils/timestamp.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 
struct  XLogPageReadPrivate
 

Macros

#define RECOVERY_COMMAND_FILE   "recovery.conf"
 
#define RECOVERY_COMMAND_DONE   "recovery.done"
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)   (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)   (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)   (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef enum ExclusiveBackupState ExclusiveBackupState
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 
typedef struct XLogPageReadPrivate XLogPageReadPrivate
 

Enumerations

enum  ExclusiveBackupState { EXCLUSIVE_BACKUP_NONE = 0, EXCLUSIVE_BACKUP_STARTING, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_STOPPING }
 
enum  XLogSource { XLOG_FROM_ANY = 0, XLOG_FROM_ARCHIVE, XLOG_FROM_PG_WAL, XLOG_FROM_STREAM }
 

Functions

static void readRecoverySignalFile (void)
 
static void validateRecoveryParameters (void)
 
static void exitArchiveRecovery (TimeLineID endTLI, XLogRecPtr endOfLog)
 
static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog)
 
static bool recoveryStopsBefore (XLogReaderState *record)
 
static bool recoveryStopsAfter (XLogReaderState *record)
 
static char * getRecoveryStopReason (void)
 
static void ConfirmRecoveryPaused (void)
 
static void recoveryPausesHere (bool endOfRecovery)
 
static bool recoveryApplyDelay (XLogReaderState *record)
 
static void SetLatestXTime (TimestampTz xtime)
 
static void SetCurrentChunkStartTime (TimestampTz xtime)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static void checkTimeLineSwitch (XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
 
static void VerifyOverwriteContrecord (xl_overwrite_contrecord *xlrec, XLogReaderState *state)
 
static void LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, bool opportunistic)
 
static bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
static void XLogWrite (XLogwrtRqst WriteRqst, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno)
 
static int XLogFileRead (XLogSegNo segno, int emode, TimeLineID tli, XLogSource source, bool notfoundOk)
 
static int XLogFileReadAnyTLI (XLogSegNo segno, int emode, XLogSource source)
 
static int XLogPageRead (XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
 
static bool WaitForWALToBecomeAvailable (XLogRecPtr RecPtr, bool randAccess, bool fetching_ckpt, XLogRecPtr tliRecPtr)
 
static void XLogShutdownWalRcv (void)
 
static int emode_for_corrupt_record (int emode, XLogRecPtr RecPtr)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr)
 
static void RemoveXlogFile (const char *segname, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static XLogRecord * ReadRecord (XLogReaderState *xlogreader, int emode, bool fetching_ckpt)
 
static void CheckRecoveryConsistency (void)
 
static bool PerformRecoveryXLogAction (void)
 
static XLogRecord * ReadCheckpointRecord (XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpt, bool report)
 
static bool rescanLatestTimeLine (void)
 
static void InitControlFile (uint64 sysidentifier)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static char * str_time (pg_time_t tnow)
 
static void SetPromoteIsTriggered (void)
 
static bool CheckForStandbyTrigger (void)
 
static void xlog_block_info (StringInfo buf, XLogReaderState *record)
 
static void xlog_outdesc (StringInfo buf, XLogReaderState *record)
 
static void pg_start_backup_callback (int code, Datum arg)
 
static void pg_stop_backup_callback (int code, Datum arg)
 
static bool read_backup_label (XLogRecPtr *checkPointLoc, bool *backupEndRequired, bool *backupFromStandby)
 
static bool read_tablespace_map (List **tablespaces)
 
static void rm_redo_error_callback (void *arg)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void checkXLogConsistency (XLogReaderState *record)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno)
 
static void XLogFileCopy (XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
static void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
void UpdateControlFile (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
void LocalProcessControlFile (bool reset)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
static bool getRecordTimestamp (XLogReaderState *record, TimestampTz *recordXtime)
 
RecoveryPauseState GetRecoveryPauseState (void)
 
void SetRecoveryPause (bool recoveryPause)
 
TimestampTz GetLatestXTime (void)
 
TimestampTz GetCurrentChunkReplayStartTime (void)
 
void GetXLogReceiptTime (TimestampTz *rtime, bool *fromStream)
 
static void RecoveryRequiresIntParameter (const char *param_name, int currValue, int minValue)
 
void StartupXLOG (void)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool HotStandbyActive (void)
 
bool HotStandbyActiveInReplay (void)
 
bool XLogInsertAllowed (void)
 
void InitXLOGAccess (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
void CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_xlog_sync_method (int new_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno)
 
XLogRecPtr do_pg_start_backup (const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
XLogRecPtr do_pg_stop_backup (char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogReplayRecPtr (TimeLineID *replayTLI)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
bool BackupInProgress (void)
 
void CancelBackup (void)
 
void StartupRequestWalReceiverRestart (void)
 
bool PromoteIsTriggered (void)
 
void RemovePromoteSignalFiles (void)
 
bool CheckPromoteSignal (void)
 
void WakeupRecovery (void)
 
void SetWalWriterSleeping (bool sleeping)
 
void XLogRequestWalReceiverReply (void)
 

Variables

uint32 bootstrap_data_checksum_version
 
int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = false
 
int sync_method = DEFAULT_SYNC_METHOD
 
int wal_level = WAL_LEVEL_MINIMAL
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
const struct config_enum_entry sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
const struct config_enum_entry recovery_target_action_options []
 
CheckpointStatsData CheckpointStats
 
TimeLineID ThisTimeLineID = 0
 
static XLogRecPtr LastRec
 
static XLogRecPtr flushedUpto = 0
 
static TimeLineID receiveTLI = 0
 
static XLogRecPtr abortedRecPtr
 
static XLogRecPtr missingContrecPtr
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static bool LocalHotStandbyActive = false
 
static bool LocalPromoteIsTriggered = false
 
static int LocalXLogInsertAllowed = -1
 
bool ArchiveRecoveryRequested = false
 
bool InArchiveRecovery = false
 
static bool standby_signal_file_found = false
 
static bool recovery_signal_file_found = false
 
static char * replay_image_masked = NULL
 
static char * primary_image_masked = NULL
 
char * recoveryRestoreCommand = NULL
 
char * recoveryEndCommand = NULL
 
char * archiveCleanupCommand = NULL
 
RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET
 
bool recoveryTargetInclusive = true
 
int recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE
 
TransactionId recoveryTargetXid
 
char * recovery_target_time_string
 
static TimestampTz recoveryTargetTime
 
const char * recoveryTargetName
 
XLogRecPtr recoveryTargetLSN
 
int recovery_min_apply_delay = 0
 
bool StandbyModeRequested = false
 
char * PrimaryConnInfo = NULL
 
char * PrimarySlotName = NULL
 
char * PromoteTriggerFile = NULL
 
bool wal_receiver_create_temp_slot = false
 
bool StandbyMode = false
 
static TransactionId recoveryStopXid
 
static TimestampTz recoveryStopTime
 
static XLogRecPtr recoveryStopLSN
 
static char recoveryStopName [MAXFNAMELEN]
 
static bool recoveryStopAfter
 
RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST
 
TimeLineID recoveryTargetTLIRequested = 0
 
TimeLineID recoveryTargetTLI = 0
 
static List * expectedTLEs
 
static TimeLineID curFileTLI
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static bool doRequestWalReceiverReply
 
static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static const char *const xlogSourceNames [] = {"any", "archive", "pg_wal", "stream"}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static int readFile = -1
 
static XLogSegNo readSegNo = 0
 
static uint32 readOff = 0
 
static uint32 readLen = 0
 
static XLogSource readSource = XLOG_FROM_ANY
 
static XLogSource currentSource = XLOG_FROM_ANY
 
static bool lastSourceFailed = false
 
static bool pendingWalRcvRestart = false
 
static TimestampTz XLogReceiptTime = 0
 
static XLogSource XLogReceiptSource = XLOG_FROM_ANY
 
static XLogRecPtr ReadRecPtr
 
static XLogRecPtr EndRecPtr
 
static XLogRecPtr minRecoveryPoint
 
static TimeLineID minRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
bool reachedConsistency = false
 
static bool InRedo = false
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 752 of file xlog.c.

Referenced by CopyXLogRecordToWAL(), and CreateCheckPoint().

◆ NextBufIdx

#define NextBufIdx (   idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 756 of file xlog.c.

Referenced by XLogWrite().

◆ NUM_XLOGINSERT_LOCKS

◆ RECOVERY_COMMAND_DONE

#define RECOVERY_COMMAND_DONE   "recovery.done"

Definition at line 88 of file xlog.c.

Referenced by readRecoverySignalFile().

◆ RECOVERY_COMMAND_FILE

#define RECOVERY_COMMAND_FILE   "recovery.conf"

Definition at line 87 of file xlog.c.

Referenced by readRecoverySignalFile().

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 763 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), GetXLogBuffer(), StartupXLOG(), and XLogWrite().

Typedef Documentation

◆ ExclusiveBackupState

◆ WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogPageReadPrivate

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ ExclusiveBackupState

Enumerator
EXCLUSIVE_BACKUP_NONE 
EXCLUSIVE_BACKUP_STARTING 
EXCLUSIVE_BACKUP_IN_PROGRESS 
EXCLUSIVE_BACKUP_STOPPING 

Definition at line 510 of file xlog.c.

◆ XLogSource

enum XLogSource
Enumerator
XLOG_FROM_ANY 
XLOG_FROM_ARCHIVE 
XLOG_FROM_PG_WAL 
XLOG_FROM_STREAM 

Definition at line 790 of file xlog.c.

791 {
792  XLOG_FROM_ANY = 0, /* request to read WAL from any source */
793  XLOG_FROM_ARCHIVE, /* restored using restore_command */
794  XLOG_FROM_PG_WAL, /* existing file in pg_wal */
795  XLOG_FROM_STREAM /* streamed from primary */
796 } XLogSource;
XLogSource
Definition: xlog.c:790

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
bool  opportunistic 
)
static

Definition at line 2136 of file xlog.c.

References Assert, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlInsert::forcePageWrites, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, Insert(), XLogCtlData::Insert, InvalidXLogRecPtr, XLogCtlData::LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), PgStat_MsgWal::m_wal_buffers_full, MemSet, missingContrecPtr, XLogCtlData::pages, pg_write_barrier, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, ThisTimeLineID, WaitXLogInsertionsToFinish(), wal_segment_size, WalStats, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLP_FIRST_IS_OVERWRITE_CONTRECORD, XLP_LONG_HEADER, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

2137 {
2139  int nextidx;
2140  XLogRecPtr OldPageRqstPtr;
2141  XLogwrtRqst WriteRqst;
2142  XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
2143  XLogRecPtr NewPageBeginPtr;
2144  XLogPageHeader NewPage;
2145  int npages = 0;
2146 
2147  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2148 
2149  /*
2150  * Now that we have the lock, check if someone initialized the page
2151  * already.
2152  */
2153  while (upto >= XLogCtl->InitializedUpTo || opportunistic)
2154  {
2156 
2157  /*
2158  * Get ending-offset of the buffer page we need to replace (this may
2159  * be zero if the buffer hasn't been used yet). Fall through if it's
2160  * already written out.
2161  */
2162  OldPageRqstPtr = XLogCtl->xlblocks[nextidx];
2163  if (LogwrtResult.Write < OldPageRqstPtr)
2164  {
2165  /*
2166  * Nope, got work to do. If we just want to pre-initialize as much
2167  * as we can without flushing, give up now.
2168  */
2169  if (opportunistic)
2170  break;
2171 
2172  /* Before waiting, get info_lck and update LogwrtResult */
2174  if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2175  XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2178 
2179  /*
2180  * Now that we have an up-to-date LogwrtResult value, see if we
2181  * still need to write it or if someone else already did.
2182  */
2183  if (LogwrtResult.Write < OldPageRqstPtr)
2184  {
2185  /*
2186  * Must acquire write lock. Release WALBufMappingLock first,
2187  * to make sure that all insertions that we need to wait for
2188  * can finish (up to this same position). Otherwise we risk
2189  * deadlock.
2190  */
2191  LWLockRelease(WALBufMappingLock);
2192 
2193  WaitXLogInsertionsToFinish(OldPageRqstPtr);
2194 
2195  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2196 
2198  if (LogwrtResult.Write >= OldPageRqstPtr)
2199  {
2200  /* OK, someone wrote it already */
2201  LWLockRelease(WALWriteLock);
2202  }
2203  else
2204  {
2205  /* Have to write it ourselves */
2206  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2207  WriteRqst.Write = OldPageRqstPtr;
2208  WriteRqst.Flush = 0;
2209  XLogWrite(WriteRqst, false);
2210  LWLockRelease(WALWriteLock);
2212  TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2213  }
2214  /* Re-acquire WALBufMappingLock and retry */
2215  LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2216  continue;
2217  }
2218  }
2219 
2220  /*
2221  * Now the next buffer slot is free and we can set it up to be the
2222  * next output page.
2223  */
2224  NewPageBeginPtr = XLogCtl->InitializedUpTo;
2225  NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2226 
2227  Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2228 
2229  NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2230 
2231  /*
2232  * Be sure to re-zero the buffer so that bytes beyond what we've
2233  * written will look like zeroes and not valid XLOG records...
2234  */
2235  MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
2236 
2237  /*
2238  * Fill the new page's header
2239  */
2240  NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2241 
2242  /* NewPage->xlp_info = 0; */ /* done by memset */
2243  NewPage->xlp_tli = ThisTimeLineID;
2244  NewPage->xlp_pageaddr = NewPageBeginPtr;
2245 
2246  /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2247 
2248  /*
2249  * If online backup is not in progress, mark the header to indicate
2250  * that WAL records beginning in this page have removable backup
2251  * blocks. This allows the WAL archiver to know whether it is safe to
2252  * compress archived WAL data by transforming full-block records into
2253  * the non-full-block format. It is sufficient to record this at the
2254  * page level because we force a page switch (in fact a segment
2255  * switch) when starting a backup, so the flag will be off before any
2256  * records can be written during the backup. At the end of a backup,
2257  * the last page will be marked as all unsafe when perhaps only part
2258  * is unsafe, but at worst the archiver would miss the opportunity to
2259  * compress a few records.
2260  */
2261  if (!Insert->forcePageWrites)
2262  NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2263 
2264  /*
2265  * If a record was found to be broken at the end of recovery, and
2266  * we're going to write on the page where its first contrecord was
2267  * lost, set the XLP_FIRST_IS_OVERWRITE_CONTRECORD flag on the page
2268  * header. See CreateOverwriteContrecordRecord().
2269  */
2270  if (missingContrecPtr == NewPageBeginPtr)
2271  {
2274  }
2275 
2276  /*
2277  * If first page of an XLOG segment file, make it a long header.
2278  */
2279  if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2280  {
2281  XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2282 
2283  NewLongPage->xlp_sysid = ControlFile->system_identifier;
2284  NewLongPage->xlp_seg_size = wal_segment_size;
2285  NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2286  NewPage->xlp_info |= XLP_LONG_HEADER;
2287  }
2288 
2289  /*
2290  * Make sure the initialization of the page becomes visible to others
2291  * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2292  * holding a lock.
2293  */
2294  pg_write_barrier();
2295 
2296  *((volatile XLogRecPtr *) &XLogCtl->xlblocks[nextidx]) = NewPageEndPtr;
2297 
2298  XLogCtl->InitializedUpTo = NewPageEndPtr;
2299 
2300  npages++;
2301  }
2302  LWLockRelease(WALBufMappingLock);
2303 
2304 #ifdef WAL_DEBUG
2305  if (XLOG_DEBUG && npages > 0)
2306  {
2307  elog(DEBUG1, "initialized %d pages, up to %X/%X",
2308  npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2309  }
2310 #endif
2311 }
XLogRecPtr InitializedUpTo
Definition: xlog.c:623
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int wal_segment_size
Definition: xlog.c:119
XLogRecPtr * xlblocks
Definition: xlog.c:631
static XLogwrtResult LogwrtResult
Definition: xlog.c:784
slock_t info_lck
Definition: xlog.c:735
#define MemSet(start, val, len)
Definition: c.h:1008
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1799
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
XLogCtlInsert Insert
Definition: xlog.c:588
XLogRecPtr Flush
Definition: xlog.c:430
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
bool forcePageWrites
Definition: xlog.c:562
uint64 system_identifier
Definition: pg_control.h:108
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
XLogwrtResult LogwrtResult
Definition: xlog.c:611
static void Insert(File file)
Definition: fd.c:1302
PgStat_Counter m_wal_buffers_full
Definition: pgstat.h:492
TimeLineID xlp_tli
Definition: xlog_internal.h:40
static void XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
Definition: xlog.c:2439
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr Write
Definition: xlog.c:429
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
PgStat_MsgWal WalStats
Definition: pgstat.c:133
static ControlFileData * ControlFile
Definition: xlog.c:746
XLogwrtRqst LogwrtRqst
Definition: xlog.c:591
TimeLineID ThisTimeLineID
Definition: xlog.c:194
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80
size_t Size
Definition: c.h:540
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:763
static XLogRecPtr missingContrecPtr
Definition: xlog.c:209
XLogRecPtr Write
Definition: xlog.c:435
#define elog(elevel,...)
Definition: elog.h:232
#define pg_write_barrier()
Definition: atomics.h:159
char * pages
Definition: xlog.c:630

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2354 of file xlog.c.

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

2355 {
2358 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2318
#define newval
double CheckPointCompletionTarget
Definition: checkpointer.c:145

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2347 of file xlog.c.

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

2348 {
2351 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2318
int max_wal_size_mb
Definition: xlog.c:91
#define newval

◆ assign_xlog_sync_method()

void assign_xlog_sync_method ( int  new_sync_method,
void *  extra 
)

Definition at line 10784 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), sync_method, ThisTimeLineID, WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN, wal_segment_size, XLogFileClose(), and XLogFileName.

10785 {
10786  if (sync_method != new_sync_method)
10787  {
10788  /*
10789  * To ensure that no blocks escape unsynced, force an fsync on the
10790  * currently open log segment (if any). Also, if the open flag is
10791  * changing, close the log file so it will be reopened (with new flag
10792  * bit) at next use.
10793  */
10794  if (openLogFile >= 0)
10795  {
10797  if (pg_fsync(openLogFile) != 0)
10798  {
10799  char xlogfname[MAXFNAMELEN];
10800  int save_errno;
10801 
10802  save_errno = errno;
10805  errno = save_errno;
10806  ereport(PANIC,
10808  errmsg("could not fsync file \"%s\": %m", xlogfname)));
10809  }
10810 
10812  if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
10813  XLogFileClose();
10814  }
10815  }
10816 }
static void pgstat_report_wait_end(void)
Definition: wait_event.h:274
int wal_segment_size
Definition: xlog.c:119
static int get_sync_bit(int method)
Definition: xlog.c:10728
#define PANIC
Definition: elog.h:50
static XLogSegNo openLogSegNo
Definition: xlog.c:808
static void XLogFileClose(void)
Definition: xlog.c:3911
int errcode_for_file_access(void)
Definition: elog.c:721
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:258
#define MAXFNAMELEN
static int openLogFile
Definition: xlog.c:807
TimeLineID ThisTimeLineID
Definition: xlog.c:194
#define ereport(elevel,...)
Definition: elog.h:157
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
int sync_method
Definition: xlog.c:107
int errmsg(const char *fmt,...)
Definition: elog.c:909
int pg_fsync(int fd)
Definition: fd.c:357

◆ BackupInProgress()

bool BackupInProgress ( void  )

Definition at line 12238 of file xlog.c.

References BACKUP_LABEL_FILE, and stat.

Referenced by pg_is_in_backup(), and PostmasterStateMachine().

12239 {
12240  struct stat stat_buf;
12241 
12242  return (stat(BACKUP_LABEL_FILE, &stat_buf) == 0);
12243 }
#define BACKUP_LABEL_FILE
Definition: xlog.h:363
#define stat
Definition: win32_port.h:283

◆ BootStrapXLOG()

void BootStrapXLOG ( void  )

Definition at line 5283 of file xlog.c.

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, CheckPoint::fullPageWrites, fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), XLogCtlData::InstallXLogFileSegmentActive, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, offsetof, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetMultiXactIdLimit(), SetTransactionIdLimit(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, TYPEALIGN, WAIT_EVENT_WAL_BOOTSTRAP_SYNC, WAIT_EVENT_WAL_BOOTSTRAP_WRITE, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

5284 {
5285  CheckPoint checkPoint;
5286  char *buffer;
5287  XLogPageHeader page;
5288  XLogLongPageHeader longpage;
5289  XLogRecord *record;
5290  char *recptr;
5291  uint64 sysidentifier;
5292  struct timeval tv;
5293  pg_crc32c crc;
5294 
5295  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5296  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
5298  LWLockRelease(ControlFileLock);
5299 
5300  /*
5301  * Select a hopefully-unique system identifier code for this installation.
5302  * We use the result of gettimeofday(), including the fractional seconds
5303  * field, as being about as unique as we can easily get. (Think not to
5304  * use random(), since it hasn't been seeded and there's no portable way
5305  * to seed it other than the system clock value...) The upper half of the
5306  * uint64 value is just the tv_sec part, while the lower half contains the
5307  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5308  * PID for a little extra uniqueness. A person knowing this encoding can
5309  * determine the initialization time of the installation, which could
5310  * perhaps be useful sometimes.
5311  */
5312  gettimeofday(&tv, NULL);
5313  sysidentifier = ((uint64) tv.tv_sec) << 32;
5314  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5315  sysidentifier |= getpid() & 0xFFF;
5316 
5317  /* First timeline ID is always 1 */
5318  ThisTimeLineID = 1;
5319 
5320  /* page buffer must be aligned suitably for O_DIRECT */
5321  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5322  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5323  memset(page, 0, XLOG_BLCKSZ);
5324 
5325  /*
5326  * Set up information for the initial checkpoint record
5327  *
5328  * The initial checkpoint record is written to the beginning of the WAL
5329  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5330  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5331  */
5332  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5333  checkPoint.ThisTimeLineID = ThisTimeLineID;
5334  checkPoint.PrevTimeLineID = ThisTimeLineID;
5335  checkPoint.fullPageWrites = fullPageWrites;
5336  checkPoint.nextXid =
5338  checkPoint.nextOid = FirstGenbkiObjectId;
5339  checkPoint.nextMulti = FirstMultiXactId;
5340  checkPoint.nextMultiOffset = 0;
5341  checkPoint.oldestXid = FirstNormalTransactionId;
5342  checkPoint.oldestXidDB = TemplateDbOid;
5343  checkPoint.oldestMulti = FirstMultiXactId;
5344  checkPoint.oldestMultiDB = TemplateDbOid;
5347  checkPoint.time = (pg_time_t) time(NULL);
5349 
5350  ShmemVariableCache->nextXid = checkPoint.nextXid;
5351  ShmemVariableCache->nextOid = checkPoint.nextOid;
5353  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5354  AdvanceOldestClogXid(checkPoint.oldestXid);
5355  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5356  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5358 
5359  /* Set up the XLOG page header */
5360  page->xlp_magic = XLOG_PAGE_MAGIC;
5361  page->xlp_info = XLP_LONG_HEADER;
5362  page->xlp_tli = ThisTimeLineID;
5364  longpage = (XLogLongPageHeader) page;
5365  longpage->xlp_sysid = sysidentifier;
5366  longpage->xlp_seg_size = wal_segment_size;
5367  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5368 
5369  /* Insert the initial checkpoint record */
5370  recptr = ((char *) page + SizeOfXLogLongPHD);
5371  record = (XLogRecord *) recptr;
5372  record->xl_prev = 0;
5373  record->xl_xid = InvalidTransactionId;
5374  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5376  record->xl_rmid = RM_XLOG_ID;
5377  recptr += SizeOfXLogRecord;
5378  /* fill the XLogRecordDataHeaderShort struct */
5379  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5380  *(recptr++) = sizeof(checkPoint);
5381  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5382  recptr += sizeof(checkPoint);
5383  Assert(recptr - (char *) record == record->xl_tot_len);
5384 
5385  INIT_CRC32C(crc);
5386  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5387  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5388  FIN_CRC32C(crc);
5389  record->xl_crc = crc;
5390 
5391  /* Create first XLOG segment file */
5393 
5394  /*
5395  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5396  * close the file again in a moment.
5397  */
5398 
5399  /* Write the first page with the initial record */
5400  errno = 0;
5402  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5403  {
5404  /* if write didn't set errno, assume problem is no disk space */
5405  if (errno == 0)
5406  errno = ENOSPC;
5407  ereport(PANIC,
5409  errmsg("could not write bootstrap write-ahead log file: %m")));
5410  }
5412 
5414  if (pg_fsync(openLogFile) != 0)
5415  ereport(PANIC,
5417  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5419 
5420  if (close(openLogFile) != 0)
5421  ereport(PANIC,
5423  errmsg("could not close bootstrap write-ahead log file: %m")));
5424 
5425  openLogFile = -1;
5426 
5427  /* Now create pg_control */
5428  InitControlFile(sysidentifier);
5429  ControlFile->time = checkPoint.time;
5430  ControlFile->checkPoint = checkPoint.redo;
5431  ControlFile->checkPointCopy = checkPoint;
5432 
5433  /* some additional ControlFile fields are set in WriteControlFile() */
5434  WriteControlFile();
5435 
5436  /* Bootstrap the commit log, too */
5437  BootStrapCLOG();
5441 
5442  pfree(buffer);
5443 
5444  /*
5445  * Force control file to be read - in contrast to normal processing we'd
5446  * otherwise never run the checks and GUC related initializations therein.
5447  */
5448  ReadControlFile();
5449 }
static void WriteControlFile(void)
Definition: xlog.c:4698
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
int gettimeofday(struct timeval *tp, struct timezone *tzp)
Definition: gettimeofday.c:104
int64 pg_time_t
Definition: pgtime.h:23
static void pgstat_report_wait_end(void)
Definition: wait_event.h:274
int wal_segment_size
Definition: xlog.c:119
pg_time_t time
Definition: pg_control.h:130
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:877
uint32 oidCount
Definition: transam.h:215
#define write(a, b, c)
Definition: win32.h:14
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:204
uint32 pg_crc32c
Definition: pg_crc32c.h:38
TransactionId oldestActiveXid
Definition: pg_control.h:63
void BootStrapMultiXact(void)
Definition: multixact.c:1894
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
static void InitControlFile(uint64 sysidentifier)
Definition: xlog.c:4663
int XLogFileInit(XLogSegNo logsegno)
Definition: xlog.c:3472
RmgrId xl_rmid
Definition: xlogrecord.h:47
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
CheckPoint checkPointCopy
Definition: pg_control.h:133
TransactionId oldestXid
Definition: pg_control.h:47
FullTransactionId nextXid
Definition: transam.h:220
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:50
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
bool fullPageWrites
Definition: xlog.c:99
void BootStrapSUBTRANS(void)
Definition: subtrans.c:211
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:328
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:1169
#define FirstNormalTransactionId
Definition: transam.h:34
uint32 xl_tot_len
Definition: xlogrecord.h:43
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
static void ReadControlFile(void)
Definition: xlog.c:4789
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
int errcode_for_file_access(void)
Definition: elog.c:721
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
#define FirstMultiXactId
Definition: multixact.h:25
bool InstallXLogFileSegmentActive
Definition: xlog.c:663
TimeLineID xlp_tli
Definition: xlog_internal.h:40
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:258
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
TransactionId newestCommitTsXid
Definition: pg_control.h:54
Oid oldestMultiDB
Definition: pg_control.h:50
static int openLogFile
Definition: xlog.c:807
static ControlFileData * ControlFile
Definition: xlog.c:746
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2213
TimeLineID ThisTimeLineID
Definition: xlog.c:194
Oid nextOid
Definition: pg_control.h:44
#define ereport(elevel,...)
Definition: elog.h:157
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:750
bool fullPageWrites
Definition: pg_control.h:42
void BootStrapCLOG(void)
Definition: clog.c:712
#define Assert(condition)
Definition: c.h:804
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
Oid oldestXidDB
Definition: pg_control.h:48
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:345
uint8 xl_info
Definition: xlogrecord.h:46
MultiXactId nextMulti
Definition: pg_control.h:45
static XLogCtlData * XLogCtl
Definition: xlog.c:738
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:228
TransactionId xl_xid
Definition: xlogrecord.h:44
#define FirstGenbkiObjectId
Definition: transam.h:195
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
int pg_fsync(int fd)
Definition: fd.c:357
#define close(a)
Definition: win32.h:12
void BootStrapCommitTs(void)
Definition: commit_ts.c:570
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
XLogRecPtr checkPoint
Definition: pg_control.h:131
XLogRecPtr redo
Definition: pg_control.h:37
#define offsetof(type, field)
Definition: c.h:727
FullTransactionId nextXid
Definition: pg_control.h:43
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2179

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2318 of file xlog.c.

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

2319 {
2320  double target;
2321 
2322  /*-------
2323  * Calculate the distance at which to trigger a checkpoint, to avoid
2324  * exceeding max_wal_size_mb. This is based on two assumptions:
2325  *
2326  * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2327  * WAL for two checkpoint cycles to allow us to recover from the
2328  * secondary checkpoint if the first checkpoint failed, though we
2329  * only did this on the primary anyway, not on standby. Keeping just
2330  * one checkpoint simplifies processing and reduces disk space in
2331  * many smaller databases.)
2332  * b) during checkpoint, we consume checkpoint_completion_target *
2333  * number of segments consumed between checkpoints.
2334  *-------
2335  */
2336  target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2338 
2339  /* round down */
2340  CheckPointSegments = (int) target;
2341 
2342  if (CheckPointSegments < 1)
2343  CheckPointSegments = 1;
2344 }
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:775
int wal_segment_size
Definition: xlog.c:119
int max_wal_size_mb
Definition: xlog.c:91
int CheckPointSegments
Definition: xlog.c:132
double CheckPointCompletionTarget
Definition: checkpointer.c:145

◆ CancelBackup()

void CancelBackup ( void  )

Definition at line 12258 of file xlog.c.

References BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1, durable_rename(), ereport, errcode_for_file_access(), errdetail(), errmsg(), LOG, stat, TABLESPACE_MAP, TABLESPACE_MAP_OLD, and WARNING.

Referenced by PostmasterStateMachine().

12259 {
12260  struct stat stat_buf;
12261 
12262  /* if the backup_label file is not there, return */
12263  if (stat(BACKUP_LABEL_FILE, &stat_buf) < 0)
12264  return;
12265 
12266  /* remove leftover file from previously canceled backup if it exists */
12267  unlink(BACKUP_LABEL_OLD);
12268 
12270  {
12271  ereport(WARNING,
12273  errmsg("online backup mode was not canceled"),
12274  errdetail("File \"%s\" could not be renamed to \"%s\": %m.",
12276  return;
12277  }
12278 
12279  /* if the tablespace_map file is not there, return */
12280  if (stat(TABLESPACE_MAP, &stat_buf) < 0)
12281  {
12282  ereport(LOG,
12283  (errmsg("online backup mode canceled"),
12284  errdetail("File \"%s\" was renamed to \"%s\".",
12286  return;
12287  }
12288 
12289  /* remove leftover file from previously canceled backup if it exists */
12290  unlink(TABLESPACE_MAP_OLD);
12291 
12293  {
12294  ereport(LOG,
12295  (errmsg("online backup mode canceled"),
12296  errdetail("Files \"%s\" and \"%s\" were renamed to "
12297  "\"%s\" and \"%s\", respectively.",
12300  }
12301  else
12302  {
12303  ereport(WARNING,
12305  errmsg("online backup mode canceled"),
12306  errdetail("File \"%s\" was renamed to \"%s\", but "
12307  "file \"%s\" could not be renamed to \"%s\": %m.",
12310  }
12311 }
#define DEBUG1
Definition: elog.h:25
#define LOG
Definition: elog.h:26
#define BACKUP_LABEL_OLD
Definition: xlog.h:364
#define TABLESPACE_MAP
Definition: xlog.h:366
int errdetail(const char *fmt,...)
Definition: elog.c:1042
int errcode_for_file_access(void)
Definition: elog.c:721
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:697
#define WARNING
Definition: elog.h:40
#define ereport(elevel,...)
Definition: elog.h:157
#define TABLESPACE_MAP_OLD
Definition: xlog.h:367
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define BACKUP_LABEL_FILE
Definition: xlog.h:363
#define stat
Definition: win32_port.h:283

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 5074 of file xlog.c.

References XLOGbuffers, and XLOGChooseNumBuffers().

5075 {
5076  /*
5077  * -1 indicates a request for auto-tune.
5078  */
5079  if (*newval == -1)
5080  {
5081  /*
5082  * If we haven't yet changed the boot_val default of -1, just let it
5083  * be. We'll fix it when XLOGShmemSize is called.
5084  */
5085  if (XLOGbuffers == -1)
5086  return true;
5087 
5088  /* Otherwise, substitute the auto-tune value */
5090  }
5091 
5092  /*
5093  * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
5094  * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
5095  * the case, we just silently treat such values as a request for the
5096  * minimum. (We could throw an error instead, but that doesn't seem very
5097  * helpful.)
5098  */
5099  if (*newval < 4)
5100  *newval = 4;
5101 
5102  return true;
5103 }
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:5058
#define newval
int XLOGbuffers
Definition: xlog.c:94

◆ CheckForStandbyTrigger()

static bool CheckForStandbyTrigger ( void  )
static

Definition at line 13104 of file xlog.c.

References CheckPromoteSignal(), ereport, errcode_for_file_access(), errmsg(), ERROR, IsPromoteSignaled(), LocalPromoteIsTriggered, LOG, PromoteTriggerFile, RemovePromoteSignalFiles(), ResetPromoteSignaled(), SetPromoteIsTriggered(), and stat.

Referenced by ReadRecord(), recoveryApplyDelay(), recoveryPausesHere(), RecoveryRequiresIntParameter(), and WaitForWALToBecomeAvailable().

13105 {
13106  struct stat stat_buf;
13107 
13109  return true;
13110 
13112  {
13113  ereport(LOG, (errmsg("received promote request")));
13117  return true;
13118  }
13119 
13120  if (PromoteTriggerFile == NULL || strcmp(PromoteTriggerFile, "") == 0)
13121  return false;
13122 
13123  if (stat(PromoteTriggerFile, &stat_buf) == 0)
13124  {
13125  ereport(LOG,
13126  (errmsg("promote trigger file found: %s", PromoteTriggerFile)));
13127  unlink(PromoteTriggerFile);
13129  return true;
13130  }
13131  else if (errno != ENOENT)
13132  ereport(ERROR,
13134  errmsg("could not stat promote trigger file \"%s\": %m",
13135  PromoteTriggerFile)));
13136 
13137  return false;
13138 }
char * PromoteTriggerFile
Definition: xlog.c:289
bool CheckPromoteSignal(void)
Definition: xlog.c:13153
bool IsPromoteSignaled(void)
Definition: startup.c:275
static bool LocalPromoteIsTriggered
Definition: xlog.c:236
#define LOG
Definition: elog.h:26
static void SetPromoteIsTriggered(void)
Definition: xlog.c:13082
#define ERROR
Definition: elog.h:46
int errcode_for_file_access(void)
Definition: elog.c:721
void RemovePromoteSignalFiles(void)
Definition: xlog.c:13144
#define ereport(elevel,...)
Definition: elog.h:157
void ResetPromoteSignaled(void)
Definition: startup.c:281
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define stat
Definition: win32_port.h:283

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 9530 of file xlog.c.

References CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

9531 {
9537 
9538  /* Write out all dirty data in SLRUs and the main buffer pool */
9539  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
9541  CheckPointCLOG();
9546  CheckPointBuffers(flags);
9547 
9548  /* Perform all queued up fsyncs */
9549  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
9553  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
9554 
9555  /* We deliberately delay 2PC checkpointing as long as possible */
9556  CheckPointTwoPhase(checkPointRedo);
9557 }
void ProcessSyncRequests(void)
Definition: sync.c:257
void CheckPointBuffers(int flags)
Definition: bufmgr.c:2724
TimestampTz ckpt_sync_end_t
Definition: xlog.h:224
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1192
void CheckPointReplicationOrigin(void)
Definition: origin.c:557
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1922
void CheckPointCLOG(void)
Definition: clog.c:818
void CheckPointMultiXact(void)
Definition: multixact.c:2155
void CheckPointCommitTs(void)
Definition: commit_ts.c:791
CheckpointStatsData CheckpointStats
Definition: xlog.c:188
TimestampTz ckpt_write_t
Definition: xlog.h:222
void CheckPointSUBTRANS(void)
Definition: subtrans.c:284
void CheckPointRelationMap(void)
Definition: relmapper.c:546
TimestampTz ckpt_sync_t
Definition: xlog.h:223
void CheckPointPredicate(void)
Definition: predicate.c:1069
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1707
void CheckPointReplicationSlots(void)
Definition: slot.c:1360

◆ CheckPromoteSignal()

bool CheckPromoteSignal ( void  )

Definition at line 13153 of file xlog.c.

References PROMOTE_SIGNAL_FILE, and stat.

Referenced by CheckForStandbyTrigger(), and sigusr1_handler().

13154 {
13155  struct stat stat_buf;
13156 
13157  if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
13158  return true;
13159 
13160  return false;
13161 }
#define PROMOTE_SIGNAL_FILE
Definition: xlog.h:370
#define stat
Definition: win32_port.h:283

◆ CheckRecoveryConsistency()

static void CheckRecoveryConsistency ( void  )
static

Definition at line 8174 of file xlog.c.

References Assert, ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, DEBUG1, elog, ereport, errmsg(), InArchiveRecovery, XLogCtlData::info_lck, InvalidXLogRecPtr, IsUnderPostmaster, XLogCtlData::lastReplayedEndRecPtr, LocalHotStandbyActive, LOG, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, minRecoveryPoint, PMSIGNAL_BEGIN_HOT_STANDBY, reachedConsistency, SendPostmasterSignal(), XLogCtlData::SharedHotStandbyActive, SpinLockAcquire, SpinLockRelease, STANDBY_SNAPSHOT_READY, standbyState, UpdateControlFile(), XLogCheckInvalidPages(), and XLogRecPtrIsInvalid.

Referenced by ReadRecord(), and StartupXLOG().

8175 {
8176  XLogRecPtr lastReplayedEndRecPtr;
8177 
8178  /*
8179  * During crash recovery, we don't reach a consistent state until we've
8180  * replayed all the WAL.
8181  */
8183  return;
8184 
8186 
8187  /*
8188  * assume that we are called in the startup process, and hence don't need
8189  * a lock to read lastReplayedEndRecPtr
8190  */
8191  lastReplayedEndRecPtr = XLogCtl->lastReplayedEndRecPtr;
8192 
8193  /*
8194  * Have we reached the point where our base backup was completed?
8195  */
8197  ControlFile->backupEndPoint <= lastReplayedEndRecPtr)
8198  {
8199  /*
8200  * We have reached the end of base backup, as indicated by pg_control.
8201  * The data on disk is now consistent. Reset backupStartPoint and
8202  * backupEndPoint, and update minRecoveryPoint to make sure we don't
8203  * allow starting up at an earlier point even if recovery is stopped
8204  * and restarted soon after this.
8205  */
8206  elog(DEBUG1, "end of backup reached");
8207 
8208  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8209 
8210  if (ControlFile->minRecoveryPoint < lastReplayedEndRecPtr)
8211  ControlFile->minRecoveryPoint = lastReplayedEndRecPtr;
8212 
8215  ControlFile->backupEndRequired = false;
8217 
8218  LWLockRelease(ControlFileLock);
8219  }
8220 
8221  /*
8222  * Have we passed our safe starting point? Note that minRecoveryPoint is
8223  * known to be incorrectly set if ControlFile->backupEndRequired, until
8224  * the XLOG_BACKUP_END arrives to advise us of the correct
8225  * minRecoveryPoint. All we know prior to that is that we're not
8226  * consistent yet.
8227  */
8229  minRecoveryPoint <= lastReplayedEndRecPtr &&
8231  {
8232  /*
8233  * Check to see if the XLOG sequence contained any unresolved
8234  * references to uninitialized pages.
8235  */
8237 
8238  reachedConsistency = true;
8239  ereport(LOG,
8240  (errmsg("consistent recovery state reached at %X/%X",
8241  LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
8242  }
8243 
8244  /*
8245  * Have we got a valid starting snapshot that will allow queries to be
8246  * run? If so, we can tell postmaster that the database is consistent now,
8247  * enabling connections.
8248  */
8253  {
8257 
8258  LocalHotStandbyActive = true;
8259 
8261  }
8262 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
void XLogCheckInvalidPages(void)
Definition: xlogutils.c:243
bool SharedHotStandbyActive
Definition: xlog.c:653
slock_t info_lck
Definition: xlog.c:735
#define LOG
Definition: elog.h:26
bool InArchiveRecovery
Definition: xlog.c:262
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
bool backupEndRequired
Definition: pg_control.h:172
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
static bool LocalHotStandbyActive
Definition: xlog.c:230
void UpdateControlFile(void)
Definition: xlog.c:4989
HotStandbyState standbyState
Definition: xlogutils.c:55
bool IsUnderPostmaster
Definition: globals.c:112
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static ControlFileData * ControlFile
Definition: xlog.c:746
#define ereport(elevel,...)
Definition: elog.h:157
XLogRecPtr backupEndPoint
Definition: pg_control.h:171
bool reachedConsistency
Definition: xlog.c:877
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:153
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:868
XLogRecPtr lastReplayedEndRecPtr
Definition: xlog.c:713

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 6574 of file xlog.c.

References ArchiveRecoveryRequested, EnableHotStandby, ereport, errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

6575 {
6576  /*
6577  * For archive recovery, the WAL must be generated with at least 'replica'
6578  * wal_level.
6579  */
6581  {
6582  ereport(FATAL,
6583  (errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
6584  errdetail("This happens if you temporarily set wal_level=minimal on the server."),
6585  errhint("Use a backup taken after setting wal_level to higher than minimal.")));
6586  }
6587 
6588  /*
6589  * For Hot Standby, the WAL must be generated with 'replica' mode, and we
6590  * must have at least as many backend slots as the primary.
6591  */
6593  {
6594  /* We ignore autovacuum_max_workers when we make this test. */
6595  RecoveryRequiresIntParameter("max_connections",
6598  RecoveryRequiresIntParameter("max_worker_processes",
6601  RecoveryRequiresIntParameter("max_wal_senders",
6604  RecoveryRequiresIntParameter("max_prepared_transactions",
6607  RecoveryRequiresIntParameter("max_locks_per_transaction",
6610  }
6611 }
bool ArchiveRecoveryRequested
Definition: xlog.c:261
int max_locks_per_xact
Definition: pg_control.h:184
int errhint(const char *fmt,...)
Definition: elog.c:1156
int max_prepared_xacts
Definition: pg_control.h:183
static void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
Definition: xlog.c:6489
int max_worker_processes
Definition: pg_control.h:181
int max_prepared_xacts
Definition: twophase.c:117
#define FATAL
Definition: elog.h:49
int errdetail(const char *fmt,...)
Definition: elog.c:1042
int max_locks_per_xact
Definition: lock.c:55
int max_wal_senders
Definition: walsender.c:121
int MaxConnections
Definition: globals.c:136
static ControlFileData * ControlFile
Definition: xlog.c:746
#define ereport(elevel,...)
Definition: elog.h:157
bool EnableHotStandby
Definition: xlog.c:98
int errmsg(const char *fmt,...)
Definition: elog.c:909
int max_worker_processes
Definition: globals.c:137

◆ checkTimeLineSwitch()

static void checkTimeLineSwitch ( XLogRecPtr  lsn,
TimeLineID  newTLI,
TimeLineID  prevTLI 
)
static

Definition at line 10228 of file xlog.c.

References ereport, errmsg(), LSN_FORMAT_ARGS, minRecoveryPoint, minRecoveryPointTLI, PANIC, ThisTimeLineID, tliInHistory(), and XLogRecPtrIsInvalid.

Referenced by StartupXLOG().

10229 {
10230  /* Check that the record agrees on what the current (old) timeline is */
10231  if (prevTLI != ThisTimeLineID)
10232  ereport(PANIC,
10233  (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
10234  prevTLI, ThisTimeLineID)));
10235 
10236  /*
10237  * The new timeline better be in the list of timelines we expect to see,
10238  * according to the timeline history. It should also not decrease.
10239  */
10240  if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs))
10241  ereport(PANIC,
10242  (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
10243  newTLI, ThisTimeLineID)));
10244 
10245  /*
10246  * If we have not yet reached min recovery point, and we're about to
10247  * switch to a timeline greater than the timeline of the min recovery
10248  * point: trouble. After switching to the new timeline, we could not
10249  * possibly visit the min recovery point on the correct timeline anymore.
10250  * This can happen if there is a newer timeline in the archive that
10251  * branched before the timeline the min recovery point is on, and you
10252  * attempt to do PITR to the new timeline.
10253  */
10255  lsn < minRecoveryPoint &&
10256  newTLI > minRecoveryPointTLI)
10257  ereport(PANIC,
10258  (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
10259  newTLI,
10262 
10263  /* Looks good */
10264 }
static List * expectedTLEs
Definition: xlog.c:334
#define PANIC
Definition: elog.h:50
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:869
TimeLineID ThisTimeLineID
Definition: xlog.c:194
#define ereport(elevel,...)
Definition: elog.h:157
int errmsg(const char *fmt,...)
Definition: elog.c:909
bool tliInHistory(TimeLineID tli, List *expectedTLEs)
Definition: timeline.c:534
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:868

◆ checkXLogConsistency()

static void checkXLogConsistency ( XLogReaderState * record)
static

Definition at line 1418 of file xlog.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetPage, BufferIsValid, RelFileNode::dbNode, elog, XLogReaderState::EndRecPtr, ERROR, FATAL, LockBuffer(), XLogReaderState::max_block_id, PageGetLSN, primary_image_masked, RBM_NORMAL_NO_LOG, RelFileNode::relNode, replay_image_masked, RestoreBlockImage(), RmgrData::rm_mask, RmgrTable, RelFileNode::spcNode, UnlockReleaseBuffer(), XLogReadBufferExtended(), XLogRecBlockImageApply, XLogRecGetBlockTag(), XLogRecGetInfo, XLogRecGetRmid, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, and XLR_CHECK_CONSISTENCY.

Referenced by StartupXLOG().

1419 {
1420  RmgrId rmid = XLogRecGetRmid(record);
1421  RelFileNode rnode;
1422  ForkNumber forknum;
1423  BlockNumber blkno;
1424  int block_id;
1425 
1426  /* Records with no backup blocks have no need for consistency checks. */
1427  if (!XLogRecHasAnyBlockRefs(record))
1428  return;
1429 
1430  Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
1431 
1432  for (block_id = 0; block_id <= record->max_block_id; block_id++)
1433  {
1434  Buffer buf;
1435  Page page;
1436 
1437  if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
1438  {
1439  /*
1440  * WAL record doesn't contain a block reference with the given id.
1441  * Do nothing.
1442  */
1443  continue;
1444  }
1445 
1446  Assert(XLogRecHasBlockImage(record, block_id));
1447 
1448  if (XLogRecBlockImageApply(record, block_id))
1449  {
1450  /*
1451  * WAL record has already applied the page, so bypass the
1452  * consistency check as that would result in comparing the full
1453  * page stored in the record with itself.
1454  */
1455  continue;
1456  }
1457 
1458  /*
1459  * Read the contents from the current buffer and store it in a
1460  * temporary page.
1461  */
1462  buf = XLogReadBufferExtended(rnode, forknum, blkno,
1464  if (!BufferIsValid(buf))
1465  continue;
1466 
1468  page = BufferGetPage(buf);
1469 
1470  /*
1471  * Take a copy of the local page where WAL has been applied to have a
1472  * comparison base before masking it...
1473  */
1474  memcpy(replay_image_masked, page, BLCKSZ);
1475 
1476  /* No need for this page anymore now that a copy is in. */
1477  UnlockReleaseBuffer(buf);
1478 
1479  /*
1480  * If the block LSN is already ahead of this WAL record, we can't
1481  * expect contents to match. This can happen if recovery is
1482  * restarted.
1483  */
1484  if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
1485  continue;
1486 
1487  /*
1488  * Read the contents from the backup copy, stored in WAL record and
1489  * store it in a temporary page. There is no need to allocate a new
1490  * page here, a local buffer is fine to hold its contents and a mask
1491  * can be directly applied on it.
1492  */
1493  if (!RestoreBlockImage(record, block_id, primary_image_masked))
1494  elog(ERROR, "failed to restore block image");
1495 
1496  /*
1497  * If masking function is defined, mask both the primary and replay
1498  * images
1499  */
1500  if (RmgrTable[rmid].rm_mask != NULL)
1501  {
1502  RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
1503  RmgrTable[rmid].rm_mask(primary_image_masked, blkno);
1504  }
1505 
1506  /* Time to compare the primary and replay images. */
1507  if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
1508  {
1509  elog(FATAL,
1510  "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
1511  rnode.spcNode, rnode.dbNode, rnode.relNode,
1512  forknum, blkno);
1513  }
1514  }
1515 }
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:325
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:459
const RmgrData RmgrTable[RM_MAX_ID+1]
Definition: rmgr.c:36
uint32 BlockNumber
Definition: block.h:31
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
#define XLR_CHECK_CONSISTENCY
Definition: xlogrecord.h:80
XLogRecPtr EndRecPtr
Definition: xlogreader.h:176
static char * replay_image_masked
Definition: xlog.c:268
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3791
#define ERROR
Definition: elog.h:46
#define FATAL
Definition: elog.h:49
static char * buf
Definition: pg_test_fsync.c:68
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:315
ForkNumber
Definition: relpath.h:40
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1531
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4007
uint8 RmgrId
Definition: rmgr.h:11
#define Assert(condition)
Definition: c.h:804
static char * primary_image_masked
Definition: xlog.c:269
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:1584
#define PageGetLSN(page)
Definition: bufpage.h:366
#define elog(elevel,...)
Definition: elog.h:232
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:322
#define XLogRecBlockImageApply(decoder, block_id)
Definition: xlogreader.h:327
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:78
#define XLogRecGetRmid(decoder)
Definition: xlogreader.h:316

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3999 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, and XLogFileName.

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

4000 {
4001  int save_errno = errno;
4002  XLogSegNo lastRemovedSegNo;
4003 
4005  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
4007 
4008  if (segno <= lastRemovedSegNo)
4009  {
4010  char filename[MAXFNAMELEN];
4011 
4012  XLogFileName(filename, tli, segno, wal_segment_size);
4013  errno = save_errno;
4014  ereport(ERROR,
4016  errmsg("requested WAL segment %s has already been removed",
4017  filename)));
4018  }
4019  errno = save_errno;
4020 }
int wal_segment_size
Definition: xlog.c:119
slock_t info_lck
Definition: xlog.c:735
XLogSegNo lastRemovedSegNo
Definition: xlog.c:597
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define ERROR
Definition: elog.h:46
uint64 XLogSegNo
Definition: xlogdefs.h:48
int errcode_for_file_access(void)
Definition: elog.c:721
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
#define ereport(elevel,...)
Definition: elog.h:157
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static XLogCtlData * XLogCtl
Definition: xlog.c:738
static char * filename
Definition: pg_dumpall.c:92
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog 
)
static

Definition at line 5741 of file xlog.c.

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, ThisTimeLineID, wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName, XLogFilePath, and XLogSegmentOffset.

Referenced by StartupXLOG().

5742 {
5743  /*
5744  * Execute the recovery_end_command, if any.
5745  */
5746  if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5748  "recovery_end_command",
5749  true);
5750 
5751  /*
5752  * We switched to a new timeline. Clean up segments on the old timeline.
5753  *
5754  * If there are any higher-numbered segments on the old timeline, remove
5755  * them. They might contain valid WAL, but they might also be pre-allocated
5756  * files containing garbage. In any case, they are not part of the new
5757  * timeline's history so we don't need them.
5758  */
5760 
5761  /*
5762  * If the switch happened in the middle of a segment, what to do with the
5763  * last, partial segment on the old timeline? If we don't archive it, and
5764  * the server that created the WAL never archives it either (e.g. because it
5765  * was hit by a meteor), it will never make it to the archive. That's OK
5766  * from our point of view, because the new segment that we created with the
5767  * new TLI contains all the WAL from the old timeline up to the switch
5768  * point. But if you later try to do PITR to the "missing" WAL on the old
5769  * timeline, recovery won't find it in the archive. It's physically present
5770  * in the new file with new TLI, but recovery won't look there when it's
5771  * recovering to the older timeline. On the other hand, if we archive the
5772  * partial segment, and the original server on that timeline is still
5773  * running and archives the completed version of the same segment later, it
5774  * will fail. (We used to do that in 9.4 and below, and it caused such
5775  * problems).
5776  *
5777  * As a compromise, we rename the last segment with the .partial suffix, and
5778  * archive it. Archive recovery will never try to read .partial segments, so
5779  * they will normally go unused. But in the odd PITR case, the administrator
5780  * can copy them manually to the pg_wal directory (removing the suffix).
5781  * They can be useful in debugging, too.
5782  *
5783  * If a .done or .ready file already exists for the old timeline, however,
5784  * we had already determined that the segment is complete, so we can let it
5785  * be archived normally. (In particular, if it was restored from the archive
5786  * to begin with, it's expected to have a .done file).
5787  */
5788  if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5790  {
5791  char origfname[MAXFNAMELEN];
5792  XLogSegNo endLogSegNo;
5793 
5794  XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5795  XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5796 
5797  if (!XLogArchiveIsReadyOrDone(origfname))
5798  {
5799  char origpath[MAXPGPATH];
5800  char partialfname[MAXFNAMELEN];
5801  char partialpath[MAXPGPATH];
5802 
5803  XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5804  snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5805  snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5806 
5807  /*
5808  * Make sure there's no .done or .ready file for the .partial
5809  * file.
5810  */
5811  XLogArchiveCleanup(partialfname);
5812 
5813  durable_rename(origpath, partialpath, ERROR);
5814  XLogArchiveNotify(partialfname);
5815  }
5816  }
5817 }
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal)
Definition: xlogarchive.c:287
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:671
int wal_segment_size
Definition: xlog.c:119
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
char * recoveryEndCommand
Definition: xlog.c:273
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:719
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:467
#define ERROR
Definition: elog.h:46
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:48
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:697
#define MAXFNAMELEN
static void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:4170
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
TimeLineID ThisTimeLineID
Definition: xlog.c:194
#define XLogArchivingActive()
Definition: xlog.h:155
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define XLogFilePath(path, tli, logSegNo, wal_segsz_bytes)
#define snprintf
Definition: port.h:217

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4364 of file xlog.c.

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName, MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_stop_backup().

4365 {
4366  DIR *xldir;
4367  struct dirent *xlde;
4368  char path[MAXPGPATH + sizeof(XLOGDIR)];
4369 
4370  xldir = AllocateDir(XLOGDIR);
4371 
4372  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4373  {
4374  if (IsBackupHistoryFileName(xlde->d_name))
4375  {
4376  if (XLogArchiveCheckDone(xlde->d_name))
4377  {
4378  elog(DEBUG2, "removing WAL backup history file \"%s\"",
4379  xlde->d_name);
4380  snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4381  unlink(path);
4382  XLogArchiveCleanup(xlde->d_name);
4383  }
4384  }
4385  }
4386 
4387  FreeDir(xldir);
4388 }
Definition: dirent.h:9
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:719
Definition: dirent.c:25
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:572
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2720
#define IsBackupHistoryFileName(fname)
#define XLOGDIR
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2786
#define elog(elevel,...)
Definition: elog.h:232
char d_name[MAX_PATH]
Definition: dirent.h:15
#define snprintf
Definition: port.h:217
int FreeDir(DIR *dir)
Definition: fd.c:2838

◆ ConfirmRecoveryPaused()

static void ConfirmRecoveryPaused ( void  )
static

Definition at line 6297 of file xlog.c.

References XLogCtlData::info_lck, RECOVERY_PAUSE_REQUESTED, RECOVERY_PAUSED, XLogCtlData::recoveryPauseState, SpinLockAcquire, and SpinLockRelease.

Referenced by recoveryPausesHere(), and RecoveryRequiresIntParameter().

6298 {
6299  /* If recovery pause is requested then set it paused */
6304 }
RecoveryPauseState recoveryPauseState
Definition: xlog.c:726
slock_t info_lck
Definition: xlog.c:735
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:738

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData * rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos 
)
static

Definition at line 1522 of file xlog.c.

References Assert, XLogRecData::data, elog, GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

1524 {
1525  char *currpos;
1526  int freespace;
1527  int written;
1528  XLogRecPtr CurrPos;
1529  XLogPageHeader pagehdr;
1530 
1531  /*
1532  * Get a pointer to the right place in the right WAL buffer to start
1533  * inserting to.
1534  */
1535  CurrPos = StartPos;
1536  currpos = GetXLogBuffer(CurrPos);
1537  freespace = INSERT_FREESPACE(CurrPos);
1538 
1539  /*
1540  * there should be enough space for at least the first field (xl_tot_len)
1541  * on this page.
1542  */
1543  Assert(freespace >= sizeof(uint32));
1544 
1545  /* Copy record data */
1546  written = 0;
1547  while (rdata != NULL)
1548  {
1549  char *rdata_data = rdata->data;
1550  int rdata_len = rdata->len;
1551 
1552  while (rdata_len > freespace)
1553  {
1554  /*
1555  * Write what fits on this page, and continue on the next page.
1556  */
1557  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1558  memcpy(currpos, rdata_data, freespace);
1559  rdata_data += freespace;
1560  rdata_len -= freespace;
1561  written += freespace;
1562  CurrPos += freespace;
1563 
1564  /*
1565  * Get pointer to beginning of next page, and set the xlp_rem_len
1566  * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1567  *
1568  * It's safe to set the contrecord flag and xlp_rem_len without a
1569  * lock on the page. All the other flags were already set when the
1570  * page was initialized, in AdvanceXLInsertBuffer, and we're the
1571  * only backend that needs to set the contrecord flag.
1572  */
1573  currpos = GetXLogBuffer(CurrPos);
1574  pagehdr = (XLogPageHeader) currpos;
1575  pagehdr->xlp_rem_len = write_len - written;
1576  pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1577 
1578  /* skip over the page header */
1579  if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1580  {
1581  CurrPos += SizeOfXLogLongPHD;
1582  currpos += SizeOfXLogLongPHD;
1583  }
1584  else
1585  {
1586  CurrPos += SizeOfXLogShortPHD;
1587  currpos += SizeOfXLogShortPHD;
1588  }
1589  freespace = INSERT_FREESPACE(CurrPos);
1590  }
1591 
1592  Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1593  memcpy(currpos, rdata_data, rdata_len);
1594  currpos += rdata_len;
1595  CurrPos += rdata_len;
1596  freespace -= rdata_len;
1597  written += rdata_len;
1598 
1599  rdata = rdata->next;
1600  }
1601  Assert(written == write_len);
1602 
1603  /*
1604  * If this was an xlog-switch, it's not enough to write the switch record,
1605  * we also have to consume all the remaining space in the WAL segment. We
1606  * have already reserved that space, but we need to actually fill it.
1607  */
1608  if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1609  {
1610  /* An xlog-switch record doesn't contain any data besides the header */
1611  Assert(write_len == SizeOfXLogRecord);
1612 
1613  /* Assert that we did reserve the right amount of space */
1614  Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1615 
1616  /* Use up all the remaining space on the current page */
1617  CurrPos += freespace;
1618 
1619  /*
1620  * Cause all remaining pages in the segment to be flushed, leaving the
1621  * XLog position where it should be, at the start of the next segment.
1622  * We do this one page at a time, to make sure we don't deadlock
1623  * against ourselves if wal_buffers < wal_segment_size.
1624  */
1625  while (CurrPos < EndPos)
1626  {
1627  /*
1628  * The minimal action to flush the page would be to call
1629  * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1630  * AdvanceXLInsertBuffer(...). The page would be left initialized
1631  * mostly to zeros, except for the page header (always the short
1632  * variant, as this is never a segment's first page).
1633  *
1634  * The large vistas of zeros are good for compressibility, but the
1635  * headers interrupting them every XLOG_BLCKSZ (with values that
1636  * differ from page to page) are not. The effect varies with
1637  * compression tool, but bzip2 for instance compresses about an
1638  * order of magnitude worse if those headers are left in place.
1639  *
1640  * Rather than complicating AdvanceXLInsertBuffer itself (which is
1641  * called in heavily-loaded circumstances as well as this lightly-
1642  * loaded one) with variant behavior, we just use GetXLogBuffer
1643  * (which itself calls the two methods we need) to get the pointer
1644  * and zero most of the page. Then we just zero the page header.
1645  */
1646  currpos = GetXLogBuffer(CurrPos);
1647  MemSet(currpos, 0, SizeOfXLogShortPHD);
1648 
1649  CurrPos += XLOG_BLCKSZ;
1650  }
1651  }
1652  else
1653  {
1654  /* Align the end position, so that the next record starts aligned */
1655  CurrPos = MAXALIGN64(CurrPos);
1656  }
1657 
1658  if (CurrPos != EndPos)
1659  elog(PANIC, "space reserved for WAL record does not match what was written");
1660 }
int wal_segment_size
Definition: xlog.c:119
#define MemSet(start, val, len)
Definition: c.h:1008
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define PANIC
Definition: elog.h:50
static char * GetXLogBuffer(XLogRecPtr ptr)
Definition: xlog.c:1898
#define MAXALIGN64(LEN)
Definition: c.h:782
unsigned int uint32
Definition: c.h:441
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:752
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
struct XLogRecData * next
#define elog(elevel,...)
Definition: elog.h:232
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 9010 of file xlog.c.

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, XLogCtlInsert::CurrBytePos, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, InitXLogInsert(), Insert(), XLogCtlData::Insert, INSERT_FREESPACE, InvalidateObsoleteReplicationSlots(), InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, CheckPoint::newestCommitTsXid, VariableCacheData::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, VariableCacheData::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, VariableCacheData::oldestXid, CheckPoint::oldestXidDB, VariableCacheData::oldestXidDB, PANIC, pfree(), pg_usleep(), PreallocXlogFiles(), CheckPoint::PrevTimeLineID, XLogCtlData::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, 
RemoveOldXlogFiles(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, TruncateSUBTRANS(), XLogCtlData::ulsn_lck, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

9011 {
9012  bool shutdown;
9013  CheckPoint checkPoint;
9014  XLogRecPtr recptr;
9015  XLogSegNo _logSegNo;
9017  uint32 freespace;
9018  XLogRecPtr PriorRedoPtr;
9019  XLogRecPtr curInsert;
9020  XLogRecPtr last_important_lsn;
9021  VirtualTransactionId *vxids;
9022  int nvxids;
9023 
9024  /*
9025  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
9026  * issued at a different time.
9027  */
9029  shutdown = true;
9030  else
9031  shutdown = false;
9032 
9033  /* sanity check */
9034  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
9035  elog(ERROR, "can't create a checkpoint during recovery");
9036 
9037  /*
9038  * Initialize InitXLogInsert working areas before entering the critical
9039  * section. Normally, this is done by the first call to
9040  * RecoveryInProgress() or LocalSetXLogInsertAllowed(), but when creating
9041  * an end-of-recovery checkpoint, the LocalSetXLogInsertAllowed call is
9042  * done below in a critical section, and InitXLogInsert cannot be called
9043  * in a critical section.
9044  */
9045  InitXLogInsert();
9046 
9047  /*
9048  * Prepare to accumulate statistics.
9049  *
9050  * Note: because it is possible for log_checkpoints to change while a
9051  * checkpoint proceeds, we always accumulate stats, even if
9052  * log_checkpoints is currently off.
9053  */
9054  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
9056 
9057  /*
9058  * Use a critical section to force system panic if we have trouble.
9059  */
9061 
9062  if (shutdown)
9063  {
9064  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9066  ControlFile->time = (pg_time_t) time(NULL);
9068  LWLockRelease(ControlFileLock);
9069  }
9070 
9071  /*
9072  * Let smgr prepare for checkpoint; this has to happen before we determine
9073  * the REDO pointer. Note that smgr must not do anything that'd have to
9074  * be undone if we decide no checkpoint is needed.
9075  */
9077 
9078  /* Begin filling in the checkpoint WAL record */
9079  MemSet(&checkPoint, 0, sizeof(checkPoint));
9080  checkPoint.time = (pg_time_t) time(NULL);
9081 
9082  /*
9083  * For Hot Standby, derive the oldestActiveXid before we fix the redo
9084  * pointer. This allows us to begin accumulating changes to assemble our
9085  * starting snapshot of locks and transactions.
9086  */
9087  if (!shutdown && XLogStandbyInfoActive())
9089  else
9091 
9092  /*
9093  * Get location of last important record before acquiring insert locks (as
9094  * GetLastImportantRecPtr() also locks WAL locks).
9095  */
9096  last_important_lsn = GetLastImportantRecPtr();
9097 
9098  /*
9099  * We must block concurrent insertions while examining insert state to
9100  * determine the checkpoint REDO pointer.
9101  */
9103  curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
9104 
9105  /*
9106  * If this isn't a shutdown or forced checkpoint, and if there has been no
9107  * WAL activity requiring a checkpoint, skip it. The idea here is to
9108  * avoid inserting duplicate checkpoints when the system is idle.
9109  */
9110  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
9111  CHECKPOINT_FORCE)) == 0)
9112  {
9113  if (last_important_lsn == ControlFile->checkPoint)
9114  {
9116  END_CRIT_SECTION();
9117  ereport(DEBUG1,
9118  (errmsg_internal("checkpoint skipped because system is idle")));
9119  return;
9120  }
9121  }
9122 
9123  /*
9124  * An end-of-recovery checkpoint is created before anyone is allowed to
9125  * write WAL. To allow us to write the checkpoint record, temporarily
9126  * enable XLogInsertAllowed. (This also ensures ThisTimeLineID is
9127  * initialized, which we need here and in AdvanceXLInsertBuffer.)
9128  */
9129  if (flags & CHECKPOINT_END_OF_RECOVERY)
9131 
9132  checkPoint.ThisTimeLineID = ThisTimeLineID;
9133  if (flags & CHECKPOINT_END_OF_RECOVERY)
9134  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
9135  else
9136  checkPoint.PrevTimeLineID = ThisTimeLineID;
9137 
9138  checkPoint.fullPageWrites = Insert->fullPageWrites;
9139 
9140  /*
9141  * Compute new REDO record ptr = location of next XLOG record.
9142  *
9143  * NB: this is NOT necessarily where the checkpoint record itself will be,
9144  * since other backends may insert more XLOG records while we're off doing
9145  * the buffer flush work. Those XLOG records are logically after the
9146  * checkpoint, even though physically before it. Got that?
9147  */
9148  freespace = INSERT_FREESPACE(curInsert);
9149  if (freespace == 0)
9150  {
9151  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
9152  curInsert += SizeOfXLogLongPHD;
9153  else
9154  curInsert += SizeOfXLogShortPHD;
9155  }
9156  checkPoint.redo = curInsert;
9157 
9158  /*
9159  * Here we update the shared RedoRecPtr for future XLogInsert calls; this
9160  * must be done while holding all the insertion locks.
9161  *
9162  * Note: if we fail to complete the checkpoint, RedoRecPtr will be left
9163  * pointing past where it really needs to point. This is okay; the only
9164  * consequence is that XLogInsert might back up whole buffers that it
9165  * didn't really need to. We can't postpone advancing RedoRecPtr because
9166  * XLogInserts that happen while we are dumping buffers must assume that
9167  * their buffer changes are not included in the checkpoint.
9168  */
9169  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
9170 
9171  /*
9172  * Now we can release the WAL insertion locks, allowing other xacts to
9173  * proceed while we are flushing disk buffers.
9174  */
9176 
9177  /* Update the info_lck-protected copy of RedoRecPtr as well */
9179  XLogCtl->RedoRecPtr = checkPoint.redo;
9181 
9182  /*
9183  * If enabled, log checkpoint start. We postpone this until now so as not
9184  * to log anything if we decided to skip the checkpoint.
9185  */
9186  if (log_checkpoints)
9187  LogCheckpointStart(flags, false);
9188 
9189  /* Update the process title */
9190  update_checkpoint_display(flags, false, false);
9191 
9192  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
9193 
9194  /*
9195  * Get the other info we need for the checkpoint record.
9196  *
9197  * We don't need to save oldestClogXid in the checkpoint, it only matters
9198  * for the short period in which clog is being truncated, and if we crash
9199  * during that we'll redo the clog truncation and fix up oldestClogXid
9200  * there.
9201  */
9202  LWLockAcquire(XidGenLock, LW_SHARED);
9203  checkPoint.nextXid = ShmemVariableCache->nextXid;
9204  checkPoint.oldestXid = ShmemVariableCache->oldestXid;
9206  LWLockRelease(XidGenLock);
9207 
9208  LWLockAcquire(CommitTsLock, LW_SHARED);
9211  LWLockRelease(CommitTsLock);
9212 
9213  LWLockAcquire(OidGenLock, LW_SHARED);
9214  checkPoint.nextOid = ShmemVariableCache->nextOid;
9215  if (!shutdown)
9216  checkPoint.nextOid += ShmemVariableCache->oidCount;
9217  LWLockRelease(OidGenLock);
9218 
9219  MultiXactGetCheckptMulti(shutdown,
9220  &checkPoint.nextMulti,
9221  &checkPoint.nextMultiOffset,
9222  &checkPoint.oldestMulti,
9223  &checkPoint.oldestMultiDB);
9224 
9225  /*
9226  * Having constructed the checkpoint record, ensure all shmem disk buffers
9227  * and commit-log buffers are flushed to disk.
9228  *
9229  * This I/O could fail for various reasons. If so, we will fail to
9230  * complete the checkpoint, but there is no reason to force a system
9231  * panic. Accordingly, exit critical section while doing it.
9232  */
9233  END_CRIT_SECTION();
9234 
9235  /*
9236  * In some cases there are groups of actions that must all occur on one
9237  * side or the other of a checkpoint record. Before flushing the
9238  * checkpoint record we must explicitly wait for any backend currently
9239  * performing those groups of actions.
9240  *
9241  * One example is end of transaction, so we must wait for any transactions
9242  * that are currently in commit critical sections. If an xact inserted
9243  * its commit record into XLOG just before the REDO point, then a crash
9244  * restart from the REDO point would not replay that record, which means
9245  * that our flushing had better include the xact's update of pg_xact. So
9246  * we wait till he's out of his commit critical section before proceeding.
9247  * See notes in RecordTransactionCommit().
9248  *
9249  * Because we've already released the insertion locks, this test is a bit
9250  * fuzzy: it is possible that we will wait for xacts we didn't really need
9251  * to wait for. But the delay should be short and it seems better to make
9252  * checkpoint take a bit longer than to hold off insertions longer than
9253  * necessary. (In fact, the whole reason we have this issue is that xact.c
9254  * does commit record XLOG insertion and clog update as two separate steps
9255  * protected by different locks, but again that seems best on grounds of
9256  * minimizing lock contention.)
9257  *
9258  * A transaction that has not yet set delayChkpt when we look cannot be at
9259  * risk, since he's not inserted his commit record yet; and one that's
9260  * already cleared it is not at risk either, since he's done fixing clog
9261  * and we will correctly flush the update below. So we cannot miss any
9262  * xacts we need to wait for.
9263  */
9264  vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
9265  if (nvxids > 0)
9266  {
9267  do
9268  {
9269  pg_usleep(10000L); /* wait for 10 msec */
9270  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
9271  }
9272  pfree(vxids);
9273 
9274  CheckPointGuts(checkPoint.redo, flags);
9275 
9276  /*
9277  * Take a snapshot of running transactions and write this to WAL. This
9278  * allows us to reconstruct the state of running transactions during
9279  * archive recovery, if required. Skip, if this info disabled.
9280  *
9281  * If we are shutting down, or Startup process is completing crash
9282  * recovery we don't need to write running xact data.
9283  */
9284  if (!shutdown && XLogStandbyInfoActive())
9286 
9288 
9289  /*
9290  * Now insert the checkpoint record into XLOG.
9291  */
9292  XLogBeginInsert();
9293  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
9294  recptr = XLogInsert(RM_XLOG_ID,
9295  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
9297 
9298  XLogFlush(recptr);
9299 
9300  /*
9301  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
9302  * overwritten at next startup. No-one should even try, this just allows
9303  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
9304  * to just temporarily disable writing until the system has exited
9305  * recovery.
9306  */
9307  if (shutdown)
9308  {
9309  if (flags & CHECKPOINT_END_OF_RECOVERY)
9310  LocalXLogInsertAllowed = -1; /* return to "check" state */
9311  else
9312  LocalXLogInsertAllowed = 0; /* never again write WAL */
9313  }
9314 
9315  /*
9316  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
9317  * = end of actual checkpoint record.
9318  */
9319  if (shutdown && checkPoint.redo != ProcLastRecPtr)
9320  ereport(PANIC,
9321  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
9322 
9323  /*
9324  * Remember the prior checkpoint's redo ptr for
9325  * UpdateCheckPointDistanceEstimate()
9326  */
9327  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9328 
9329  /*
9330  * Update the control file.
9331  */
9332  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9333  if (shutdown)
9336  ControlFile->checkPointCopy = checkPoint;
9337  ControlFile->time = (pg_time_t) time(NULL);
9338  /* crash recovery should always recover to the end of WAL */
9341 
9342  /*
9343  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
9344  * unused on non-shutdown checkpoints, but seems useful to store it always
9345  * for debugging purposes.
9346  */
9350 
9352  LWLockRelease(ControlFileLock);
9353 
9354  /* Update shared-memory copy of checkpoint XID/epoch */
9356  XLogCtl->ckptFullXid = checkPoint.nextXid;
9358 
9359  /*
9360  * We are now done with critical updates; no need for system panic if we
9361  * have trouble while fooling with old log segments.
9362  */
9363  END_CRIT_SECTION();
9364 
9365  /*
9366  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
9367  */
9369 
9370  /*
9371  * Update the average distance between checkpoints if the prior checkpoint
9372  * exists.
9373  */
9374  if (PriorRedoPtr != InvalidXLogRecPtr)
9376 
9377  /*
9378  * Delete old log files, those no longer needed for last checkpoint to
9379  * prevent the disk holding the xlog from growing full.
9380  */
9382  KeepLogSeg(recptr, &_logSegNo);
9383  if (InvalidateObsoleteReplicationSlots(_logSegNo))
9384  {
9385  /*
9386  * Some slots have been invalidated; recalculate the old-segment
9387  * horizon, starting again from RedoRecPtr.
9388  */
9390  KeepLogSeg(recptr, &_logSegNo);
9391  }
9392  _logSegNo--;
9393  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
9394 
9395  /*
9396  * Make more log segments if needed. (Do this after recycling old log
9397  * segments, since that may supply some of the needed files.)
9398  */
9399  if (!shutdown)
9400  PreallocXlogFiles(recptr);
9401 
9402  /*
9403  * Truncate pg_subtrans if possible. We can throw away all data before
9404  * the oldest XMIN of any running transaction. No future transaction will
9405  * attempt to reference any pg_subtrans entry older than that (see Asserts
9406  * in subtrans.c). During recovery, though, we mustn't do this because
9407  * StartupSUBTRANS hasn't been called yet.
9408  */
9409  if (!RecoveryInProgress())
9411 
9412  /* Real work is done; log and update stats. */
9413  LogCheckpointEnd(false);
9414 
9415  /* Reset the process title */
9416  update_checkpoint_display(flags, false, true);
9417 
9418  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
9419  NBuffers,
9423 }
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:8698
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8915
static int LocalXLogInsertAllowed
Definition: xlog.c:248
bool log_checkpoints
Definition: xlog.c:106
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1740
int wal_segment_size
Definition: xlog.c:119
pg_time_t time
Definition: pg_control.h:130
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:68
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
uint32 oidCount
Definition: transam.h:215
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:2009
XLogRecPtr unloggedLSN
Definition: xlog.c:600
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:352
TransactionId oldestActiveXid
Definition: pg_control.h:63
void InitXLogInsert(void)
Definition: xloginsert.c:1251
TimestampTz ckpt_start_t
Definition: xlog.h:221
slock_t info_lck
Definition: xlog.c:735
#define END_CRIT_SECTION()
Definition: miscadmin.h:149
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:3043
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: xlog.c:641
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:147
int ckpt_segs_recycled
Definition: xlog.h:231
TransactionId oldestXid
Definition: transam.h:222
#define MemSet(start, val, len)
Definition: c.h:1008
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2133
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9530
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:588
TransactionId oldestXid
Definition: pg_control.h:47
bool RecoveryInProgress(void)
Definition: xlog.c:8328
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:338
FullTransactionId nextXid
Definition: transam.h:220
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:50
bool fullPageWrites
Definition: xlog.c:563
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2887
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:8953
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void UpdateControlFile(void)
Definition: xlog.c:4989
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:1169
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1220
#define ERROR
Definition: elog.h:46
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8823
bool InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
Definition: slot.c:1317
static XLogRecPtr RedoRecPtr
Definition: xlog.c:366
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
XLogRecPtr unloggedLSN
Definition: pg_control.h:135
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3962
uint64 XLogSegNo
Definition: xlogdefs.h:48
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:197
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
uint64 CurrBytePos
Definition: xlog.c:538
unsigned int uint32
Definition: c.h:441
XLogRecPtr RedoRecPtr
Definition: xlog.c:592
int ckpt_segs_removed
Definition: xlog.h:230
#define CHECKPOINT_FORCE
Definition: xlog.h:200
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:752
TransactionId oldestCommitTsXid
Definition: transam.h:232
static void Insert(File file)
Definition: fd.c:1302
int ckpt_bufs_written
Definition: xlog.h:227
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:8472
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:340
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:434
TransactionId newestCommitTsXid
Definition: pg_control.h:54
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9963
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
Oid oldestMultiDB
Definition: pg_control.h:50
FullTransactionId ckptFullXid
Definition: xlog.c:593
#define XLogStandbyInfoActive()
Definition: xlog.h:180
static ControlFileData * ControlFile
Definition: xlog.c:746
TimeLineID ThisTimeLineID
Definition: xlog.c:194
Oid nextOid
Definition: pg_control.h:44
#define ereport(elevel,...)
Definition: elog.h:157
bool fullPageWrites
Definition: pg_control.h:42
int errmsg_internal(const char *fmt,...)
Definition: elog.c:996
uint64 XLogRecPtr
Definition: xlogdefs.h:21
Oid oldestXidDB
Definition: pg_control.h:48
TransactionId newestCommitTsXid
Definition: transam.h:233
CheckpointStatsData CheckpointStats
Definition: xlog.c:188
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
MultiXactId nextMulti
Definition: pg_control.h:45
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1711
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
int ckpt_segs_added
Definition: xlog.h:229
slock_t ulsn_lck
Definition: xlog.c:601
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr)
Definition: xlog.c:4097
void SyncPostCheckpoint(void)
Definition: sync.c:195
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2882
int NBuffers
Definition: globals.c:135
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:3087
void XLogBeginInsert(void)
Definition: xloginsert.c:135
XLogRecPtr RedoRecPtr
Definition: xlog.c:561
XLogRecPtr checkPoint
Definition: pg_control.h:131
XLogRecPtr redo
Definition: pg_control.h:37
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8791
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:196
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
void SyncPreCheckpoint(void)
Definition: sync.c:180
FullTransactionId nextXid
Definition: pg_control.h:43
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 9434 of file xlog.c.

References elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, xl_end_of_recovery::PrevTimeLineID, XLogCtlData::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, ThisTimeLineID, xl_end_of_recovery::ThisTimeLineID, ControlFileData::time, UpdateControlFile(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

/*
 * CreateEndOfRecoveryRecord -- body listing (xlog.c lines 9435-9474).
 *
 * Writes an XLOG_END_OF_RECOVERY record carrying an xl_end_of_recovery
 * payload, flushes WAL up to the end of that record, and then stores the
 * record's end position in ControlFile->minRecoveryPoint while holding
 * ControlFileLock.  Raises ERROR if recovery is no longer in progress.
 * On exit LocalXLogInsertAllowed is put back to -1.
 *
 * NOTE(review): listing lines 9445-9448, 9450, 9452 and 9467-9468 are
 * absent from this extract.  The References entry for this function names
 * WALInsertLockAcquireExclusive(), WALInsertLockRelease(),
 * LocalSetXLogInsertAllowed(), START_CRIT_SECTION, UpdateControlFile() and
 * the ThisTimeLineID/PrevTimeLineID/minRecoveryPointTLI fields, none of
 * which appear below; presumably they live on the missing lines -- confirm
 * against xlog.c before relying on this listing.
 */
9435 {
9436  xl_end_of_recovery xlrec;
9437  XLogRecPtr recptr;
9438 
9439  /* sanity check: this record may only be written while still in recovery */
9440  if (!RecoveryInProgress())
9441  elog(ERROR, "can only be used to end recovery");
9442 
9443  xlrec.end_time = GetCurrentTimestamp();
9444 
9449 
9451 
9453 
9454  XLogBeginInsert();
9455  XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
9456  recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
9457 
9458  XLogFlush(recptr); /* flush before the control file is touched below */
9459 
9460  /*
9461  * Update the control file so that crash recovery can follow the timeline
9462  * changes to this point.
9463  */
9464  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9465  ControlFile->time = (pg_time_t) time(NULL);
9466  ControlFile->minRecoveryPoint = recptr;
9469  LWLockRelease(ControlFileLock);
9470 
9471  END_CRIT_SECTION();
9472 
9473  LocalXLogInsertAllowed = -1; /* return to "check" state */
9474 }
static int LocalXLogInsertAllowed
Definition: xlog.c:248
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1740
pg_time_t time
Definition: pg_control.h:130
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TimeLineID PrevTimeLineID
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
#define END_CRIT_SECTION()
Definition: miscadmin.h:149
TimeLineID PrevTimeLineID
Definition: xlog.c:641
#define START_CRIT_SECTION()
Definition: miscadmin.h:147
bool RecoveryInProgress(void)
Definition: xlog.c:8328
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:76
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2887
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
void UpdateControlFile(void)
Definition: xlog.c:4989
#define ERROR
Definition: elog.h:46
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:8472
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:340
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:434
static ControlFileData * ControlFile
Definition: xlog.c:746
TimeLineID ThisTimeLineID
Definition: xlog.c:194
TimestampTz end_time
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1711
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
TimeLineID ThisTimeLineID
#define elog(elevel,...)
Definition: elog.h:232
void XLogBeginInsert(void)
Definition: xloginsert.c:135
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn)
static

Definition at line 9497 of file xlog.c.

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, RecoveryInProgress(), START_CRIT_SECTION, XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by StartupXLOG().

/*
 * CreateOverwriteContrecordRecord -- body listing (xlog.c lines 9498-9521).
 *
 * Writes an XLOG_OVERWRITE_CONTRECORD record whose overwritten_lsn field is
 * set to aborted_lsn, flushes WAL up to the end of that record, and returns
 * the record's end position as reported by XLogInsert().  Raises ERROR if
 * recovery is no longer in progress.
 *
 * NOTE(review): listing lines 9499, 9507 and 9509 are absent from this
 * extract, so the declaration of xlrec is not visible here.  The References
 * entry for this function names xl_overwrite_contrecord::overwrite_time,
 * GetCurrentTimestamp() and START_CRIT_SECTION, which presumably sit on
 * those lines -- confirm against xlog.c.
 */
9498 {
9500  XLogRecPtr recptr;
9501 
9502  /* sanity check: only valid while recovery is still in progress */
9503  if (!RecoveryInProgress())
9504  elog(ERROR, "can only be used at end of recovery");
9505 
9506  xlrec.overwritten_lsn = aborted_lsn;
9508 
9510 
9511  XLogBeginInsert();
9512  XLogRegisterData((char *) &xlrec, sizeof(xl_overwrite_contrecord));
9513 
9514  recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
9515 
9516  XLogFlush(recptr);
9517 
9518  END_CRIT_SECTION();
9519 
9520  return recptr; /* end position of the record just written */
9521 }
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
#define END_CRIT_SECTION()
Definition: miscadmin.h:149
#define START_CRIT_SECTION()
Definition: miscadmin.h:147
bool RecoveryInProgress(void)
Definition: xlog.c:8328
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2887
#define ERROR
Definition: elog.h:46
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:80
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:340
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:434
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define elog(elevel,...)
Definition: elog.h:232
void XLogBeginInsert(void)
Definition: xloginsert.c:135

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 9611 of file xlog.c.

References archiveCleanupCommand, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_start_t, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, XLogCtlData::Insert, InvalidateObsoleteReplicationSlots(), InvalidXLogRecPtr, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, ControlFileData::time, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

/*
 * CreateRestartPoint -- body listing (xlog.c lines 9612-9854).
 *
 * Works from local copies of XLogCtl->lastCheckPointRecPtr,
 * lastCheckPointEndPtr and lastCheckPoint.  Returns false without doing a
 * restartpoint when recovery has already ended, or when no checkpoint
 * record is recorded or its redo pointer is not newer than
 * ControlFile->checkPointCopy.redo.  Otherwise it publishes the new
 * RedoRecPtr, runs CheckPointGuts(), updates the control-file fields under
 * ControlFileLock, removes/recycles and preallocates WAL segments, logs the
 * result, and returns true.
 *
 * flags is passed through to LogCheckpointStart(),
 * update_checkpoint_display() and CheckPointGuts(); the
 * CHECKPOINT_IS_SHUTDOWN bit is also tested directly.
 *
 * NOTE(review): this extract has dropped listing lines 9625, 9629, 9663,
 * 9667, 9669, 9684, 9686, 9689, 9691, 9701, 9724, 9745, 9748-9749,
 * 9752-9753, 9762, 9768, 9784, 9830, 9839 and 9849.  Several of them are
 * the statement controlled by a visible "if" (e.g. after 9761, 9751 and
 * 9829) or the opening of a visible call (before 9840 and 9850), so the
 * listing must not be read literally at those points.  The References
 * entry for this function names SpinLockAcquire/SpinLockRelease,
 * WALInsertLockAcquireExclusive()/WALInsertLockRelease(),
 * UpdateMinRecoveryPoint(), UpdateControlFile(),
 * UpdateCheckPointDistanceEstimate(), XLByteToSeg, TruncateSUBTRANS() and
 * ExecuteRecoveryCommand(), none of which appear below; presumably they
 * are on the missing lines -- confirm against xlog.c.
 */
9612 {
9613  XLogRecPtr lastCheckPointRecPtr;
9614  XLogRecPtr lastCheckPointEndPtr;
9615  CheckPoint lastCheckPoint;
9616  XLogRecPtr PriorRedoPtr;
9617  XLogRecPtr receivePtr;
9618  XLogRecPtr replayPtr;
9619  TimeLineID replayTLI;
9620  XLogRecPtr endptr;
9621  XLogSegNo _logSegNo;
9622  TimestampTz xtime;
9623 
9624  /* Get a local copy of the last safe checkpoint record. */
9626  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
9627  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
9628  lastCheckPoint = XLogCtl->lastCheckPoint;
9630 
9631  /*
9632  * Check that we're still in recovery mode. It's ok if we exit recovery
9633  * mode after this check, the restart point is valid anyway.
9634  */
9635  if (!RecoveryInProgress())
9636  {
9637  ereport(DEBUG2,
9638  (errmsg_internal("skipping restartpoint, recovery has already ended")));
9639  return false;
9640  }
9641 
9642  /*
9643  * If the last checkpoint record we've replayed is already our last
9644  * restartpoint, we can't perform a new restart point. We still update
9645  * minRecoveryPoint in that case, so that if this is a shutdown restart
9646  * point, we won't start up earlier than before. That's not strictly
9647  * necessary, but when hot standby is enabled, it would be rather weird if
9648  * the database opened up for read-only connections at a point-in-time
9649  * before the last shutdown. Such time travel is still possible in case of
9650  * immediate shutdown, though.
9651  *
9652  * We don't explicitly advance minRecoveryPoint when we do create a
9653  * restartpoint. It's assumed that flushing the buffers will do that as a
9654  * side-effect.
9655  */
9656  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
9657  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
9658  {
9659  ereport(DEBUG2,
9660  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
9661  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
9662 
9664  if (flags & CHECKPOINT_IS_SHUTDOWN)
9665  {
9666  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9668  ControlFile->time = (pg_time_t) time(NULL);
9670  LWLockRelease(ControlFileLock);
9671  }
9672  return false;
9673  }
9674 
9675  /*
9676  * Update the shared RedoRecPtr so that the startup process can calculate
9677  * the number of segments replayed since last restartpoint, and request a
9678  * restartpoint if it exceeds CheckPointSegments.
9679  *
9680  * Like in CreateCheckPoint(), hold off insertions to update it, although
9681  * during recovery this is just pro forma, because no WAL insertions are
9682  * happening.
9683  */
9685  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
9687 
9688  /* Also update the info_lck-protected copy */
9690  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
9692 
9693  /*
9694  * Prepare to accumulate statistics.
9695  *
9696  * Note: because it is possible for log_checkpoints to change while a
9697  * checkpoint proceeds, we always accumulate stats, even if
9698  * log_checkpoints is currently off.
9699  */
9700  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
9702 
9703  if (log_checkpoints)
9704  LogCheckpointStart(flags, true);
9705 
9706  /* Update the process title */
9707  update_checkpoint_display(flags, true, false);
9708 
9709  CheckPointGuts(lastCheckPoint.redo, flags);
9710 
9711  /*
9712  * Remember the prior checkpoint's redo ptr for
9713  * UpdateCheckPointDistanceEstimate()
9714  */
9715  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9716 
9717  /*
9718  * Update pg_control, using current time. Check that it still shows
9719  * DB_IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
9720  * this is a quick hack to make sure nothing really bad happens if somehow
9721  * we get here after the end-of-recovery checkpoint.
9722  */
9723  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9725  ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
9726  {
9727  ControlFile->checkPoint = lastCheckPointRecPtr;
9728  ControlFile->checkPointCopy = lastCheckPoint;
9729  ControlFile->time = (pg_time_t) time(NULL);
9730 
9731  /*
9732  * Ensure minRecoveryPoint is past the checkpoint record. Normally,
9733  * this will have happened already while writing out dirty buffers,
9734  * but not necessarily - e.g. because no buffers were dirtied. We do
9735  * this because a non-exclusive base backup uses minRecoveryPoint to
9736  * determine which WAL files must be included in the backup, and the
9737  * file (or files) containing the checkpoint record must be included,
9738  * at a minimum. Note that for an ordinary restart of recovery there's
9739  * no value in having the minimum recovery point any earlier than this
9740  * anyway, because redo will begin just after the checkpoint record.
9741  */
9742  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
9743  {
9744  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
9746 
9747  /* update local copy */
9750  }
9751  if (flags & CHECKPOINT_IS_SHUTDOWN)
9754  }
9755  LWLockRelease(ControlFileLock);
9756 
9757  /*
9758  * Update the average distance between checkpoints/restartpoints if the
9759  * prior checkpoint exists.
9760  */
9761  if (PriorRedoPtr != InvalidXLogRecPtr)
9763 
9764  /*
9765  * Delete old log files that are no longer needed for the last
9766  * restartpoint, to prevent the disk holding the xlog from growing full.
9767  */
9769 
9770  /*
9771  * Retreat _logSegNo using the current end of xlog replayed or received,
9772  * whichever is later.
9773  */
9774  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
9775  replayPtr = GetXLogReplayRecPtr(&replayTLI);
9776  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr; /* the later of the two */
9777  KeepLogSeg(endptr, &_logSegNo);
9778  if (InvalidateObsoleteReplicationSlots(_logSegNo))
9779  {
9780  /*
9781  * Some slots have been invalidated; recalculate the old-segment
9782  * horizon, starting again from RedoRecPtr.
9783  */
9785  KeepLogSeg(endptr, &_logSegNo);
9786  }
9787  _logSegNo--;
9788 
9789  /*
9790  * Try to recycle segments on a useful timeline. If we've been promoted
9791  * since the beginning of this restartpoint, use the new timeline chosen
9792  * at end of recovery (RecoveryInProgress() sets ThisTimeLineID in that
9793  * case). If we're still in recovery, use the timeline we're currently
9794  * replaying.
9795  *
9796  * There is no guarantee that the WAL segments will be useful on the
9797  * current timeline; if recovery proceeds to a new timeline right after
9798  * this, the pre-allocated WAL segments on this timeline will not be used,
9799  * and will go wasted until recycled on the next restartpoint. We'll live
9800  * with that.
9801  */
9802  if (RecoveryInProgress())
9803  ThisTimeLineID = replayTLI;
9804 
9805  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr);
9806 
9807  /*
9808  * Make more log segments if needed. (Do this after recycling old log
9809  * segments, since that may supply some of the needed files.)
9810  */
9811  PreallocXlogFiles(endptr);
9812 
9813  /*
9814  * ThisTimeLineID is normally not set when we're still in recovery.
9815  * However, recycling/preallocating segments above needed ThisTimeLineID
9816  * to determine which timeline to install the segments on. Reset it now,
9817  * to restore the normal state of affairs for debugging purposes.
9818  */
9819  if (RecoveryInProgress())
9820  ThisTimeLineID = 0;
9821 
9822  /*
9823  * Truncate pg_subtrans if possible. We can throw away all data before
9824  * the oldest XMIN of any running transaction. No future transaction will
9825  * attempt to reference any pg_subtrans entry older than that (see Asserts
9826  * in subtrans.c). When hot standby is disabled, though, we mustn't do
9827  * this because StartupSUBTRANS hasn't been called yet.
9828  */
9829  if (EnableHotStandby)
9831 
9832  /* Real work is done; log and update stats. */
9833  LogCheckpointEnd(true);
9834 
9835  /* Reset the process title */
9836  update_checkpoint_display(flags, true, true);
9837 
9838  xtime = GetLatestXTime();
9840  (errmsg("recovery restart point at %X/%X",
9841  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
9842  xtime ? errdetail("Last completed transaction was at log time %s.",
9843  timestamptz_to_str(xtime)) : 0));
9844 
9845  /*
9846  * Finally, execute archive_cleanup_command, if any.
9847  */
9848  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
9850  "archive_cleanup_command",
9851  false);
9852 
9853  return true;
9854 }
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8915
bool log_checkpoints
Definition: xlog.c:106
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal)
Definition: xlogarchive.c:287
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1740
int wal_segment_size
Definition: xlog.c:119
pg_time_t time
Definition: pg_control.h:130
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
int64 TimestampTz
Definition: timestamp.h:39
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2803
TimestampTz ckpt_start_t
Definition: xlog.h:221
slock_t info_lck
Definition: xlog.c:735
#define MemSet(start, val, len)
Definition: c.h:1008
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9530
TimestampTz GetLatestXTime(void)
Definition: xlog.c:6426
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:588
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:8328
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:338
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:703
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:8953
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
void UpdateControlFile(void)
Definition: xlog.c:4989
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8823
bool InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
Definition: slot.c:1317
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlog.c:11927
#define DEBUG2
Definition: elog.h:24
static XLogRecPtr RedoRecPtr
Definition: xlog.c:366
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3962
uint64 XLogSegNo
Definition: xlogdefs.h:48
int errdetail(const char *fmt,...)
Definition: elog.c:1042
XLogRecPtr RedoRecPtr
Definition: xlog.c:592
CheckPoint lastCheckPoint
Definition: xlog.c:705
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:869
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9963
static ControlFileData * ControlFile
Definition: xlog.c:746
TimeLineID ThisTimeLineID
Definition: xlog.c:194
#define ereport(elevel,...)
Definition: elog.h:157
int errmsg_internal(const char *fmt,...)
Definition: elog.c:996
uint64 XLogRecPtr
Definition: xlogdefs.h:21
CheckpointStatsData CheckpointStats
Definition: xlog.c:188
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1711
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
bool EnableHotStandby
Definition: xlog.c:98
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
int errmsg(const char *fmt,...)
Definition: elog.c:909
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr)
Definition: xlog.c:4097
XLogRecPtr RedoRecPtr
Definition: xlog.c:561
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:704
XLogRecPtr checkPoint
Definition: pg_control.h:131
XLogRecPtr redo
Definition: pg_control.h:37
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8791
char * archiveCleanupCommand
Definition: xlog.c:274
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:196
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:868
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1774
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ DataChecksumsEnabled()

bool DataChecksumsEnabled ( void  )

Definition at line 5018 of file xlog.c.

References Assert, and ControlFileData::data_checksum_version.

Referenced by PageIsVerifiedExtended(), PageSetChecksumCopy(), PageSetChecksumInplace(), pg_stat_get_db_checksum_failures(), pg_stat_get_db_checksum_last_failure(), ReadControlFile(), and sendFile().

/*
 * DataChecksumsEnabled -- body listing (xlog.c lines 5019-5022).
 *
 * Returns true when ControlFile->data_checksum_version is greater than
 * zero, false otherwise.
 */
5019 {
5020  Assert(ControlFile != NULL); /* ControlFile must already be set up */
5021  return (ControlFile->data_checksum_version > 0);
5022 }
uint32 data_checksum_version
Definition: pg_control.h:222
static ControlFileData * ControlFile
Definition: xlog.c:746
#define Assert(condition)
Definition: c.h:804

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 11879 of file xlog.c.

References Assert, DatumGetBool, ereport, errmsg(), EXCLUSIVE_BACKUP_NONE, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, XLogCtlData::Insert, XLogCtlInsert::nonExclusiveBackups, SESSION_BACKUP_NON_EXCLUSIVE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), and WARNING.

Referenced by perform_base_backup(), and register_persistent_abort_backup_handler().

11880 {
11881  bool emit_warning = DatumGetBool(arg);
11882 
11883  /*
11884  * Quick exit if session is not keeping around a non-exclusive backup
11885  * already started.
11886  */
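11887  if (sessionBackupState != SESSION_BACKUP_NON_EXCLUSIVE)
11888  return;
11889 
11890  WALInsertLockAcquireExclusive();
11891  Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
11892  XLogCtl->Insert.nonExclusiveBackups--;
11893 
11894  if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
11895  XLogCtl->Insert.nonExclusiveBackups == 0)
11896  {
11897  XLogCtl->Insert.forcePageWrites = false;
11898  }
11899  WALInsertLockRelease();
11900 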
11901  if (emit_warning)
11902  ereport(WARNING,
11903  (errmsg("aborting backup due to backend exiting before pg_stop_backup was called")));
11904 }
static void WALInsertLockRelease(void)
Definition: xlog.c:1740
static SessionBackupState sessionBackupState
Definition: xlog.c:522
XLogCtlInsert Insert
Definition: xlog.c:588
bool forcePageWrites
Definition: xlog.c:562
#define DatumGetBool(X)
Definition: postgres.h:437
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:574
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:573
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1711
static XLogCtlData * XLogCtl
Definition: xlog.c:738
int errmsg(const char *fmt,...)
Definition: elog.c:909
void * arg

◆ do_pg_start_backup()

XLogRecPtr do_pg_start_backup ( const char *  backupidstr,
bool  fast,
TimeLineID starttli_p,
StringInfo  labelfile,
List **  tablespaces,
StringInfo  tblspcmapfile 
)

Definition at line 10946 of file xlog.c.

References AllocateDir(), AllocateFile(), appendStringInfo(), appendStringInfoChar(), BACKUP_LABEL_FILE, backup_started_in_recovery, BoolGetDatum, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, dirent::d_name, StringInfoData::data, DataDir, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STARTING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeDir(), FreeFile(), CheckPoint::fullPageWrites, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, StringInfoData::len, log_timezone, LSN_FORMAT_ARGS, LW_SHARED, LWLockAcquire(), LWLockRelease(), makeStringInfo(), MAXFNAMELEN, MAXPGPATH, XLogCtlInsert::nonExclusiveBackups, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_fsync(), pg_localtime(), pg_start_backup_callback(), pg_strftime(), pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, SESSION_BACKUP_EXCLUSIVE, SESSION_BACKUP_NON_EXCLUSIVE, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, stat, TABLESPACE_MAP, CheckPoint::ThisTimeLineID, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToSeg, XLogFileName, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_start_backup().

10949 {
10950  bool exclusive = (labelfile == NULL);
10951  bool backup_started_in_recovery = false;
10952  XLogRecPtr checkpointloc;
10953  XLogRecPtr startpoint;
10954  TimeLineID starttli;
10955  pg_time_t stamp_time;
10956  char strfbuf[128];
10957  char xlogfilename[MAXFNAMELEN];
10958  XLogSegNo _logSegNo;
10959  struct stat stat_buf;
10960  FILE *fp;
10961 
10962  backup_started_in_recovery = RecoveryInProgress();
10963 
10964  /*
10965  * Currently only non-exclusive backup can be taken during recovery.
10966  */
10967  if (backup_started_in_recovery && exclusive)
10968  ereport(ERROR,
10969  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10970  errmsg("recovery is in progress"),
10971  errhint("WAL control functions cannot be executed during recovery.")));
10972 
10973  /*
10974  * During recovery, we don't need to check WAL level, because if WAL
10975  * level is not sufficient, it's impossible to get here during recovery.
10976  */
10977  if (!backup_started_in_recovery && !XLogIsNeeded())
10978  ereport(ERROR,
10979  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10980  errmsg("WAL level not sufficient for making an online backup"),
10981  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10982 
10983  if (strlen(backupidstr) > MAXPGPATH)
10984  ereport(ERROR,
10985  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
10986  errmsg("backup label too long (max %d bytes)",
10987  MAXPGPATH)));
10988 
10989  /*
10990  * Mark backup active in shared memory. We must do full-page WAL writes
10991  * during an on-line backup even if not doing so at other times, because
10992  * it's quite possible for the backup dump to obtain a "torn" (partially
10993  * written) copy of a database page if it reads the page concurrently with
10994  * our write to the same page. This can be fixed as long as the first
10995  * write to the page in the WAL sequence is a full-page write. Hence, we
10996  * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
10997  * are no dirty pages in shared memory that might get dumped while the
10998  * backup is in progress without having a corresponding WAL record. (Once
10999  * the backup is complete, we need not force full-page writes anymore,
11000  * since we expect that any pages not modified during the backup interval
11001  * must have been correctly captured by the backup.)
11002  *
11003  * Note that forcePageWrites has no effect during an online backup from
11004  * the standby.
11005  *
11006  * We must hold all the insertion locks to change the value of
11007  * forcePageWrites, to ensure adequate interlocking against
11008  * XLogInsertRecord().
11009  */
11011  if (exclusive)
11012  {
11013  /*
11014  * First, mark that we're now starting an exclusive backup, to
11015  * ensure that there are no other sessions currently running
11016  * pg_start_backup() or pg_stop_backup().
11017  */
11019  {
11021  ereport(ERROR,
11022  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11023  errmsg("a backup is already in progress"),
11024  errhint("Run pg_stop_backup() and try again.")));
11025  }
11027  }
11028  else
11030  XLogCtl->Insert.forcePageWrites = true;
11032 
11033  /* Ensure we release forcePageWrites if fail below */
11035  {
11036  bool gotUniqueStartpoint = false;
11037  DIR *tblspcdir;
11038  struct dirent *de;
11039  tablespaceinfo *ti;
11040  int datadirpathlen;
11041 
11042  /*
11043  * Force an XLOG file switch before the checkpoint, to ensure that the
11044  * WAL segment the checkpoint is written to doesn't contain pages with
11045  * old timeline IDs. That would otherwise happen if you called
11046  * pg_start_backup() right after restoring from a PITR archive: the
11047  * first WAL segment containing the startup checkpoint has pages in
11048  * the beginning with the old timeline ID. That can cause trouble at
11049  * recovery: we won't have a history file covering the old timeline if
11050  * pg_wal directory was not included in the base backup and the WAL
11051  * archive was cleared too before starting the backup.
11052  *
11053  * This also ensures that we have emitted a WAL page header that has
11054  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
11055  * Therefore, if a WAL archiver (such as pglesslog) is trying to
11056  * compress out removable backup blocks, it won't remove any that
11057  * occur after this point.
11058  *
11059  * During recovery, we skip forcing XLOG file switch, which means that
11060  * the backup taken during recovery is not available for the special
11061  * recovery case described above.
11062  */
11063  if (!backup_started_in_recovery)
11064  RequestXLogSwitch(false);
11065 
11066  do
11067  {
11068  bool checkpointfpw;
11069 
11070  /*
11071  * Force a CHECKPOINT. Aside from being necessary to prevent torn
11072  * page problems, this guarantees that two successive backup runs
11073  * will have different checkpoint positions and hence different
11074  * history file names, even if nothing happened in between.
11075  *
11076  * During recovery, establish a restartpoint if possible. We use
11077  * the last restartpoint as the backup starting checkpoint. This
11078  * means that two successive backup runs can have same checkpoint
11079  * positions.
11080  *
11081  * Since the fact that we are executing do_pg_start_backup()
11082  * during recovery means that checkpointer is running, we can use
11083  * RequestCheckpoint() to establish a restartpoint.
11084  *
11085  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
11086  * passing fast = true). Otherwise this can take a while.
11087  */
11089  (fast ? CHECKPOINT_IMMEDIATE : 0));
11090 
11091  /*
11092  * Now we need to fetch the checkpoint record location, and also
11093  * its REDO pointer. The oldest point in WAL that would be needed
11094  * to restore starting from the checkpoint is precisely the REDO
11095  * pointer.
11096  */
11097  LWLockAcquire(ControlFileLock, LW_SHARED);
11098  checkpointloc = ControlFile->checkPoint;
11099  startpoint = ControlFile->checkPointCopy.redo;
11101  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
11102  LWLockRelease(ControlFileLock);
11103 
11104  if (backup_started_in_recovery)
11105  {
11106  XLogRecPtr recptr;
11107 
11108  /*
11109  * Check to see if all WAL replayed during online backup
11110  * (i.e., since last restartpoint used as backup starting
11111  * checkpoint) contain full-page writes.
11112  */
11114  recptr = XLogCtl->lastFpwDisableRecPtr;
11116 
11117  if (!checkpointfpw || startpoint <= recptr)
11118  ereport(ERROR,
11119  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11120  errmsg("WAL generated with full_page_writes=off was replayed "
11121  "since last restartpoint"),
11122  errhint("This means that the backup being taken on the standby "
11123  "is corrupt and should not be used. "
11124  "Enable full_page_writes and run CHECKPOINT on the primary, "
11125  "and then try an online backup again.")));
11126 
11127  /*
11128  * During recovery, since we don't use the end-of-backup WAL
11129  * record and don't write the backup history file, the
11130  * starting WAL location doesn't need to be unique. This means
11131  * that two base backups started at the same time might use
11132  * the same checkpoint as starting locations.
11133  */
11134  gotUniqueStartpoint = true;
11135  }
11136 
11137  /*
11138  * If two base backups are started at the same time (in WAL sender
11139  * processes), we need to make sure that they use different
11140  * checkpoints as starting locations, because we use the starting
11141  * WAL location as a unique identifier for the base backup in the
11142  * end-of-backup WAL record and when we write the backup history
11143  * file. Perhaps it would be better to generate a separate unique ID
11144  * for each backup instead of forcing another checkpoint, but
11145  * taking a checkpoint right after another is not that expensive
11146  * either because only a few buffers have been dirtied yet.
11147  */
11149  if (XLogCtl->Insert.lastBackupStart < startpoint)
11150  {
11151  XLogCtl->Insert.lastBackupStart = startpoint;
11152  gotUniqueStartpoint = true;
11153  }
11155  } while (!gotUniqueStartpoint);
11156 
11157  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11158  XLogFileName(xlogfilename, starttli, _logSegNo, wal_segment_size);
11159 
11160  /*
11161  * Construct tablespace_map file. If caller isn't interested in this,
11162  * we make a local StringInfo.
11163  */
11164  if (tblspcmapfile == NULL)
11165  tblspcmapfile = makeStringInfo();
11166 
11167  datadirpathlen = strlen(DataDir);
11168 
11169  /* Collect information about all tablespaces */
11170  tblspcdir = AllocateDir("pg_tblspc");
11171  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
11172  {
11173  char fullpath[MAXPGPATH + 10];
11174  char linkpath[MAXPGPATH];
11175  char *relpath = NULL;
11176  int rllen;
11177  StringInfoData escapedpath;
11178  char *s;
11179 
11180  /* Skip anything that doesn't look like a tablespace */
11181  if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
11182  continue;
11183 
11184  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
11185 
11186 #if defined(HAVE_READLINK) || defined(WIN32)
11187  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
11188  if (rllen < 0)
11189  {
11190  ereport(WARNING,
11191  (errmsg("could not read symbolic link \"%s\": %m",
11192  fullpath)));
11193  continue;
11194  }
11195  else if (rllen >= sizeof(linkpath))
11196  {
11197  ereport(WARNING,
11198  (errmsg("symbolic link \"%s\" target is too long",
11199  fullpath)));
11200  continue;
11201  }
11202  linkpath[rllen] = '\0';
11203 
11204  /*
11205  * Build a backslash-escaped version of the link path to include
11206  * in the tablespace map file.
11207  */
11208  initStringInfo(&escapedpath);
11209  for (s = linkpath; *s; s++)
11210  {
11211  if (*s == '\n' || *s == '\r' || *s == '\\')
11212  appendStringInfoChar(&escapedpath, '\\');
11213  appendStringInfoChar(&escapedpath, *s);
11214  }
11215 
11216  /*
11217  * Relpath holds the relative path of the tablespace directory
11218  * when it's located within PGDATA, or NULL if it's located
11219  * elsewhere.
11220  */
11221  if (rllen > datadirpathlen &&
11222  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
11223  IS_DIR_SEP(linkpath[datadirpathlen]))
11224  relpath = linkpath + datadirpathlen + 1;
11225 
11226  ti = palloc(sizeof(tablespaceinfo));
11227  ti->oid = pstrdup(de->d_name);
11228  ti->path = pstrdup(linkpath);
11229  ti->rpath = relpath ? pstrdup(relpath) : NULL;
11230  ti->size = -1;
11231 
11232  if (tablespaces)
11233  *tablespaces = lappend(*tablespaces, ti);
11234 
11235  appendStringInfo(tblspcmapfile, "%s %s\n",
11236  ti->oid, escapedpath.data);
11237 
11238  pfree(escapedpath.data);
11239 #else
11240 
11241  /*
11242  * If the platform does not have symbolic links, it should not be
11243  * possible to have tablespaces - clearly somebody else created
11244  * them. Warn about it and ignore.
11245  */
11246  ereport(WARNING,
11247  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
11248  errmsg("tablespaces are not supported on this platform")));
11249 #endif
11250  }
11251  FreeDir(tblspcdir);
11252 
11253  /*
11254  * Construct backup label file. If caller isn't interested in this,
11255  * we make a local StringInfo.
11256  */
11257  if (labelfile == NULL)
11258  labelfile = makeStringInfo();
11259 
11260  /* Use the log timezone here, not the session timezone */
11261  stamp_time = (pg_time_t) time(NULL);
11262  pg_strftime(strfbuf, sizeof(strfbuf),
11263  "%Y-%m-%d %H:%M:%S %Z",
11264  pg_localtime(&stamp_time, log_timezone));
11265  appendStringInfo(labelfile, "START WAL LOCATION: %X/%X (file %s)\n",
11266  LSN_FORMAT_ARGS(startpoint), xlogfilename);
11267  appendStringInfo(labelfile, "CHECKPOINT LOCATION: %X/%X\n",
11268  LSN_FORMAT_ARGS(checkpointloc));
11269  appendStringInfo(labelfile, "BACKUP METHOD: %s\n",
11270  exclusive ? "pg_start_backup" : "streamed");
11271  appendStringInfo(labelfile, "BACKUP FROM: %s\n",
11272  backup_started_in_recovery ? "standby" : "primary");
11273  appendStringInfo(labelfile, "START TIME: %s\n", strfbuf);
11274  appendStringInfo(labelfile, "LABEL: %s\n", backupidstr);
11275  appendStringInfo(labelfile, "START TIMELINE: %u\n", starttli);
11276 
11277  /*
11278  * Okay, write the file, or return its contents to caller.
11279  */
11280  if (exclusive)
11281  {
11282  /*
11283  * Check for existing backup label --- implies a backup is already
11284  * running. (XXX given that we checked exclusiveBackupState
11285  * above, maybe it would be OK to just unlink any such label
11286  * file?)
11287  */
11288  if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
11289  {
11290  if (errno != ENOENT)
11291  ereport(ERROR,
11293  errmsg("could not stat file \"%s\": %m",
11294  BACKUP_LABEL_FILE)));
11295  }
11296  else
11297  ereport(ERROR,
11298  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11299  errmsg("a backup is already in progress"),
11300  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
11301  BACKUP_LABEL_FILE)));
11302 
11303  fp = AllocateFile(BACKUP_LABEL_FILE, "w");
11304 
11305  if (!fp)
11306  ereport(ERROR,
11308  errmsg("could not create file \"%s\": %m",
11309  BACKUP_LABEL_FILE)));
11310  if (fwrite(labelfile->data, labelfile->len, 1, fp) != 1 ||
11311  fflush(fp) != 0 ||
11312  pg_fsync(fileno(fp)) != 0 ||
11313  ferror(fp) ||
11314  FreeFile(fp))
11315  ereport(ERROR,
11317  errmsg("could not write file \"%s\": %m",
11318  BACKUP_LABEL_FILE)));
11319  /* Allocated locally for exclusive backups, so free separately */
11320  pfree(labelfile->data);
11321  pfree(labelfile);
11322 
11323  /* Write backup tablespace_map file. */
11324  if (tblspcmapfile->len > 0)
11325  {
11326  if (stat(TABLESPACE_MAP, &stat_buf) != 0)
11327  {
11328  if (errno != ENOENT)
11329  ereport(ERROR,
11331  errmsg("could not stat file \"%s\": %m",
11332  TABLESPACE_MAP)));
11333  }
11334  else
11335  ereport(ERROR,
11336  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11337  errmsg("a backup is already in progress"),
11338  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
11339  TABLESPACE_MAP)));
11340 
11341  fp = AllocateFile(TABLESPACE_MAP, "w");
11342 
11343  if (!fp)
11344  ereport(ERROR,
11346  errmsg("could not create file \"%s\": %m",
11347  TABLESPACE_MAP)));
11348  if (fwrite(tblspcmapfile->data, tblspcmapfile->len, 1, fp) != 1 ||
11349  fflush(fp) != 0 ||
11350  pg_fsync(fileno(fp)) != 0 ||
11351  ferror(fp) ||
11352  FreeFile(fp))
11353  ereport(ERROR,
11355  errmsg("could not write file \"%s\": %m",
11356  TABLESPACE_MAP)));
11357  }
11358 
11359  /* Allocated locally for exclusive backups, so free separately */
11360  pfree(tblspcmapfile->data);
11361  pfree(tblspcmapfile);
11362  }
11363  }
11365 
11366  /*
11367  * Mark that start phase has correctly finished for an exclusive backup.
11368  * Session-level locks are updated as well to reflect that state.
11369  *
11370  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating backup
11371  * counters and session-level lock. Otherwise they can be updated
11372  * inconsistently, which might cause do_pg_abort_backup() to fail.
11373  */
11374  if (exclusive)
11375  {
11378 
11379  /* Set session-level lock */
11382  }
11383  else
11385 
11386  /*
11387  * We're done. As a convenience, return the starting WAL location.
11388  */
11389  if (starttli_p)
11390  *starttli_p = starttli;
11391  return startpoint;
11392 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:128
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:10056
int errhint(const char *fmt,...)
Definition: elog.c:1156
uint32 TimeLineID
Definition: xlogdefs.h:59
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1740
int wal_segment_size
Definition: xlog.c:119
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:733
static SessionBackupState sessionBackupState
Definition: xlog.c:522
XLogRecPtr lastBackupStart
Definition: xlog.c:575
char * pstrdup(const char *in)
Definition: mcxt.c:1299
#define XLogIsNeeded()
Definition: xlog.h:166
char * rpath
Definition: basebackup.h:32
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
slock_t info_lck
Definition: xlog.c:735
int errcode(int sqlerrcode)
Definition: elog.c:698
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:588
bool RecoveryInProgress(void)
Definition: xlog.c:8328
static bool backup_started_in_recovery
Definition: basebackup.c:88
Definition: dirent.h:9
#define IS_DIR_SEP(ch)
Definition: port.h:84
pg_tz * log_timezone
Definition: pgtz.c:31
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
#define TABLESPACE_MAP
Definition: xlog.h:366
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
void pfree(void *pointer)
Definition: mcxt.c:1169
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool forcePageWrites
Definition: xlog.c:562
Definition: dirent.c:25
#define ERROR
Definition: elog.h:46
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:48
#define readlink(path, buf, size)
Definition: win32_port.h:236
int errcode_for_file_access(void)
Definition: elog.c:721
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2459
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2720
#define CHECKPOINT_FORCE
Definition: xlog.h:200
List * lappend(List *list, void *datum)
Definition: list.c:336
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:574
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
static void pg_start_backup_callback(int code, Datum arg)
Definition: xlog.c:11396
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:573
uintptr_t Datum
Definition: postgres.h:411
static ControlFileData * ControlFile
Definition: xlog.c:746
#define BoolGetDatum(X)
Definition: postgres.h:446
#define ereport(elevel,...)
Definition: elog.h:157
bool fullPageWrites
Definition: pg_control.h:42
#define CHECKPOINT_WAIT
Definition: xlog.h:204
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2786
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1711
static XLogCtlData * XLogCtl
Definition: xlog.c:738
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1342
int FreeFile(FILE *file)
Definition: fd.c:2658
void * palloc(Size size)
Definition: mcxt.c:1062
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:199
#define relpath(rnode, forknum)
Definition: relpath.h:87
char * DataDir
Definition: globals.c:65
#define BACKUP_LABEL_FILE
Definition: xlog.h:363
int pg_fsync(int fd)
Definition: fd.c:357
char d_name[MAX_PATH]
Definition: dirent.h:15
#define snprintf
Definition: port.h:217
XLogRecPtr checkPoint
Definition: pg_control.h:131
XLogRecPtr redo
Definition: pg_control.h:37
int FreeDir(DIR *dir)
Definition: fd.c:2838
#define stat
Definition: win32_port.h:283
void RequestCheckpoint(int flags)
Definition: checkpointer.c:920
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)

◆ do_pg_stop_backup()

XLogRecPtr do_pg_stop_backup ( char *  labelfile,
bool  waitforarchive,
TimeLineID stoptli_p 
)

Definition at line 11464 of file xlog.c.

References AllocateFile(), Assert, BACKUP_LABEL_FILE, backup_started_in_recovery, BackupHistoryFileName, BackupHistoryFilePath, BoolGetDatum, CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), DEBUG1, durable_unlink(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STOPPING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::lastFpwDisableRecPtr, log_timezone, LSN_FORMAT_ARGS, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, XLogCtlInsert::nonExclusiveBackups, NOTICE, palloc(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_localtime(), pg_stop_backup_callback(), pg_strftime(), RecoveryInProgress(), remaining, RequestXLogSwitch(), ResetLatch(), SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, stat::st_size, stat, TABLESPACE_MAP, ThisTimeLineID, WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogFileName, XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), pg_stop_backup(), and pg_stop_backup_v2().

11465 {
11466  bool exclusive = (labelfile == NULL);
11467  bool backup_started_in_recovery = false;
11468  XLogRecPtr startpoint;
11469  XLogRecPtr stoppoint;
11470  TimeLineID stoptli;
11471  pg_time_t stamp_time;
11472  char strfbuf[128];
11473  char histfilepath[MAXPGPATH];
11474  char startxlogfilename[MAXFNAMELEN];
11475  char stopxlogfilename[MAXFNAMELEN];
11476  char lastxlogfilename[MAXFNAMELEN];
11477  char histfilename[MAXFNAMELEN];
11478  char backupfrom[20];
11479  XLogSegNo _logSegNo;
11480  FILE *lfp;
11481  FILE *fp;
11482  char ch;
11483  int seconds_before_warning;
11484  int waits = 0;
11485  bool reported_waiting = false;
11486  char *remaining;
11487  char *ptr;
11488  uint32 hi,
11489  lo;
11490 
11491  backup_started_in_recovery = RecoveryInProgress();
11492 
11493  /*
11494  * Currently only non-exclusive backup can be taken during recovery.
11495  */
11496  if (backup_started_in_recovery && exclusive)
11497  ereport(ERROR,
11498  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11499  errmsg("recovery is in progress"),
11500  errhint("WAL control functions cannot be executed during recovery.")));
11501 
11502  /*
11503  * During recovery, we don't need to check WAL level, because if WAL
11504  * level is not sufficient, it's impossible to get here during recovery.
11505  */
11506  if (!backup_started_in_recovery && !XLogIsNeeded())
11507  ereport(ERROR,
11508  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11509  errmsg("WAL level not sufficient for making an online backup"),
11510  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
11511 
11512  if (exclusive)
11513  {
11514  /*
11515  * First, mark that we're now stopping an exclusive backup, to
11516  * ensure that there are no other sessions currently running
11517  * pg_start_backup() or pg_stop_backup().
11518  */
11521  {
11523  ereport(ERROR,
11524  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11525  errmsg("exclusive backup not in progress")));
11526  }
11529 
11530  /*
11531  * Remove backup_label. In case of failure, the state for an exclusive
11532  * backup is switched back to in-progress.
11533  */
11535  {
11536  /*
11537  * Read the existing label file into memory.
11538  */
11539  struct stat statbuf;
11540  int r;
11541 
11542  if (stat(BACKUP_LABEL_FILE, &statbuf))
11543  {
11544  /* should not happen per the upper checks */
11545  if (errno != ENOENT)
11546  ereport(ERROR,
11548  errmsg("could not stat file \"%s\": %m",
11549  BACKUP_LABEL_FILE)));
11550  ereport(ERROR,
11551  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11552  errmsg("a backup is not in progress")));
11553  }
11554 
11555  lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
11556  if (!lfp)
11557  {
11558  ereport(ERROR,
11560  errmsg("could not read file \"%s\": %m",
11561  BACKUP_LABEL_FILE)));
11562  }
11563  labelfile = palloc(statbuf.st_size + 1);
11564  r = fread(labelfile, statbuf.st_size, 1, lfp);
11565  labelfile[statbuf.st_size] = '\0';
11566 
11567  /*
11568  * Close and remove the backup label file
11569  */
11570  if (r != 1 || ferror(lfp) || FreeFile(lfp))
11571  ereport(ERROR,
11573  errmsg("could not read file \"%s\": %m",
11574  BACKUP_LABEL_FILE)));
11576 
11577  /*
11578  * Remove tablespace_map file if present, it is created only if
11579  * there are tablespaces.
11580  */
11582  }
11584  }
11585 
11586  /*
11587  * OK to update backup counters, forcePageWrites and session-level lock.
11588  *
11589  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them.
11590  * Otherwise they can be updated inconsistently, which might cause
11591  * do_pg_abort_backup() to fail.
11592  */
11594  if (exclusive)
11595  {
11597  }
11598  else
11599  {
11600  /*
11601  * The user-visible pg_start/stop_backup() functions that operate on
11602  * exclusive backups can be called at any time, but for non-exclusive
11603  * backups, it is expected that each do_pg_start_backup() call is
11604  * matched by exactly one do_pg_stop_backup() call.
11605  */
11608  }
11609 
11612  {
11613  XLogCtl->Insert.forcePageWrites = false;
11614  }
11615 
11616  /*
11617  * Clean up session-level lock.
11618  *
11619  * You might think that WALInsertLockRelease() can be called before
11620  * cleaning up the session-level lock, because the session-level lock
11621  * doesn't need to be protected by the WAL insertion lock. But since
11622  * CHECK_FOR_INTERRUPTS() can occur inside WALInsertLockRelease(), the
11623  * session-level lock must be cleaned up before calling it.
11624  */
11626 
11628 
11629  /*
11630  * Read and parse the START WAL LOCATION line (this code is pretty crude,
11631  * but we are not expecting any variability in the file format).
11632  */
11633  if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
11634  &hi, &lo, startxlogfilename,
11635  &ch) != 4 || ch != '\n')
11636  ereport(ERROR,
11637  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11638  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
11639  startpoint = ((uint64) hi) << 32 | lo;
11640  remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
11641 
11642  /*
11643  * Parse the BACKUP FROM line. If we are taking an online backup from the
11644  * standby, we confirm that the standby has not been promoted during the
11645  * backup.
11646  */
11647  ptr = strstr(remaining, "BACKUP FROM:");
11648  if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
11649  ereport(ERROR,
11650  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11651  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
11652  if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
11653  ereport(ERROR,
11654  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11655  errmsg("the standby was promoted during online backup"),
11656  errhint("This means that the backup being taken is corrupt "
11657  "and should not be used. "
11658  "Try taking another online backup.")));
11659 
11660  /*
11661  * During recovery, we don't write an end-of-backup record. We assume that
11662  * pg_control was backed up last and its minimum recovery point can be
11663  * available as the backup end location. Since we don't have an
11664  * end-of-backup record, we use the pg_control value to check whether
11665  * we've reached the end of backup when starting recovery from this
11666  * backup. However, we have no way of checking whether pg_control was
11667  * actually backed up last.
11668  *
11669  * We don't force a switch to new WAL file but it is still possible to
11670  * wait for all the required files to be archived if waitforarchive is
11671  * true. This is okay if we use the backup to start a standby and fetch
11672  * the missing WAL using streaming replication. But in the case of an
11673  * archive recovery, a user should set waitforarchive to true and wait for
11674  * them to be archived to ensure that all the required files are
11675  * available.
11676  *
11677  * We return the current minimum recovery point as the backup end
11678  * location. Note that it can be greater than the exact backup end
11679  * location if the minimum recovery point is updated after the backup of
11680  * pg_control. This is harmless for current uses.
11681  *
11682  * XXX currently a backup history file is for informational and debug
11683  * purposes only. It's not essential for an online backup. Furthermore,
11684  * even if it's created, it will not be archived during recovery because
11685  * an archiver is not invoked. So it doesn't seem worthwhile to write a
11686  * backup history file during recovery.
11687  */
11688  if (backup_started_in_recovery)
11689  {
11690  XLogRecPtr recptr;
11691 
11692  /*
11693  * Check to see if all WAL replayed during online backup contain
11694  * full-page writes.
11695  */
11697  recptr = XLogCtl->lastFpwDisableRecPtr;
11699 
11700  if (startpoint <= recptr)
11701  ereport(ERROR,
11702  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
11703  errmsg("WAL generated with full_page_writes=off was replayed "
11704  "during online backup"),
11705  errhint("This means that the backup being taken on the standby "
11706  "is corrupt and should not be used. "
11707  "Enable full_page_writes and run CHECKPOINT on the primary, "
11708  "and then try an online backup again.")));
11709 
11710 
11711  LWLockAcquire(ControlFileLock, LW_SHARED);
11712  stoppoint = ControlFile->minRecoveryPoint;
11713  stoptli = ControlFile->minRecoveryPointTLI;
11714  LWLockRelease(ControlFileLock);
11715  }
11716  else
11717  {
11718  /*
11719  * Write the backup-end xlog record
11720  */
11721  XLogBeginInsert();
11722  XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
11723  stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
11724  stoptli = ThisTimeLineID;
11725 
11726  /*
11727  * Force a switch to a new xlog segment file, so that the backup is
11728  * valid as soon as archiver moves out the current segment file.
11729  */
11730  RequestXLogSwitch(false);
11731 
11732  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11733  XLogFileName(stopxlogfilename, stoptli, _logSegNo, wal_segment_size);
11734 
11735  /* Use the log timezone here, not the session timezone */
11736  stamp_time = (pg_time_t) time(NULL);
11737  pg_strftime(strfbuf, sizeof(strfbuf),
11738  "%Y-%m-%d %H:%M:%S %Z",
11739  pg_localtime(&stamp_time, log_timezone));
11740 
11741  /*
11742  * Write the backup history file
11743  */
11744  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11745  BackupHistoryFilePath(histfilepath, stoptli, _logSegNo,
11746  startpoint, wal_segment_size);
11747  fp = AllocateFile(histfilepath, "w");
11748  if (!fp)
11749  ereport(ERROR,
11751  errmsg("could not create file \"%s\": %m",
11752  histfilepath)));
11753  fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
11754  LSN_FORMAT_ARGS(startpoint), startxlogfilename);
11755  fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
11756  LSN_FORMAT_ARGS(stoppoint), stopxlogfilename);
11757 
11758  /*
11759  * Transfer remaining lines including label and start timeline to
11760  * history file.
11761  */
11762  fprintf(fp, "%s", remaining);
11763  fprintf(fp, "STOP TIME: %s\n", strfbuf);
11764  fprintf(fp, "STOP TIMELINE: %u\n", stoptli);
11765  if (fflush(fp) || ferror(fp) || FreeFile(fp))
11766  ereport(ERROR,
11768  errmsg("could not write file \"%s\": %m",
11769  histfilepath)));
11770 
11771  /*
11772  * Clean out any no-longer-needed history files. As a side effect,
11773  * this will post a .ready file for the newly created history file,
11774  * notifying the archiver that the history file may be archived
11775  * immediately.
11776  */
11778  }
11779 
11780  /*
11781  * If archiving is enabled, wait for all the required WAL files to be
11782  * archived before returning. If archiving isn't enabled, the required WAL
11783  * needs to be transported via streaming replication (hopefully with
11784  * wal_keep_size set high enough), or some more exotic mechanism like
11785  * polling and copying files from pg_wal with a script. We have no knowledge
11786  * of those mechanisms, so it's up to the user to ensure that he gets all
11787  * the required WAL.
11788  *
11789  * We wait until both the last WAL file filled during backup and the
11790  * history file have been archived, and assume that the alphabetic sorting
11791  * property of the WAL files ensures any earlier WAL files are safely
11792  * archived as well.
11793  *
11794  * We wait forever, since archive_command is supposed to work and we
11795  * assume the admin wanted his backup to work completely. If you don't
11796  * wish to wait, then either waitforarchive should be passed in as false,
11797  * or you can set statement_timeout. Also, some notices are issued to
11798  * clue in anyone who might be doing this interactively.
11799  */
11800 
11801  if (waitforarchive &&
11802  ((!backup_started_in_recovery && XLogArchivingActive()) ||
11803  (backup_started_in_recovery && XLogArchivingAlways())))
11804  {
11805  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11806  XLogFileName(lastxlogfilename, stoptli, _logSegNo, wal_segment_size);
11807 
11808  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11809  BackupHistoryFileName(histfilename, stoptli, _logSegNo,
11810  startpoint, wal_segment_size);
11811 
11812  seconds_before_warning = 60;
11813  waits = 0;
11814 
11815  while (XLogArchiveIsBusy(lastxlogfilename) ||
11816  XLogArchiveIsBusy(histfilename))
11817  {
11819 
11820  if (!reported_waiting && waits > 5)
11821  {
11822  ereport(NOTICE,
11823  (errmsg("base backup done, waiting for required WAL segments to be archived")));
11824  reported_waiting = true;
11825  }
11826 
11827  (void) WaitLatch(MyLatch,
11829  1000L,
11832 
11833  if (++waits >= seconds_before_warning)
11834  {
11835  seconds_before_warning *= 2; /* This wraps in >10 years... */
11836  ereport(WARNING,
11837  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
11838  waits),
11839  errhint("Check that your archive_command is executing properly. "
11840  "You can safely cancel this backup, "
11841  "but the database backup will not be usable without all the WAL segments.")));
11842  }
11843  }
11844 
11845  ereport(NOTICE,
11846  (errmsg("all required WAL segments have been archived")));
11847  }
11848  else if (waitforarchive)
11849  ereport(NOTICE,
11850  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
11851 
11852  /*
11853  * We're done. As a convenience, return the ending WAL location.
11854  */
11855  if (stoptli_p)
11856  *stoptli_p = stoptli;
11857  return stoppoint;
11858 }
int remaining
Definition: informix.c:667
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:128
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:10056
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:1156
uint32 TimeLineID
Definition: xlogdefs.h:59
int64 pg_time_t
Definition: pgtime.h:23