PostgreSQL Source Code  git master
xlog.h File Reference
#include "access/xlogbackup.h"
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
Include dependency graph for xlog.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CheckpointStatsData
 

Macros

#define XLogArchivingActive()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
 
#define XLogArchivingAlways()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
 
#define XLogIsNeeded()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogHintBitIsNeeded()   (DataChecksumsEnabled() || wal_log_hints)
 
#define XLogStandbyInfoActive()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogLogicalInfoActive()   (wal_level >= WAL_LEVEL_LOGICAL)
 
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
 
#define CHECKPOINT_END_OF_RECOVERY   0x0002 /* Like shutdown checkpoint, but issued at end of WAL recovery */
 
#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */
 
#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
 
#define CHECKPOINT_FLUSH_ALL   0x0010 /* Flush all pages, including those belonging to unlogged tables */
 
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
 
#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */
 
#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */
 
#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */
 
#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
 
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
 
#define RECOVERY_SIGNAL_FILE   "recovery.signal"
 
#define STANDBY_SIGNAL_FILE   "standby.signal"
 
#define BACKUP_LABEL_FILE   "backup_label"
 
#define BACKUP_LABEL_OLD   "backup_label.old"
 
#define TABLESPACE_MAP   "tablespace_map"
 
#define TABLESPACE_MAP_OLD   "tablespace_map.old"
 
#define PROMOTE_SIGNAL_FILE   "promote"
 

Typedefs

typedef enum ArchiveMode ArchiveMode
 
typedef enum WalLevel WalLevel
 
typedef enum WalCompression WalCompression
 
typedef enum RecoveryState RecoveryState
 
typedef struct CheckpointStatsData CheckpointStatsData
 
typedef enum WALAvailability WALAvailability
 
typedef enum SessionBackupState SessionBackupState
 

Enumerations

enum  WalSyncMethod {
  WAL_SYNC_METHOD_FSYNC = 0 , WAL_SYNC_METHOD_FDATASYNC , WAL_SYNC_METHOD_OPEN , WAL_SYNC_METHOD_FSYNC_WRITETHROUGH ,
  WAL_SYNC_METHOD_OPEN_DSYNC
}
 
enum  ArchiveMode { ARCHIVE_MODE_OFF = 0 , ARCHIVE_MODE_ON , ARCHIVE_MODE_ALWAYS }
 
enum  WalLevel { WAL_LEVEL_MINIMAL = 0 , WAL_LEVEL_REPLICA , WAL_LEVEL_LOGICAL }
 
enum  WalCompression { WAL_COMPRESSION_NONE = 0 , WAL_COMPRESSION_PGLZ , WAL_COMPRESSION_LZ4 , WAL_COMPRESSION_ZSTD }
 
enum  RecoveryState { RECOVERY_STATE_CRASH = 0 , RECOVERY_STATE_ARCHIVE , RECOVERY_STATE_DONE }
 
enum  WALAvailability {
  WALAVAIL_INVALID_LSN , WALAVAIL_RESERVED , WALAVAIL_EXTENDED , WALAVAIL_UNRESERVED ,
  WALAVAIL_REMOVED
}
 
enum  SessionBackupState { SESSION_BACKUP_NONE , SESSION_BACKUP_RUNNING }
 

Functions

XLogRecPtr XLogInsertRecord (struct XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void xlog_redo (struct XLogReaderState *record)
 
void xlog_desc (StringInfo buf, struct XLogReaderState *record)
 
const char * xlog_identify (uint8 info)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
void InitializeWalConsistencyChecking (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
void StartupXLOG (void)
 
void ShutdownXLOG (int code, Datum arg)
 
void CreateCheckPoint (int flags)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetRedoRecPtr (void)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
void SetWalWriterSleeping (bool sleeping)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void XLogShutdownWalRcv (void)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
SessionBackupState get_backup_status (void)
 

Variables

PGDLLIMPORT int wal_sync_method
 
PGDLLIMPORT XLogRecPtr ProcLastRecPtr
 
PGDLLIMPORT XLogRecPtr XactLastRecEnd
 
PGDLLIMPORT XLogRecPtr XactLastCommitEnd
 
PGDLLIMPORT int wal_segment_size
 
PGDLLIMPORT int min_wal_size_mb
 
PGDLLIMPORT int max_wal_size_mb
 
PGDLLIMPORT int wal_keep_size_mb
 
PGDLLIMPORT int max_slot_wal_keep_size_mb
 
PGDLLIMPORT int XLOGbuffers
 
PGDLLIMPORT int XLogArchiveTimeout
 
PGDLLIMPORT int wal_retrieve_retry_interval
 
PGDLLIMPORT char * XLogArchiveCommand
 
PGDLLIMPORT bool EnableHotStandby
 
PGDLLIMPORT bool fullPageWrites
 
PGDLLIMPORT bool wal_log_hints
 
PGDLLIMPORT int wal_compression
 
PGDLLIMPORT bool wal_init_zero
 
PGDLLIMPORT bool wal_recycle
 
PGDLLIMPORT bool * wal_consistency_checking
 
PGDLLIMPORT char * wal_consistency_checking_string
 
PGDLLIMPORT bool log_checkpoints
 
PGDLLIMPORT int CommitDelay
 
PGDLLIMPORT int CommitSiblings
 
PGDLLIMPORT bool track_wal_io_timing
 
PGDLLIMPORT int wal_decode_buffer_size
 
PGDLLIMPORT int CheckPointSegments
 
PGDLLIMPORT int XLogArchiveMode
 
PGDLLIMPORT int wal_level
 
PGDLLIMPORT CheckpointStatsData CheckpointStats
 

Macro Definition Documentation

◆ BACKUP_LABEL_FILE

#define BACKUP_LABEL_FILE   "backup_label"

Definition at line 301 of file xlog.h.

◆ BACKUP_LABEL_OLD

#define BACKUP_LABEL_OLD   "backup_label.old"

Definition at line 302 of file xlog.h.

◆ CHECKPOINT_CAUSE_TIME

#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */

Definition at line 149 of file xlog.h.

◆ CHECKPOINT_CAUSE_XLOG

#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */

Definition at line 148 of file xlog.h.

◆ CHECKPOINT_END_OF_RECOVERY

#define CHECKPOINT_END_OF_RECOVERY
Value:
0x0002 /* Like shutdown checkpoint, but
* issued at end of WAL recovery */

Definition at line 140 of file xlog.h.

◆ CHECKPOINT_FLUSH_ALL

#define CHECKPOINT_FLUSH_ALL
Value:
0x0010 /* Flush all pages, including those
* belonging to unlogged tables */

Definition at line 143 of file xlog.h.

◆ CHECKPOINT_FORCE

#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */

Definition at line 142 of file xlog.h.

◆ CHECKPOINT_IMMEDIATE

#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */

Definition at line 141 of file xlog.h.

◆ CHECKPOINT_IS_SHUTDOWN

#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */

Definition at line 139 of file xlog.h.

◆ CHECKPOINT_REQUESTED

#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */

Definition at line 146 of file xlog.h.

◆ CHECKPOINT_WAIT

#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */

Definition at line 145 of file xlog.h.

◆ PROMOTE_SIGNAL_FILE

#define PROMOTE_SIGNAL_FILE   "promote"

Definition at line 308 of file xlog.h.

◆ RECOVERY_SIGNAL_FILE

#define RECOVERY_SIGNAL_FILE   "recovery.signal"

Definition at line 299 of file xlog.h.

◆ STANDBY_SIGNAL_FILE

#define STANDBY_SIGNAL_FILE   "standby.signal"

Definition at line 300 of file xlog.h.

◆ TABLESPACE_MAP

#define TABLESPACE_MAP   "tablespace_map"

Definition at line 304 of file xlog.h.

◆ TABLESPACE_MAP_OLD

#define TABLESPACE_MAP_OLD   "tablespace_map.old"

Definition at line 305 of file xlog.h.

◆ XLOG_INCLUDE_ORIGIN

#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */

Definition at line 154 of file xlog.h.

◆ XLOG_MARK_UNIMPORTANT

#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */

Definition at line 155 of file xlog.h.

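◆ XLogArchivingActive

#define XLogArchivingActive ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)

Definition at line 99 of file xlog.h.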

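◆ XLogArchivingAlways

#define XLogArchivingAlways ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)

Definition at line 102 of file xlog.h.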

◆ XLogHintBitIsNeeded

#define XLogHintBitIsNeeded ( )    (DataChecksumsEnabled() || wal_log_hints)

Definition at line 120 of file xlog.h.

◆ XLogIsNeeded

#define XLogIsNeeded ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 109 of file xlog.h.

◆ XLogLogicalInfoActive

#define XLogLogicalInfoActive ( )    (wal_level >= WAL_LEVEL_LOGICAL)

Definition at line 126 of file xlog.h.

◆ XLogStandbyInfoActive

#define XLogStandbyInfoActive ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 123 of file xlog.h.

Typedef Documentation

◆ ArchiveMode

typedef enum ArchiveMode ArchiveMode

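◆ CheckpointStatsData

typedef struct CheckpointStatsData CheckpointStatsData

◆ RecoveryState

typedef enum RecoveryState RecoveryState

◆ SessionBackupState

typedef enum SessionBackupState SessionBackupState

◆ WALAvailability

typedef enum WALAvailability WALAvailability

◆ WalCompression

typedef enum WalCompression WalCompression
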
◆ WalLevel

typedef enum WalLevel WalLevel

Enumeration Type Documentation

◆ ArchiveMode

Enumerator
ARCHIVE_MODE_OFF 
ARCHIVE_MODE_ON 
ARCHIVE_MODE_ALWAYS 

Definition at line 63 of file xlog.h.

64 {
65  ARCHIVE_MODE_OFF = 0, /* disabled */
66  ARCHIVE_MODE_ON, /* enabled while server is running normally */
67  ARCHIVE_MODE_ALWAYS, /* enabled always (even during recovery) */
68 } ArchiveMode;
ArchiveMode
Definition: xlog.h:64
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

◆ RecoveryState

Enumerator
RECOVERY_STATE_CRASH 
RECOVERY_STATE_ARCHIVE 
RECOVERY_STATE_DONE 

Definition at line 89 of file xlog.h.

90 {
91  RECOVERY_STATE_CRASH = 0, /* crash recovery */
92  RECOVERY_STATE_ARCHIVE, /* archive recovery */
93  RECOVERY_STATE_DONE, /* currently in production */
RecoveryState
Definition: xlog.h:90
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_DONE
Definition: xlog.h:93
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92

◆ SessionBackupState

Enumerator
SESSION_BACKUP_NONE 
SESSION_BACKUP_RUNNING 

Definition at line 284 of file xlog.h.

287 {
@ SESSION_BACKUP_NONE
Definition: xlog.h:286

◆ WALAvailability

Enumerator
WALAVAIL_INVALID_LSN 
WALAVAIL_RESERVED 
WALAVAIL_EXTENDED 
WALAVAIL_UNRESERVED 
WALAVAIL_REMOVED 

Definition at line 186 of file xlog.h.

189 {
190  WALAVAIL_INVALID_LSN, /* parameter error */
191  WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */
192  WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot or
193  * wal_keep_size */
194  WALAVAIL_UNRESERVED, /* no longer reserved, but not removed yet */
@ WALAVAIL_RESERVED
Definition: xlog.h:189
@ WALAVAIL_UNRESERVED
Definition: xlog.h:192
@ WALAVAIL_EXTENDED
Definition: xlog.h:190
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:188

◆ WalCompression

Enumerator
WAL_COMPRESSION_NONE 
WAL_COMPRESSION_PGLZ 
WAL_COMPRESSION_LZ4 
WAL_COMPRESSION_ZSTD 

Definition at line 80 of file xlog.h.

81 {
WalCompression
Definition: xlog.h:81
@ WAL_COMPRESSION_NONE
Definition: xlog.h:82
@ WAL_COMPRESSION_LZ4
Definition: xlog.h:84
@ WAL_COMPRESSION_PGLZ
Definition: xlog.h:83
@ WAL_COMPRESSION_ZSTD
Definition: xlog.h:85

◆ WalLevel

enum WalLevel
Enumerator
WAL_LEVEL_MINIMAL 
WAL_LEVEL_REPLICA 
WAL_LEVEL_LOGICAL 

Definition at line 72 of file xlog.h.

73 {
77 } WalLevel;
WalLevel
Definition: xlog.h:73
@ WAL_LEVEL_REPLICA
Definition: xlog.h:75
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74

◆ WalSyncMethod

Enumerator
WAL_SYNC_METHOD_FSYNC 
WAL_SYNC_METHOD_FDATASYNC 
WAL_SYNC_METHOD_OPEN 
WAL_SYNC_METHOD_FSYNC_WRITETHROUGH 
WAL_SYNC_METHOD_OPEN_DSYNC 

Definition at line 22 of file xlog.h.

23 {
26  WAL_SYNC_METHOD_OPEN, /* for O_SYNC */
28  WAL_SYNC_METHOD_OPEN_DSYNC /* for O_DSYNC */
29 };
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

Function Documentation

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5003 of file xlog.c.

5004 {
5005  CheckPoint checkPoint;
5006  char *buffer;
5007  XLogPageHeader page;
5008  XLogLongPageHeader longpage;
5009  XLogRecord *record;
5010  char *recptr;
5011  uint64 sysidentifier;
5012  struct timeval tv;
5013  pg_crc32c crc;
5014 
5015  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5017 
5018  /*
5019  * Select a hopefully-unique system identifier code for this installation.
5020  * We use the result of gettimeofday(), including the fractional seconds
5021  * field, as being about as unique as we can easily get. (Think not to
5022  * use random(), since it hasn't been seeded and there's no portable way
5023  * to seed it other than the system clock value...) The upper half of the
5024  * uint64 value is just the tv_sec part, while the lower half contains the
5025  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5026  * PID for a little extra uniqueness. A person knowing this encoding can
5027  * determine the initialization time of the installation, which could
5028  * perhaps be useful sometimes.
5029  */
5030  gettimeofday(&tv, NULL);
5031  sysidentifier = ((uint64) tv.tv_sec) << 32;
5032  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5033  sysidentifier |= getpid() & 0xFFF;
5034 
5035  /* page buffer must be aligned suitably for O_DIRECT */
5036  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5037  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5038  memset(page, 0, XLOG_BLCKSZ);
5039 
5040  /*
5041  * Set up information for the initial checkpoint record
5042  *
5043  * The initial checkpoint record is written to the beginning of the WAL
5044  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5045  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5046  */
5047  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5048  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
5049  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
5050  checkPoint.fullPageWrites = fullPageWrites;
5051  checkPoint.wal_level = wal_level;
5052  checkPoint.nextXid =
5053  FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
5054  checkPoint.nextOid = FirstGenbkiObjectId;
5055  checkPoint.nextMulti = FirstMultiXactId;
5056  checkPoint.nextMultiOffset = 0;
5057  checkPoint.oldestXid = FirstNormalTransactionId;
5058  checkPoint.oldestXidDB = Template1DbOid;
5059  checkPoint.oldestMulti = FirstMultiXactId;
5060  checkPoint.oldestMultiDB = Template1DbOid;
5063  checkPoint.time = (pg_time_t) time(NULL);
5065 
5066  TransamVariables->nextXid = checkPoint.nextXid;
5067  TransamVariables->nextOid = checkPoint.nextOid;
5069  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5070  AdvanceOldestClogXid(checkPoint.oldestXid);
5071  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5072  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5074 
5075  /* Set up the XLOG page header */
5076  page->xlp_magic = XLOG_PAGE_MAGIC;
5077  page->xlp_info = XLP_LONG_HEADER;
5078  page->xlp_tli = BootstrapTimeLineID;
5080  longpage = (XLogLongPageHeader) page;
5081  longpage->xlp_sysid = sysidentifier;
5082  longpage->xlp_seg_size = wal_segment_size;
5083  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5084 
5085  /* Insert the initial checkpoint record */
5086  recptr = ((char *) page + SizeOfXLogLongPHD);
5087  record = (XLogRecord *) recptr;
5088  record->xl_prev = 0;
5089  record->xl_xid = InvalidTransactionId;
5090  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5092  record->xl_rmid = RM_XLOG_ID;
5093  recptr += SizeOfXLogRecord;
5094  /* fill the XLogRecordDataHeaderShort struct */
5095  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5096  *(recptr++) = sizeof(checkPoint);
5097  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5098  recptr += sizeof(checkPoint);
5099  Assert(recptr - (char *) record == record->xl_tot_len);
5100 
5101  INIT_CRC32C(crc);
5102  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5103  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5104  FIN_CRC32C(crc);
5105  record->xl_crc = crc;
5106 
5107  /* Create first XLOG segment file */
5110 
5111  /*
5112  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5113  * close the file again in a moment.
5114  */
5115 
5116  /* Write the first page with the initial record */
5117  errno = 0;
5118  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5119  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5120  {
5121  /* if write didn't set errno, assume problem is no disk space */
5122  if (errno == 0)
5123  errno = ENOSPC;
5124  ereport(PANIC,
5126  errmsg("could not write bootstrap write-ahead log file: %m")));
5127  }
5129 
5130  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5131  if (pg_fsync(openLogFile) != 0)
5132  ereport(PANIC,
5134  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5136 
5137  if (close(openLogFile) != 0)
5138  ereport(PANIC,
5140  errmsg("could not close bootstrap write-ahead log file: %m")));
5141 
5142  openLogFile = -1;
5143 
5144  /* Now create pg_control */
5145  InitControlFile(sysidentifier, data_checksum_version);
5146  ControlFile->time = checkPoint.time;
5147  ControlFile->checkPoint = checkPoint.redo;
5148  ControlFile->checkPointCopy = checkPoint;
5149 
5150  /* some additional ControlFile fields are set in WriteControlFile() */
5151  WriteControlFile();
5152 
5153  /* Bootstrap the commit log, too */
5154  BootStrapCLOG();
5158 
5159  pfree(buffer);
5160 
5161  /*
5162  * Force control file to be read - in contrast to normal processing we'd
5163  * otherwise never run the checks and GUC related initializations therein.
5164  */
5165  ReadControlFile();
5166 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:804
#define Assert(condition)
Definition: c.h:858
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2320
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2354
void BootStrapMultiXact(void)
Definition: multixact.c:2026
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3372
bool fullPageWrites
Definition: xlog.c:120
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4196
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9447
static int openLogFile
Definition: xlog.c:633
int wal_level
Definition: xlog.c:129
static void WriteControlFile(void)
Definition: xlog.c:4231
int wal_segment_size
Definition: xlog.c:141
static TimeLineID openLogTLI
Definition: xlog.c:635
static ControlFileData * ControlFile
Definition: xlog.c:572
#define BootstrapTimeLineID
Definition: xlog.c:109
static void ReadControlFile(void)
Definition: xlog.c:4313
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3719 of file xlog.c.

3720 {
3721  int save_errno = errno;
3722  XLogSegNo lastRemovedSegNo;
3723 
3725  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3727 
3728  if (segno <= lastRemovedSegNo)
3729  {
3730  char filename[MAXFNAMELEN];
3731 
3732  XLogFileName(filename, tli, segno, wal_segment_size);
3733  errno = save_errno;
3734  ereport(ERROR,
3736  errmsg("requested WAL segment %s has already been removed",
3737  filename)));
3738  }
3739  errno = save_errno;
3740 }
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:119
#define SpinLockRelease(lock)
Definition: spin.h:64
#define SpinLockAcquire(lock)
Definition: spin.h:62
slock_t info_lck
Definition: xlog.c:551
XLogSegNo lastRemovedSegNo
Definition: xlog.c:459
static XLogCtlData * XLogCtl
Definition: xlog.c:564
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CreateCheckPoint()

void CreateCheckPoint ( int  flags)

Definition at line 6854 of file xlog.c.

6855 {
6856  bool shutdown;
6857  CheckPoint checkPoint;
6858  XLogRecPtr recptr;
6859  XLogSegNo _logSegNo;
6860  XLogCtlInsert *Insert = &XLogCtl->Insert;
6861  uint32 freespace;
6862  XLogRecPtr PriorRedoPtr;
6863  XLogRecPtr last_important_lsn;
6864  VirtualTransactionId *vxids;
6865  int nvxids;
6866  int oldXLogAllowed = 0;
6867 
6868  /*
6869  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6870  * issued at a different time.
6871  */
6872  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
6873  shutdown = true;
6874  else
6875  shutdown = false;
6876 
6877  /* sanity check */
6878  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6879  elog(ERROR, "can't create a checkpoint during recovery");
6880 
6881  /*
6882  * Prepare to accumulate statistics.
6883  *
6884  * Note: because it is possible for log_checkpoints to change while a
6885  * checkpoint proceeds, we always accumulate stats, even if
6886  * log_checkpoints is currently off.
6887  */
6888  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6889  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
6890 
6891  /*
6892  * Let smgr prepare for checkpoint; this has to happen outside the
6893  * critical section and before we determine the REDO pointer. Note that
6894  * smgr must not do anything that'd have to be undone if we decide no
6895  * checkpoint is needed.
6896  */
6897  SyncPreCheckpoint();
6898 
6899  /*
6900  * Use a critical section to force system panic if we have trouble.
6901  */
6902  START_CRIT_SECTION();
6903 
6904  if (shutdown)
6905  {
6906  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6907  ControlFile->state = DB_SHUTDOWNING;
6908  UpdateControlFile();
6909  LWLockRelease(ControlFileLock);
6910  }
6911 
6912  /* Begin filling in the checkpoint WAL record */
6913  MemSet(&checkPoint, 0, sizeof(checkPoint));
6914  checkPoint.time = (pg_time_t) time(NULL);
6915 
6916  /*
6917  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6918  * pointer. This allows us to begin accumulating changes to assemble our
6919  * starting snapshot of locks and transactions.
6920  */
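6921  if (!shutdown && XLogStandbyInfoActive())
6922  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
6923  else
6924  checkPoint.oldestActiveXid = InvalidTransactionId;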
6925 
6926  /*
6927  * Get location of last important record before acquiring insert locks (as
6928  * GetLastImportantRecPtr() also locks WAL locks).
6929  */
6930  last_important_lsn = GetLastImportantRecPtr();
6931 
6932  /*
6933  * If this isn't a shutdown or forced checkpoint, and if there has been no
6934  * WAL activity requiring a checkpoint, skip it. The idea here is to
6935  * avoid inserting duplicate checkpoints when the system is idle.
6936  */
6937  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6938  CHECKPOINT_FORCE)) == 0)
6939  {
6940  if (last_important_lsn == ControlFile->checkPoint)
6941  {
6942  END_CRIT_SECTION();
6943  ereport(DEBUG1,
6944  (errmsg_internal("checkpoint skipped because system is idle")));
6945  return;
6946  }
6947  }
6948 
6949  /*
6950  * An end-of-recovery checkpoint is created before anyone is allowed to
6951  * write WAL. To allow us to write the checkpoint record, temporarily
6952  * enable XLogInsertAllowed.
6953  */
6954  if (flags & CHECKPOINT_END_OF_RECOVERY)
6955  oldXLogAllowed = LocalSetXLogInsertAllowed();
6956 
6957  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6958  if (flags & CHECKPOINT_END_OF_RECOVERY)
6959  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6960  else
6961  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6962 
6963  /*
6964  * We must block concurrent insertions while examining insert state.
6965  */
6966  WALInsertLockAcquireExclusive();
6967 
6968  checkPoint.fullPageWrites = Insert->fullPageWrites;
6969  checkPoint.wal_level = wal_level;
6970 
6971  if (shutdown)
6972  {
6973  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
6974 
6975  /*
6976  * Compute new REDO record ptr = location of next XLOG record.
6977  *
6978  * Since this is a shutdown checkpoint, there can't be any concurrent
6979  * WAL insertion.
6980  */
6981  freespace = INSERT_FREESPACE(curInsert);
6982  if (freespace == 0)
6983  {
6984  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
6985  curInsert += SizeOfXLogLongPHD;
6986  else
6987  curInsert += SizeOfXLogShortPHD;
6988  }
6989  checkPoint.redo = curInsert;
6990 
6991  /*
6992  * Here we update the shared RedoRecPtr for future XLogInsert calls;
6993  * this must be done while holding all the insertion locks.
6994  *
6995  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
6996  * left pointing past where it really needs to point. This is okay;
6997  * the only consequence is that XLogInsert might back up whole buffers
6998  * that it didn't really need to. We can't postpone advancing
6999  * RedoRecPtr because XLogInserts that happen while we are dumping
7000  * buffers must assume that their buffer changes are not included in
7001  * the checkpoint.
7002  */
7003  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7004  }
7005 
7006  /*
7007  * Now we can release the WAL insertion locks, allowing other xacts to
7008  * proceed while we are flushing disk buffers.
7009  */
7010  WALInsertLockRelease();
7011 
7012  /*
7013  * If this is an online checkpoint, we have not yet determined the redo
7014  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7015  * record; the LSN at which it starts becomes the new redo pointer. We
7016  * don't do this for a shutdown checkpoint, because in that case no WAL
7017  * can be written between the redo point and the insertion of the
7018  * checkpoint record itself, so the checkpoint record itself serves to
7019  * mark the redo point.
7020  */
7021  if (!shutdown)
7022  {
7023  /* Include WAL level in record for WAL summarizer's benefit. */
7024  XLogBeginInsert();
7025  XLogRegisterData((char *) &wal_level, sizeof(wal_level));
7026  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7027 
7028  /*
7029  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7030  * shared memory and RedoRecPtr in backend-local memory, but we need
7031  * to copy that into the record that will be inserted when the
7032  * checkpoint is complete.
7033  */
7034  checkPoint.redo = RedoRecPtr;
7035  }
7036 
7037  /* Update the info_lck-protected copy of RedoRecPtr as well */
7038  SpinLockAcquire(&XLogCtl->info_lck);
7039  XLogCtl->RedoRecPtr = checkPoint.redo;
7040  SpinLockRelease(&XLogCtl->info_lck);
7041 
7042  /*
7043  * If enabled, log checkpoint start. We postpone this until now so as not
7044  * to log anything if we decided to skip the checkpoint.
7045  */
7046  if (log_checkpoints)
7047  LogCheckpointStart(flags, false);
7048 
7049  /* Update the process title */
7050  update_checkpoint_display(flags, false, false);
7051 
7052  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7053 
7054  /*
7055  * Get the other info we need for the checkpoint record.
7056  *
7057  * We don't need to save oldestClogXid in the checkpoint, it only matters
7058  * for the short period in which clog is being truncated, and if we crash
7059  * during that we'll redo the clog truncation and fix up oldestClogXid
7060  * there.
7061  */
7062  LWLockAcquire(XidGenLock, LW_SHARED);
7063  checkPoint.nextXid = TransamVariables->nextXid;
7064  checkPoint.oldestXid = TransamVariables->oldestXid;
7065  checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7066  LWLockRelease(XidGenLock);
7067 
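7068  LWLockAcquire(CommitTsLock, LW_SHARED);
7069  checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7070  checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;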
7071  LWLockRelease(CommitTsLock);
7072 
7073  LWLockAcquire(OidGenLock, LW_SHARED);
7074  checkPoint.nextOid = TransamVariables->nextOid;
7075  if (!shutdown)
7076  checkPoint.nextOid += TransamVariables->oidCount;
7077  LWLockRelease(OidGenLock);
7078 
7079  MultiXactGetCheckptMulti(shutdown,
7080  &checkPoint.nextMulti,
7081  &checkPoint.nextMultiOffset,
7082  &checkPoint.oldestMulti,
7083  &checkPoint.oldestMultiDB);
7084 
7085  /*
7086  * Having constructed the checkpoint record, ensure all shmem disk buffers
7087  * and commit-log buffers are flushed to disk.
7088  *
7089  * This I/O could fail for various reasons. If so, we will fail to
7090  * complete the checkpoint, but there is no reason to force a system
7091  * panic. Accordingly, exit critical section while doing it.
7092  */
7093  END_CRIT_SECTION();
7094 
7095  /*
7096  * In some cases there are groups of actions that must all occur on one
7097  * side or the other of a checkpoint record. Before flushing the
7098  * checkpoint record we must explicitly wait for any backend currently
7099  * performing those groups of actions.
7100  *
7101  * One example is end of transaction, so we must wait for any transactions
7102  * that are currently in commit critical sections. If an xact inserted
7103  * its commit record into XLOG just before the REDO point, then a crash
7104  * restart from the REDO point would not replay that record, which means
7105  * that our flushing had better include the xact's update of pg_xact. So
7106  * we wait till he's out of his commit critical section before proceeding.
7107  * See notes in RecordTransactionCommit().
7108  *
7109  * Because we've already released the insertion locks, this test is a bit
7110  * fuzzy: it is possible that we will wait for xacts we didn't really need
7111  * to wait for. But the delay should be short and it seems better to make
7112  * checkpoint take a bit longer than to hold off insertions longer than
7113  * necessary. (In fact, the whole reason we have this issue is that xact.c
7114  * does commit record XLOG insertion and clog update as two separate steps
7115  * protected by different locks, but again that seems best on grounds of
7116  * minimizing lock contention.)
7117  *
7118  * A transaction that has not yet set delayChkptFlags when we look cannot
7119  * be at risk, since it has not inserted its commit record yet; and one
7120  * that's already cleared it is not at risk either, since it's done fixing
7121  * clog and we will correctly flush the update below. So we cannot miss
7122  * any xacts we need to wait for.
7123  */
7124  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7125  if (nvxids > 0)
7126  {
7127  do
7128  {
7129  /*
7130  * Keep absorbing fsync requests while we wait. There could even
7131  * be a deadlock if we don't, if the process that prevents the
7132  * checkpoint is trying to add a request to the queue.
7133  */
7134  AbsorbSyncRequests();
7135 
7136  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7137  pg_usleep(10000L); /* wait for 10 msec */
7138  pgstat_report_wait_end();
7139  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7140  DELAY_CHKPT_START));
7141  }
7142  pfree(vxids);
7143 
7144  CheckPointGuts(checkPoint.redo, flags);
7145 
7146  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7147  if (nvxids > 0)
7148  {
7149  do
7150  {
7151  AbsorbSyncRequests();
7152 
7153  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7154  pg_usleep(10000L); /* wait for 10 msec */
7155  pgstat_report_wait_end();
7156  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7157  DELAY_CHKPT_COMPLETE));
7158  }
7159  pfree(vxids);
7160 
7161  /*
7162  * Take a snapshot of running transactions and write this to WAL. This
7163  * allows us to reconstruct the state of running transactions during
7164  * archive recovery, if required. Skip, if this info disabled.
7165  *
7166  * If we are shutting down, or Startup process is completing crash
7167  * recovery we don't need to write running xact data.
7168  */
7169  if (!shutdown && XLogStandbyInfoActive())
7170  LogStandbySnapshot();
7171 
7172  START_CRIT_SECTION();
7173 
7174  /*
7175  * Now insert the checkpoint record into XLOG.
7176  */
7177  XLogBeginInsert();
7178  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7179  recptr = XLogInsert(RM_XLOG_ID,
7180  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7181  XLOG_CHECKPOINT_ONLINE);
7182 
7183  XLogFlush(recptr);
7184 
7185  /*
7186  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7187  * overwritten at next startup. No-one should even try, this just allows
7188  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7189  * to just temporarily disable writing until the system has exited
7190  * recovery.
7191  */
7192  if (shutdown)
7193  {
7194  if (flags & CHECKPOINT_END_OF_RECOVERY)
7195  LocalXLogInsertAllowed = oldXLogAllowed;
7196  else
7197  LocalXLogInsertAllowed = 0; /* never again write WAL */
7198  }
7199 
7200  /*
7201  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7202  * = end of actual checkpoint record.
7203  */
7204  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7205  ereport(PANIC,
7206  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7207 
7208  /*
7209  * Remember the prior checkpoint's redo ptr for
7210  * UpdateCheckPointDistanceEstimate()
7211  */
7212  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7213 
7214  /*
7215  * Update the control file.
7216  */
7217  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
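7218  if (shutdown)
7219  ControlFile->state = DB_SHUTDOWNED;
7220  ControlFile->checkPoint = ProcLastRecPtr;
7221  ControlFile->checkPointCopy = checkPoint;
7222  /* crash recovery should always recover to the end of WAL */
7223  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7224  ControlFile->minRecoveryPointTLI = 0;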
7225 
7226  /*
7227  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7228  * unused on non-shutdown checkpoints, but seems useful to store it always
7229  * for debugging purposes.
7230  */
7231  ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7232 
7233  UpdateControlFile();
7234  LWLockRelease(ControlFileLock);
7235 
7236  /* Update shared-memory copy of checkpoint XID/epoch */
7237  SpinLockAcquire(&XLogCtl->info_lck);
7238  XLogCtl->ckptFullXid = checkPoint.nextXid;
7239  SpinLockRelease(&XLogCtl->info_lck);
7240 
7241  /*
7242  * We are now done with critical updates; no need for system panic if we
7243  * have trouble while fooling with old log segments.
7244  */
7245  END_CRIT_SECTION();
7246 
7247  /*
7248  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7249  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7250  * where (a) we're not inside of a critical section and (b) we can be
7251  * certain that the relevant record has been flushed to disk, which must
7252  * happen before it can be summarized.
7253  *
7254  * If this is a shutdown checkpoint, then this happens reasonably
7255  * promptly: we've only just inserted and flushed the
7256  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7257  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7258  * record was written before we began flushing data to disk, and that
7259  * could be many minutes ago at this point. However, we don't XLogFlush()
7260  * after inserting that record, so we're not guaranteed that it's on disk
7261  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7262  * record.
7263  */
7264  SetWalSummarizerLatch();
7265 
7266  /*
7267  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7268  */
7269  SyncPostCheckpoint();
7270 
7271  /*
7272  * Update the average distance between checkpoints if the prior checkpoint
7273  * exists.
7274  */
7275  if (PriorRedoPtr != InvalidXLogRecPtr)
7276  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7277 
7278  /*
7279  * Delete old log files, those no longer needed for last checkpoint to
7280  * prevent the disk holding the xlog from growing full.
7281  */
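7282  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7283  KeepLogSeg(recptr, &_logSegNo);
7284  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7285  _logSegNo, InvalidOid,
7286  InvalidTransactionId))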
7287  {
7288  /*
7289  * Some slots have been invalidated; recalculate the old-segment
7290  * horizon, starting again from RedoRecPtr.
7291  */
7292  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7293  KeepLogSeg(recptr, &_logSegNo);
7294  }
7295  _logSegNo--;
7296  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7297  checkPoint.ThisTimeLineID);
7298 
7299  /*
7300  * Make more log segments if needed. (Do this after recycling old log
7301  * segments, since that may supply some of the needed files.)
7302  */
7303  if (!shutdown)
7304  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7305 
7306  /*
7307  * Truncate pg_subtrans if possible. We can throw away all data before
7308  * the oldest XMIN of any running transaction. No future transaction will
7309  * attempt to reference any pg_subtrans entry older than that (see Asserts
7310  * in subtrans.c). During recovery, though, we mustn't do this because
7311  * StartupSUBTRANS hasn't been called yet.
7312  */
7313  if (!RecoveryInProgress())
7314  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7315 
7316  /* Real work is done; log and update stats. */
7317  LogCheckpointEnd(false);
7318 
7319  /* Reset the process title */
7320  update_checkpoint_display(flags, false, true);
7321 
7322  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
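7323  NBuffers,
7324  CheckpointStats.ckpt_segs_added,
7325  CheckpointStats.ckpt_segs_removed,
7326  CheckpointStats.ckpt_segs_recycled);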
7327 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:469
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1655
unsigned int uint32
Definition: c.h:506
#define MemSet(start, val, len)
Definition: c.h:1020
void AbsorbSyncRequests(void)
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
static void Insert(File file)
Definition: fd.c:1313
int NBuffers
Definition: globals.c:140
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2274
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:114
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:115
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3047
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2884
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3093
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1804
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:51
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1285
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:170
int ckpt_segs_added
Definition: xlog.h:169
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:171
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:455
TimeLineID InsertTimeLineID
Definition: xlog.c:507
XLogRecPtr RedoRecPtr
Definition: xlog.c:454
XLogCtlInsert Insert
Definition: xlog.c:450
TimeLineID PrevTimeLineID
Definition: xlog.c:508
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:462
XLogRecPtr RedoRecPtr
Definition: xlog.c:428
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void SetWalSummarizerLatch(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:251
bool RecoveryInProgress(void)
Definition: xlog.c:6304
static void WALInsertLockRelease(void)
Definition: xlog.c:1443
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1856
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1414
static void UpdateControlFile(void)
Definition: xlog.c:4529
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3857
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6619
static XLogRecPtr RedoRecPtr
Definition: xlog.c:271
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6651
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3682
bool log_checkpoints
Definition: xlog.c:127
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7916
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6392
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6526
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6754
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:578
static int LocalXLogInsertAllowed
Definition: xlog.c:234
CheckpointStatsData CheckpointStats
Definition: xlog.c:207
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2794
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7473
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6792
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SetWalSummarizerLatch(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7554 of file xlog.c.

7555 {
7556  XLogRecPtr lastCheckPointRecPtr;
7557  XLogRecPtr lastCheckPointEndPtr;
7558  CheckPoint lastCheckPoint;
7559  XLogRecPtr PriorRedoPtr;
7560  XLogRecPtr receivePtr;
7561  XLogRecPtr replayPtr;
7562  TimeLineID replayTLI;
7563  XLogRecPtr endptr;
7564  XLogSegNo _logSegNo;
7565  TimestampTz xtime;
7566 
7567  /* Concurrent checkpoint/restartpoint cannot happen */
7568  Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
7569 
7570  /* Get a local copy of the last safe checkpoint record. */
7571  SpinLockAcquire(&XLogCtl->info_lck);
7572  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7573  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7574  lastCheckPoint = XLogCtl->lastCheckPoint;
7575  SpinLockRelease(&XLogCtl->info_lck);
7576 
7577  /*
7578  * Check that we're still in recovery mode. It's ok if we exit recovery
7579  * mode after this check, the restart point is valid anyway.
7580  */
7581  if (!RecoveryInProgress())
7582  {
7583  ereport(DEBUG2,
7584  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7585  return false;
7586  }
7587 
7588  /*
7589  * If the last checkpoint record we've replayed is already our last
7590  * restartpoint, we can't perform a new restart point. We still update
7591  * minRecoveryPoint in that case, so that if this is a shutdown restart
7592  * point, we won't start up earlier than before. That's not strictly
7593  * necessary, but when hot standby is enabled, it would be rather weird if
7594  * the database opened up for read-only connections at a point-in-time
7595  * before the last shutdown. Such time travel is still possible in case of
7596  * immediate shutdown, though.
7597  *
7598  * We don't explicitly advance minRecoveryPoint when we do create a
7599  * restartpoint. It's assumed that flushing the buffers will do that as a
7600  * side-effect.
7601  */
7602  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7603  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7604  {
7605  ereport(DEBUG2,
7606  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7607  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7608 
7609  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
7610  if (flags & CHECKPOINT_IS_SHUTDOWN)
7611  {
7612  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7613  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7614  UpdateControlFile();
7615  LWLockRelease(ControlFileLock);
7616  }
7617  return false;
7618  }
7619 
7620  /*
7621  * Update the shared RedoRecPtr so that the startup process can calculate
7622  * the number of segments replayed since last restartpoint, and request a
7623  * restartpoint if it exceeds CheckPointSegments.
7624  *
7625  * Like in CreateCheckPoint(), hold off insertions to update it, although
7626  * during recovery this is just pro forma, because no WAL insertions are
7627  * happening.
7628  */
7629  WALInsertLockAcquireExclusive();
7630  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7631  WALInsertLockRelease();
7632 
7633  /* Also update the info_lck-protected copy */
7634  SpinLockAcquire(&XLogCtl->info_lck);
7635  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7636  SpinLockRelease(&XLogCtl->info_lck);
7637 
7638  /*
7639  * Prepare to accumulate statistics.
7640  *
7641  * Note: because it is possible for log_checkpoints to change while a
7642  * checkpoint proceeds, we always accumulate stats, even if
7643  * log_checkpoints is currently off.
7644  */
7645  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7646  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
7647 
7648  if (log_checkpoints)
7649  LogCheckpointStart(flags, true);
7650 
7651  /* Update the process title */
7652  update_checkpoint_display(flags, true, false);
7653 
7654  CheckPointGuts(lastCheckPoint.redo, flags);
7655 
7656  /*
7657  * This location needs to be after CheckPointGuts() to ensure that some
7658  * work has already happened during this checkpoint.
7659  */
7660  INJECTION_POINT("create-restart-point");
7661 
7662  /*
7663  * Remember the prior checkpoint's redo ptr for
7664  * UpdateCheckPointDistanceEstimate()
7665  */
7666  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7667 
7668  /*
7669  * Update pg_control, using current time. Check that it still shows an
7670  * older checkpoint, else do nothing; this is a quick hack to make sure
7671  * nothing really bad happens if somehow we get here after the
7672  * end-of-recovery checkpoint.
7673  */
7674  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7675  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7676  {
7677  /*
7678  * Update the checkpoint information. We do this even if the cluster
7679  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7680  * segments recycled below.
7681  */
7682  ControlFile->checkPoint = lastCheckPointRecPtr;
7683  ControlFile->checkPointCopy = lastCheckPoint;
7684 
7685  /*
7686  * Ensure minRecoveryPoint is past the checkpoint record and update it
7687  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7688  * this will have happened already while writing out dirty buffers,
7689  * but not necessarily - e.g. because no buffers were dirtied. We do
7690  * this because a backup performed in recovery uses minRecoveryPoint
7691  * to determine which WAL files must be included in the backup, and
7692  * the file (or files) containing the checkpoint record must be
7693  * included, at a minimum. Note that for an ordinary restart of
7694  * recovery there's no value in having the minimum recovery point any
7695  * earlier than this anyway, because redo will begin just after the
7696  * checkpoint record.
7697  */
7698  if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
7699  {
7700  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7701  {
7702  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7703  ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
7704 
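7705  /* update local copy */
7706  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
7707  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;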
7708  }
7709  if (flags & CHECKPOINT_IS_SHUTDOWN)
7710  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7711  }
7712  UpdateControlFile();
7713  }
7714  LWLockRelease(ControlFileLock);
7715 
7716  /*
7717  * Update the average distance between checkpoints/restartpoints if the
7718  * prior checkpoint exists.
7719  */
7720  if (PriorRedoPtr != InvalidXLogRecPtr)
7721  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7722 
7723  /*
7724  * Delete old log files, those no longer needed for last restartpoint to
7725  * prevent the disk holding the xlog from growing full.
7726  */
7727  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7728 
7729  /*
7730  * Retreat _logSegNo using the current end of xlog replayed or received,
7731  * whichever is later.
7732  */
7733  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7734  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7735  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7736  KeepLogSeg(endptr, &_logSegNo);
7737  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7738  _logSegNo, InvalidOid,
7739  InvalidTransactionId))
7740  {
7741  /*
7742  * Some slots have been invalidated; recalculate the old-segment
7743  * horizon, starting again from RedoRecPtr.
7744  */
7745  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7746  KeepLogSeg(endptr, &_logSegNo);
7747  }
7748  _logSegNo--;
7749 
7750  /*
7751  * Try to recycle segments on a useful timeline. If we've been promoted
7752  * since the beginning of this restartpoint, use the new timeline chosen
7753  * at end of recovery. If we're still in recovery, use the timeline we're
7754  * currently replaying.
7755  *
7756  * There is no guarantee that the WAL segments will be useful on the
7757  * current timeline; if recovery proceeds to a new timeline right after
7758  * this, the pre-allocated WAL segments on this timeline will not be used,
7759  * and will go wasted until recycled on the next restartpoint. We'll live
7760  * with that.
7761  */
7762  if (!RecoveryInProgress())
7763  replayTLI = XLogCtl->InsertTimeLineID;
7764 
7765  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7766 
7767  /*
7768  * Make more log segments if needed. (Do this after recycling old log
7769  * segments, since that may supply some of the needed files.)
7770  */
7771  PreallocXlogFiles(endptr, replayTLI);
7772 
7773  /*
7774  * Truncate pg_subtrans if possible. We can throw away all data before
7775  * the oldest XMIN of any running transaction. No future transaction will
7776  * attempt to reference any pg_subtrans entry older than that (see Asserts
7777  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7778  * this because StartupSUBTRANS hasn't been called yet.
7779  */
7780  if (EnableHotStandby)
7781  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7782 
7783  /* Real work is done; log and update stats. */
7784  LogCheckpointEnd(true);
7785 
7786  /* Reset the process title */
7787  update_checkpoint_display(flags, true, true);
7788 
7789  xtime = GetLatestXTime();
7790  ereport((log_checkpoints ? LOG : DEBUG2),
7791  (errmsg("recovery restart point at %X/%X",
7792  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7793  xtime ? errdetail("Last completed transaction was at log time %s.",
7794  timestamptz_to_str(xtime)) : 0));
7795 
7796  /*
7797  * Finally, execute archive_cleanup_command, if any.
7798  */
7799  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7800  ExecuteRecoveryCommand(archiveCleanupCommand,
7801  "archive_cleanup_command",
7802  false,
7803  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7804 
7805  return true;
7806 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1854
int64 TimestampTz
Definition: timestamp.h:39
int errdetail(const char *fmt,...)
Definition: elog.c:1203
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
bool IsUnderPostmaster
Definition: globals.c:118
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:357
BackendType MyBackendType
Definition: miscinit.c:63
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:543
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:541
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:542
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
bool EnableHotStandby
Definition: xlog.c:119
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2714
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:644
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:645
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:84
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9357 of file xlog.c.

9358 {
9359  bool during_backup_start = DatumGetBool(arg);
9360 
9361  /* If called during backup start, there shouldn't be one already running */
9362  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9363 
9364  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9365  {
9369 
9372 
9373  if (!during_backup_start)
9374  ereport(WARNING,
9375  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9376  }
9377 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:436
static SessionBackupState sessionBackupState
Definition: xlog.c:389

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8755 of file xlog.c.

8757 {
8759 
8760  Assert(state != NULL);
8762 
8763  /*
8764  * During recovery, we don't need to check the WAL level, because if the
8765  * WAL level is not sufficient, it's impossible to get here during recovery.
8766  */
8768  ereport(ERROR,
8769  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8770  errmsg("WAL level not sufficient for making an online backup"),
8771  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8772 
8773  if (strlen(backupidstr) > MAXPGPATH)
8774  ereport(ERROR,
8775  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8776  errmsg("backup label too long (max %d bytes)",
8777  MAXPGPATH)));
8778 
8779  strlcpy(state->name, backupidstr, sizeof(state->name));
8780 
8781  /*
8782  * Mark backup active in shared memory. We must do full-page WAL writes
8783  * during an on-line backup even if not doing so at other times, because
8784  * it's quite possible for the backup dump to obtain a "torn" (partially
8785  * written) copy of a database page if it reads the page concurrently with
8786  * our write to the same page. This can be fixed as long as the first
8787  * write to the page in the WAL sequence is a full-page write. Hence, we
8788  * increment runningBackups then force a CHECKPOINT, to ensure there are
8789  * no dirty pages in shared memory that might get dumped while the backup
8790  * is in progress without having a corresponding WAL record. (Once the
8791  * backup is complete, we need not force full-page writes anymore, since
8792  * we expect that any pages not modified during the backup interval must
8793  * have been correctly captured by the backup.)
8794  *
8795  * Note that forcing full-page writes has no effect during an online
8796  * backup from the standby.
8797  *
8798  * We must hold all the insertion locks to change the value of
8799  * runningBackups, to ensure adequate interlocking against
8800  * XLogInsertRecord().
8801  */
8805 
8806  /*
8807  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8808  * work correctly, it is critical that sessionBackupState is only updated
8809  * after this block is over.
8810  */
8812  {
8813  bool gotUniqueStartpoint = false;
8814  DIR *tblspcdir;
8815  struct dirent *de;
8816  tablespaceinfo *ti;
8817  int datadirpathlen;
8818 
8819  /*
8820  * Force an XLOG file switch before the checkpoint, to ensure that the
8821  * WAL segment the checkpoint is written to doesn't contain pages with
8822  * old timeline IDs. That would otherwise happen if you called
8823  * pg_backup_start() right after restoring from a PITR archive: the
8824  * first WAL segment containing the startup checkpoint has pages in
8825  * the beginning with the old timeline ID. That can cause trouble at
8826  * recovery: we won't have a history file covering the old timeline if
8827  * pg_wal directory was not included in the base backup and the WAL
8828  * archive was cleared too before starting the backup.
8829  *
8830  * This also ensures that we have emitted a WAL page header that has
8831  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8832  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8833  * compress out removable backup blocks, it won't remove any that
8834  * occur after this point.
8835  *
8836  * During recovery, we skip forcing XLOG file switch, which means that
8837  * the backup taken during recovery is not available for the special
8838  * recovery case described above.
8839  */
8841  RequestXLogSwitch(false);
8842 
8843  do
8844  {
8845  bool checkpointfpw;
8846 
8847  /*
8848  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8849  * page problems, this guarantees that two successive backup runs
8850  * will have different checkpoint positions and hence different
8851  * history file names, even if nothing happened in between.
8852  *
8853  * During recovery, establish a restartpoint if possible. We use
8854  * the last restartpoint as the backup starting checkpoint. This
8855  * means that two successive backup runs can have the same checkpoint
8856  * positions.
8857  *
8858  * Since the fact that we are executing do_pg_backup_start()
8859  * during recovery means that checkpointer is running, we can use
8860  * RequestCheckpoint() to establish a restartpoint.
8861  *
8862  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8863  * passing fast = true). Otherwise this can take a while.
8864  */
8866  (fast ? CHECKPOINT_IMMEDIATE : 0));
8867 
8868  /*
8869  * Now we need to fetch the checkpoint record location, and also
8870  * its REDO pointer. The oldest point in WAL that would be needed
8871  * to restore starting from the checkpoint is precisely the REDO
8872  * pointer.
8873  */
8874  LWLockAcquire(ControlFileLock, LW_SHARED);
8875  state->checkpointloc = ControlFile->checkPoint;
8876  state->startpoint = ControlFile->checkPointCopy.redo;
8878  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8879  LWLockRelease(ControlFileLock);
8880 
8882  {
8883  XLogRecPtr recptr;
8884 
8885  /*
8886  * Check to see if all WAL replayed during online backup
8887  * (i.e., since last restartpoint used as backup starting
8888  * checkpoint) contain full-page writes.
8889  */
8891  recptr = XLogCtl->lastFpwDisableRecPtr;
8893 
8894  if (!checkpointfpw || state->startpoint <= recptr)
8895  ereport(ERROR,
8896  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8897  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8898  "since last restartpoint"),
8899  errhint("This means that the backup being taken on the standby "
8900  "is corrupt and should not be used. "
8901  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
8902  "and then try an online backup again.")));
8903 
8904  /*
8905  * During recovery, since we don't use the end-of-backup WAL
8906  * record and don't write the backup history file, the
8907  * starting WAL location doesn't need to be unique. This means
8908  * that two base backups started at the same time might use
8909  * the same checkpoint as starting locations.
8910  */
8911  gotUniqueStartpoint = true;
8912  }
8913 
8914  /*
8915  * If two base backups are started at the same time (in WAL sender
8916  * processes), we need to make sure that they use different
8917  * checkpoints as starting locations, because we use the starting
8918  * WAL location as a unique identifier for the base backup in the
8919  * end-of-backup WAL record and when we write the backup history
8920  * file. Perhaps it would be better to generate a separate unique ID
8921  * for each backup instead of forcing another checkpoint, but
8922  * taking a checkpoint right after another is not that expensive
8923  * either, because only a few buffers have been dirtied yet.
8924  */
8926  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8927  {
8928  XLogCtl->Insert.lastBackupStart = state->startpoint;
8929  gotUniqueStartpoint = true;
8930  }
8932  } while (!gotUniqueStartpoint);
8933 
8934  /*
8935  * Construct tablespace_map file.
8936  */
8937  datadirpathlen = strlen(DataDir);
8938 
8939  /* Collect information about all tablespaces */
8940  tblspcdir = AllocateDir("pg_tblspc");
8941  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
8942  {
8943  char fullpath[MAXPGPATH + 10];
8944  char linkpath[MAXPGPATH];
8945  char *relpath = NULL;
8946  char *s;
8947  PGFileType de_type;
8948  char *badp;
8949  Oid tsoid;
8950 
8951  /*
8952  * Try to parse the directory name as an unsigned integer.
8953  *
8954  * Tablespace directories should be positive integers that can be
8955  * represented in 32 bits, with no leading zeroes or trailing
8956  * garbage. If we come across a name that doesn't meet those
8957  * criteria, skip it.
8958  */
8959  if (de->d_name[0] < '1' || de->d_name[1] > '9')
8960  continue;
8961  errno = 0;
8962  tsoid = strtoul(de->d_name, &badp, 10);
8963  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
8964  continue;
8965 
8966  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
8967 
8968  de_type = get_dirent_type(fullpath, de, false, ERROR);
8969 
8970  if (de_type == PGFILETYPE_LNK)
8971  {
8972  StringInfoData escapedpath;
8973  int rllen;
8974 
8975  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
8976  if (rllen < 0)
8977  {
8978  ereport(WARNING,
8979  (errmsg("could not read symbolic link \"%s\": %m",
8980  fullpath)));
8981  continue;
8982  }
8983  else if (rllen >= sizeof(linkpath))
8984  {
8985  ereport(WARNING,
8986  (errmsg("symbolic link \"%s\" target is too long",
8987  fullpath)));
8988  continue;
8989  }
8990  linkpath[rllen] = '\0';
8991 
8992  /*
8993  * Relpath holds the relative path of the tablespace directory
8994  * when it's located within PGDATA, or NULL if it's located
8995  * elsewhere.
8996  */
8997  if (rllen > datadirpathlen &&
8998  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
8999  IS_DIR_SEP(linkpath[datadirpathlen]))
9000  relpath = pstrdup(linkpath + datadirpathlen + 1);
9001 
9002  /*
9003  * Add a backslash-escaped version of the link path to the
9004  * tablespace map file.
9005  */
9006  initStringInfo(&escapedpath);
9007  for (s = linkpath; *s; s++)
9008  {
9009  if (*s == '\n' || *s == '\r' || *s == '\\')
9010  appendStringInfoChar(&escapedpath, '\\');
9011  appendStringInfoChar(&escapedpath, *s);
9012  }
9013  appendStringInfo(tblspcmapfile, "%s %s\n",
9014  de->d_name, escapedpath.data);
9015  pfree(escapedpath.data);
9016  }
9017  else if (de_type == PGFILETYPE_DIR)
9018  {
9019  /*
9020  * It's possible to use allow_in_place_tablespaces to create
9021  * directories directly under pg_tblspc, for testing purposes
9022  * only.
9023  *
9024  * In this case, we store a relative path rather than an
9025  * absolute path into the tablespaceinfo.
9026  */
9027  snprintf(linkpath, sizeof(linkpath), "pg_tblspc/%s",
9028  de->d_name);
9029  relpath = pstrdup(linkpath);
9030  }
9031  else
9032  {
9033  /* Skip any other file type that appears here. */
9034  continue;
9035  }
9036 
9037  ti = palloc(sizeof(tablespaceinfo));
9038  ti->oid = tsoid;
9039  ti->path = pstrdup(linkpath);
9040  ti->rpath = relpath;
9041  ti->size = -1;
9042 
9043  if (tablespaces)
9044  *tablespaces = lappend(*tablespaces, ti);
9045  }
9046  FreeDir(tblspcdir);
9047 
9048  state->starttime = (pg_time_t) time(NULL);
9049  }
9051 
9052  state->started_in_recovery = backup_started_in_recovery;
9053 
9054  /*
9055  * Mark that the start phase has correctly finished for the backup.
9056  */
9058 }
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:941
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:525
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:69
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
char * pstrdup(const char *in)
Definition: mcxt.c:1696
#define MAXPGPATH
#define snprintf
Definition: port.h:238
#define IS_DIR_SEP(ch)
Definition: port.h:102
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:94
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:549
XLogRecPtr lastBackupStart
Definition: xlog.c:437
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8023
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9357
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:287
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9083 of file xlog.c.

9084 {
9085  bool backup_stopped_in_recovery = false;
9086  char histfilepath[MAXPGPATH];
9087  char lastxlogfilename[MAXFNAMELEN];
9088  char histfilename[MAXFNAMELEN];
9089  XLogSegNo _logSegNo;
9090  FILE *fp;
9091  int seconds_before_warning;
9092  int waits = 0;
9093  bool reported_waiting = false;
9094 
9095  Assert(state != NULL);
9096 
9097  backup_stopped_in_recovery = RecoveryInProgress();
9098 
9099  /*
9100  * During recovery, we don't need to check the WAL level, because if the
9101  * WAL level is not sufficient, it's impossible to get here during recovery.
9102  */
9103  if (!backup_stopped_in_recovery && !XLogIsNeeded())
9104  ereport(ERROR,
9105  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9106  errmsg("WAL level not sufficient for making an online backup"),
9107  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
9108 
9109  /*
9110  * OK to update backup counter and session-level lock.
9111  *
9112  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9113  * otherwise they can be updated inconsistently, which might cause
9114  * do_pg_abort_backup() to fail.
9115  */
9117 
9118  /*
9119  * It is expected that each do_pg_backup_start() call is matched by
9120  * exactly one do_pg_backup_stop() call.
9121  */
9124 
9125  /*
9126  * Clean up session-level lock.
9127  *
9128  * You might think that WALInsertLockRelease() can be called before
9129  * cleaning up session-level lock because session-level lock doesn't need
9130  * to be protected with WAL insertion lock. But since
9131  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9132  * cleaned up before it.
9133  */
9135 
9137 
9138  /*
9139  * If we are taking an online backup from the standby, we confirm that the
9140  * standby has not been promoted during the backup.
9141  */
9142  if (state->started_in_recovery && !backup_stopped_in_recovery)
9143  ereport(ERROR,
9144  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9145  errmsg("the standby was promoted during online backup"),
9146  errhint("This means that the backup being taken is corrupt "
9147  "and should not be used. "
9148  "Try taking another online backup.")));
9149 
9150  /*
9151  * During recovery, we don't write an end-of-backup record. We assume that
9152  * pg_control was backed up last and its minimum recovery point can be
9153  * available as the backup end location. Since we don't have an
9154  * end-of-backup record, we use the pg_control value to check whether
9155  * we've reached the end of backup when starting recovery from this
9156  * backup. We have no way of checking if pg_control wasn't backed up last
9157  * however.
9158  *
9159  * We don't force a switch to a new WAL file but it is still possible to
9160  * wait for all the required files to be archived if waitforarchive is
9161  * true. This is okay if we use the backup to start a standby and fetch
9162  * the missing WAL using streaming replication. But in the case of an
9163  * archive recovery, a user should set waitforarchive to true and wait for
9164  * them to be archived to ensure that all the required files are
9165  * available.
9166  *
9167  * We return the current minimum recovery point as the backup end
9168  * location. Note that it can be greater than the exact backup end
9169  * location if the minimum recovery point is updated after the backup of
9170  * pg_control. This is harmless for current uses.
9171  *
9172  * XXX currently a backup history file is for informational and debug
9173  * purposes only. It's not essential for an online backup. Furthermore,
9174  * even if it's created, it will not be archived during recovery because
9175  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9176  * backup history file during recovery.
9177  */
9178  if (backup_stopped_in_recovery)
9179  {
9180  XLogRecPtr recptr;
9181 
9182  /*
9183  * Check to see if all WAL replayed during online backup contain
9184  * full-page writes.
9185  */
9187  recptr = XLogCtl->lastFpwDisableRecPtr;
9189 
9190  if (state->startpoint <= recptr)
9191  ereport(ERROR,
9192  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9193  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9194  "during online backup"),
9195  errhint("This means that the backup being taken on the standby "
9196  "is corrupt and should not be used. "
9197  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9198  "and then try an online backup again.")));
9199 
9200 
9201  LWLockAcquire(ControlFileLock, LW_SHARED);
9202  state->stoppoint = ControlFile->minRecoveryPoint;
9204  LWLockRelease(ControlFileLock);
9205  }
9206  else
9207  {
9208  char *history_file;
9209 
9210  /*
9211  * Write the backup-end xlog record
9212  */
9213  XLogBeginInsert();
9214  XLogRegisterData((char *) (&state->startpoint),
9215  sizeof(state->startpoint));
9216  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9217 
9218  /*
9219  * Given that we're not in recovery, InsertTimeLineID is set and can't
9220  * change, so we can read it without a lock.
9221  */
9222  state->stoptli = XLogCtl->InsertTimeLineID;
9223 
9224  /*
9225  * Force a switch to a new xlog segment file, so that the backup is
9226  * valid as soon as archiver moves out the current segment file.
9227  */
9228  RequestXLogSwitch(false);
9229 
9230  state->stoptime = (pg_time_t) time(NULL);
9231 
9232  /*
9233  * Write the backup history file
9234  */
9235  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9236  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9237  state->startpoint, wal_segment_size);
9238  fp = AllocateFile(histfilepath, "w");
9239  if (!fp)
9240  ereport(ERROR,
9242  errmsg("could not create file \"%s\": %m",
9243  histfilepath)));
9244 
9245  /* Build and save the contents of the backup history file */
9246  history_file = build_backup_content(state, true);
9247  fprintf(fp, "%s", history_file);
9248  pfree(history_file);
9249 
9250  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9251  ereport(ERROR,
9253  errmsg("could not write file \"%s\": %m",
9254  histfilepath)));
9255 
9256  /*
9257  * Clean out any no-longer-needed history files. As a side effect,
9258  * this will post a .ready file for the newly created history file,
9259  * notifying the archiver that history file may be archived
9260  * immediately.
9261  */
9263  }
9264 
9265  /*
9266  * If archiving is enabled, wait for all the required WAL files to be
9267  * archived before returning. If archiving isn't enabled, the required WAL
9268  * needs to be transported via streaming replication (hopefully with
9269  * wal_keep_size set high enough), or some more exotic mechanism like
9270  * polling and copying files from pg_wal with a script. We have no knowledge
9271  * of those mechanisms, so it's up to the user to ensure that he gets all
9272  * the required WAL.
9273  *
9274  * We wait until both the last WAL file filled during backup and the
9275  * history file have been archived, and assume that the alphabetic sorting
9276  * property of the WAL files ensures any earlier WAL files are safely
9277  * archived as well.
9278  *
9279  * We wait forever, since archive_command is supposed to work and we
9280  * assume the admin wanted his backup to work completely. If you don't
9281  * wish to wait, then either waitforarchive should be passed in as false,
9282  * or you can set statement_timeout. Also, some notices are issued to
9283  * clue in anyone who might be doing this interactively.
9284  */
9285 
9286  if (waitforarchive &&
9287  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9288  (backup_stopped_in_recovery && XLogArchivingAlways())))
9289  {
9290  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9291  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9293 
9294  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9295  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9296  state->startpoint, wal_segment_size);
9297 
9298  seconds_before_warning = 60;
9299  waits = 0;
9300 
9301  while (XLogArchiveIsBusy(lastxlogfilename) ||
9302  XLogArchiveIsBusy(histfilename))
9303  {
9305 
9306  if (!reported_waiting && waits > 5)
9307  {
9308  ereport(NOTICE,
9309  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9310  reported_waiting = true;
9311  }
9312 
9313  (void) WaitLatch(MyLatch,
9315  1000L,
9316  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9318 
9319  if (++waits >= seconds_before_warning)
9320  {
9321  seconds_before_warning *= 2; /* This wraps in >10 years... */
9322  ereport(WARNING,
9323  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9324  waits),
9325  errhint("Check that your \"archive_command\" is executing properly. "
9326  "You can safely cancel this backup, "
9327  "but the database backup will not be usable without all the WAL segments.")));
9328  }
9329  }
9330 
9331  ereport(NOTICE,
9332  (errmsg("all required WAL segments have been archived")));
9333  }
9334  else if (waitforarchive)
9335  ereport(NOTICE,
9336  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9337 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2583
int FreeFile(FILE *file)
Definition: fd.c:2781
struct Latch * MyLatch
Definition: globals.c:61
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4153
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLogArchivingAlways()
Definition: xlog.h:102
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9064 of file xlog.c.

9065 {
9066  return sessionBackupState;
9067 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4829 of file xlog.c.

4830 {
4831  return ControlFile->wal_level;
4832 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4574 of file xlog.c.

4575 {
4577 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:515

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

Definition at line 6469 of file xlog.c.

6470 {
6472 
6474 
6475  /*
6476  * If we're writing and flushing WAL, the time line can't be changing, so
6477  * no lock is required.
6478  */
6479  if (insertTLI)
6480  *insertTLI = XLogCtl->InsertTimeLineID;
6481 
6482  return LogwrtResult.Flush;
6483 }
RecoveryState SharedRecoveryState
Definition: xlog.c:514
XLogRecPtr Flush
Definition: xlog.c:326
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:618
static XLogwrtResult LogwrtResult
Definition: xlog.c:610

References Assert, XLogwrtResult::Flush, XLogCtlData::InsertTimeLineID, LogwrtResult, RECOVERY_STATE_DONE, RefreshXLogWriteResult, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by binary_upgrade_logical_slot_has_caught_up(), get_flush_position(), GetCurrentLSN(), GetLatestLSN(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), pg_replication_slot_advance(), read_local_xlog_page_guts(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool doPageWrites_p 
)

Definition at line 6437 of file xlog.c.

6438 {
6439  *RedoRecPtr_p = RedoRecPtr;
6440  *doPageWrites_p = doPageWrites;
6441 }
static bool doPageWrites
Definition: xlog.c:284

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6452 of file xlog.c.

6453 {
6454  XLogRecPtr recptr;
6455 
6456  SpinLockAcquire(&XLogCtl->info_lck);
6457  recptr = XLogCtl->LogwrtRqst.Write;
6458  SpinLockRelease(&XLogCtl->info_lck);
6459 
6460  return recptr;
6461 }
XLogwrtRqst LogwrtRqst
Definition: xlog.c:453
XLogRecPtr Write
Definition: xlog.c:319

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6526 of file xlog.c.

6527 {
6528  XLogRecPtr res = InvalidXLogRecPtr;
6529  int i;
6530 
6531  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6532  {
6533  XLogRecPtr last_important;
6534 
6535  /*
6536  * Need to take a lock to prevent torn reads of the LSN, which are
6537  * possible on some of the supported platforms. WAL insert locks only
6538  * support exclusive mode, so we have to use that.
6539  */
6540  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6541  last_important = WALInsertLocks[i].l.lastImportantAt;
6542  LWLockRelease(&WALInsertLocks[i].l.lock);
6543 
6544  if (res < last_important)
6545  res = last_important;
6546  }
6547 
6548  return res;
6549 }
int i
Definition: isn.c:73
XLogRecPtr lastImportantAt
Definition: xlog.c:369
WALInsertLock l
Definition: xlog.c:381
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:567
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:148

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4548 of file xlog.c.

4549 {
4550  Assert(ControlFile != NULL);
4551  return ControlFile->mock_authentication_nonce;
4552 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6340 of file xlog.c.

6341 {
6342  RecoveryState retval;
6343 
6344  SpinLockAcquire(&XLogCtl->info_lck);
6345  retval = XLogCtl->SharedRecoveryState;
6346  SpinLockRelease(&XLogCtl->info_lck);
6347 
6348  return retval;
6349 }

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6407 of file xlog.c.

6408 {
6409  XLogRecPtr ptr;
6410 
6411  /*
6412  * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6413  * grabbed a WAL insertion lock to read the authoritative value in
6414  * Insert->RedoRecPtr, someone might update it just after we've released
6415  * the lock.
6416  */
6417  SpinLockAcquire(&XLogCtl->info_lck);
6418  ptr = XLogCtl->RedoRecPtr;
6419  SpinLockRelease(&XLogCtl->info_lck);
6420 
6421  if (RedoRecPtr < ptr)
6422  RedoRecPtr = ptr;
6423 
6424  return RedoRecPtr;
6425 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7832 of file xlog.c.

7833 {
7834  XLogRecPtr currpos; /* current write LSN */
7835  XLogSegNo currSeg; /* segid of currpos */
7836  XLogSegNo targetSeg; /* segid of targetLSN */
7837  XLogSegNo oldestSeg; /* actual oldest segid */
7838  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7839  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7840  uint64 keepSegs;
7841 
7842  /*
7843  * slot does not reserve WAL. Either deactivated, or has never been active
7844  */
7845  if (XLogRecPtrIsInvalid(targetLSN))
7846  return WALAVAIL_INVALID_LSN;
7847 
7848  /*
7849  * Calculate the oldest segment currently reserved by all slots,
7850  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7851  * oldestSlotSeg to the current segment.
7852  */
7853  currpos = GetXLogWriteRecPtr();
7854  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7855  KeepLogSeg(currpos, &oldestSlotSeg);
7856 
7857  /*
7858  * Find the oldest extant segment file. We get 1 until checkpoint removes
7859  * the first WAL segment file since startup, which causes the status being
7860  * wrong under certain abnormal conditions but that doesn't actually harm.
7861  */
7862  oldestSeg = XLogGetLastRemovedSegno() + 1;
7863 
7864  /* calculate oldest segment by max_wal_size */
7865  XLByteToSeg(currpos, currSeg, wal_segment_size);
7866  keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7867 
7868  if (currSeg > keepSegs)
7869  oldestSegMaxWalSize = currSeg - keepSegs;
7870  else
7871  oldestSegMaxWalSize = 1;
7872 
7873  /* the segment we care about */
7874  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7875 
7876  /*
7877  * No point in returning reserved or extended status values if the
7878  * targetSeg is known to be lost.
7879  */
7880  if (targetSeg >= oldestSlotSeg)
7881  {
7882  /* show "reserved" when targetSeg is within max_wal_size */
7883  if (targetSeg >= oldestSegMaxWalSize)
7884  return WALAVAIL_RESERVED;
7885 
7886  /* being retained by slots exceeding max_wal_size */
7887  return WALAVAIL_EXTENDED;
7888  }
7889 
7890  /* WAL segments are no longer retained but haven't been removed yet */
7891  if (targetSeg >= oldestSeg)
7892  return WALAVAIL_UNRESERVED;
7893 
7894  /* Definitely lost */
7895  return WALAVAIL_REMOVED;
7896 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3750
int max_wal_size_mb
Definition: xlog.c:112
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:601
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9414
@ WALAVAIL_REMOVED
Definition: xlog.h:193

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6490 of file xlog.c.

6491 {
6492  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6493 
6494  /* Since the value can't be changing, no lock is required. */
6495  return XLogCtl->InsertTimeLineID;
6496 }

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6506 of file xlog.c.

6507 {
6508  TimeLineID insertTLI;
6509 
6510  SpinLockAcquire(&XLogCtl->info_lck);
6511  insertTLI = XLogCtl->InsertTimeLineID;
6512  SpinLockRelease(&XLogCtl->info_lck);
6513 
6514  return insertTLI;
6515 }

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9398 of file xlog.c.

9399 {
9400  XLogCtlInsert *Insert = &XLogCtl->Insert;
9401  uint64 current_bytepos;
9402 
9403  SpinLockAcquire(&Insert->insertpos_lck);
9404  current_bytepos = Insert->CurrBytePos;
9405  SpinLockRelease(&Insert->insertpos_lck);
9406 
9407  return XLogBytePosToRecPtr(current_bytepos);
9408 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

Definition at line 9414 of file xlog.c.

9415 {
9416  RefreshXLogWriteResult(LogwrtResult);
9417 
9418  return LogwrtResult.Write;
9419 }
XLogRecPtr Write
Definition: xlog.c:325

References LogwrtResult, RefreshXLogWriteResult, and XLogwrtResult::Write.

Referenced by GetWALAvailability(), pg_attribute_noreturn(), pg_current_wal_lsn(), and pg_get_replication_slots().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4754 of file xlog.c.

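4755 {
4756  Assert(process_shared_preload_libraries_done);
4757 
4758  if (check_wal_consistency_checking_deferred)
4759  {
4760  struct config_generic *guc;
4761 
4762  guc = find_option("wal_consistency_checking", false, false, ERROR);
4763 
4764  check_wal_consistency_checking_deferred = false;
4765 
4766  set_config_option_ext("wal_consistency_checking",
4767  wal_consistency_checking_string,
4768  guc->scontext, guc->source, guc->srole,
4769  GUC_ACTION_SET, true, ERROR, false);
4770 
4771  /* checking should not be deferred again */
4772  Assert(!check_wal_consistency_checking_deferred);
4773  }
4774 }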
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1234
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3380
@ GUC_ACTION_SET
Definition: guc.h:199
bool process_shared_preload_libraries_done
Definition: miscinit.c:1779
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:123
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:164

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9455 of file xlog.c.

9456 {
9457  bool result;
9458 
9459  LWLockAcquire(ControlFileLock, LW_SHARED);
9460  result = XLogCtl->InstallXLogFileSegmentActive;
9461  LWLockRelease(ControlFileLock);
9462 
9463  return result;
9464 }
bool InstallXLogFileSegmentActive
Definition: xlog.c:524

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8646 of file xlog.c.

8647 {
8648  char *msg = NULL;
8649  instr_time start;
8650 
8651  Assert(tli != 0);
8652 
8653  /*
8654  * Quick exit if fsync is disabled or write() has already synced the WAL
8655  * file.
8656  */
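8657  if (!enableFsync ||
8658  wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8659  wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8660  return;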
8661 
8662  /* Measure I/O timing to sync the WAL file */
8663  if (track_wal_io_timing)
8664  INSTR_TIME_SET_CURRENT(start);
8665  else
8666  INSTR_TIME_SET_ZERO(start);
8667 
8668  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8669  switch (wal_sync_method)
8670  {
8671  case WAL_SYNC_METHOD_FSYNC:
8672  if (pg_fsync_no_writethrough(fd) != 0)
8673  msg = _("could not fsync file \"%s\": %m");
8674  break;
8675 #ifdef HAVE_FSYNC_WRITETHROUGH
8676  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8677  if (pg_fsync_writethrough(fd) != 0)
8678  msg = _("could not fsync write-through file \"%s\": %m");
8679  break;
8680 #endif
8681  case WAL_SYNC_METHOD_FDATASYNC:
8682  if (pg_fdatasync(fd) != 0)
8683  msg = _("could not fdatasync file \"%s\": %m");
8684  break;
8685  case WAL_SYNC_METHOD_OPEN:
8686  case WAL_SYNC_METHOD_OPEN_DSYNC:
8687  /* not reachable */
8688  Assert(false);
8689  break;
8690  default:
8691  ereport(PANIC,
8692  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8693  errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8694  break;
8695  }
8696 
8697  /* PANIC if failed to fsync */
8698  if (msg)
8699  {
8700  char xlogfname[MAXFNAMELEN];
8701  int save_errno = errno;
8702 
8703  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8704  errno = save_errno;
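8705  ereport(PANIC,
8706  (errcode_for_file_access(),
8707  errmsg(msg, xlogfname)));
8708  }
8709 
8710  pgstat_report_wait_end();
8711 
8712  /*
8713  * Increment the I/O timing and the number of times WAL files were synced.
8714  */
8715  if (track_wal_io_timing)
8716  {
8717  instr_time end;
8718 
8719  INSTR_TIME_SET_CURRENT(end);
8720  INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
8721  }
8722 
8723  PendingWalStats.wal_sync++;
8724 }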
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
bool enableFsync
Definition: globals.c:127
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:456
PgStat_Counter wal_sync
Definition: pgstat.h:454
int wal_sync_method
Definition: xlog.c:128
bool track_wal_io_timing
Definition: xlog.c:135

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4816 of file xlog.c.

4817 {
4818  Assert(reset || ControlFile == NULL);
4819  ControlFile = palloc(sizeof(ControlFileData));
4820  ReadControlFile();
4821 }
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6217 of file xlog.c.

6218 {
6219  /*
6220  * We have reached the end of base backup, as indicated by pg_control. The
6221  * data on disk is now consistent (unless minRecoveryPoint is further
6222  * ahead, which can happen if we crashed during previous recovery). Reset
6223  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6224  * make sure we don't allow starting up at an earlier point even if
6225  * recovery is stopped and restarted soon after this.
6226  */
6227  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6228 
6229  if (ControlFile->minRecoveryPoint < EndRecPtr)
6230  {
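6231  ControlFile->minRecoveryPoint = EndRecPtr;
6232  ControlFile->minRecoveryPointTLI = tli;
6233  }
6234 
6235  ControlFile->backupStartPoint = InvalidXLogRecPtr;
6236  ControlFile->backupEndPoint = InvalidXLogRecPtr;
6237  ControlFile->backupEndRequired = false;
6238  UpdateControlFile();
6239 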
6240  LWLockRelease(ControlFileLock);
6241 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6304 of file xlog.c.

6305 {
6306  /*
6307  * We check shared state each time only until we leave recovery mode. We
6308  * can't re-enter recovery, so there's no need to keep checking after the
6309  * shared variable has once been seen false.
6310  */
6311  if (!LocalRecoveryInProgress)
6312  return false;
6313  else
6314  {
6315  /*
6316  * use volatile pointer to make sure we make a fresh read of the
6317  * shared variable.
6318  */
6319  volatile XLogCtlData *xlogctl = XLogCtl;
6320 
6321  LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6322 
6323  /*
6324  * Note: We don't need a memory barrier when we're still in recovery.
6325  * We might exit recovery immediately after return, so the caller
6326  * can't rely on 'true' meaning that we're still in recovery anyway.
6327  */
6328 
6329  return LocalRecoveryInProgress;
6330  }
6331 }
static bool LocalRecoveryInProgress
Definition: xlog.c:222

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), 
SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9384 of file xlog.c.

9385 {
9386  static bool already_done = false;
9387 
9388  if (already_done)
9389  return;
9390  before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
9391  already_done = true;
9392 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3932 of file xlog.c.

3933 {
3934  DIR *xldir;
3935  struct dirent *xlde;
3936  char switchseg[MAXFNAMELEN];
3937  XLogSegNo endLogSegNo;
3938  XLogSegNo switchLogSegNo;
3939  XLogSegNo recycleSegNo;
3940 
3941  /*
3942  * Initialize info about where to begin the work. This will recycle,
3943  * somewhat arbitrarily, 10 future segments.
3944  */
3945  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3946  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3947  recycleSegNo = endLogSegNo + 10;
3948 
3949  /*
3950  * Construct a filename of the last segment to be kept.
3951  */
3952  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3953 
3954  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3955  switchseg);
3956 
3957  xldir = AllocateDir(XLOGDIR);
3958 
3959  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3960  {
3961  /* Ignore files that are not XLOG segments */
3962  if (!IsXLogFileName(xlde->d_name))
3963  continue;
3964 
3965  /*
3966  * Remove files that are on a timeline older than the new one we're
3967  * switching to, but with a segment number >= the first segment on the
3968  * new timeline.
3969  */
3970  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3971  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3972  {
3973  /*
3974  * If the file has already been marked as .ready, however, don't
3975  * remove it yet. It should be OK to remove it - files that are
3976  * not part of our timeline history are not required for recovery
3977  * - but seems safer to let them be archived and removed later.
3978  */
3979  if (!XLogArchiveIsReady(xlde->d_name))
3980  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3981  }
3982  }
3983 
3984  FreeDir(xldir);
3985 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4001
static bool IsXLogFileName(const char *fname)
#define XLOGDIR
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9447 of file xlog.c.

9448 {
9449  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9450  XLogCtl->InstallXLogFileSegmentActive = true;
9451  LWLockRelease(ControlFileLock);
9452 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9470 of file xlog.c.

9471 {
9472  SpinLockAcquire(&XLogCtl->info_lck);
9473  XLogCtl->WalWriterSleeping = sleeping;
9474  SpinLockRelease(&XLogCtl->info_lck);
9475 }
bool WalWriterSleeping
Definition: xlog.c:531

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6572 of file xlog.c.

6573 {
6574  /*
6575  * We should have an aux process resource owner to use, and we should not
6576  * be in a transaction that's installed some other resowner.
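6577  */
6578  Assert(AuxProcessResourceOwner != NULL);
6579  Assert(CurrentResourceOwner == NULL ||
6580  CurrentResourceOwner == AuxProcessResourceOwner);
6581  CurrentResourceOwner = AuxProcessResourceOwner;
6582 
6583  /* Don't be chatty in standalone mode */
6584  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6585  (errmsg("shutting down")));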
6586 
6587  /*
6588  * Signal walsenders to move to stopping state.
6589  */
6590  WalSndInitStopping();
6591 
6592  /*
6593  * Wait for WAL senders to be in stopping state. This prevents commands
6594  * from writing new WAL.
6595  */
6596  WalSndWaitStopping();
6597 
6598  if (RecoveryInProgress())
6599  CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6600  else
6601  {
6602  /*
6603  * If archiving is enabled, rotate the last XLOG file so that all the
6604  * remaining records are archived (postmaster wakes up the archiver
6605  * process one more time at the end of shutdown). The checkpoint
6606  * record will go to the next XLOG file and won't be archived (yet).
6607  */
6608  if (XLogArchivingActive())
6609  RequestXLogSwitch(false);
6610 
6611  CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
6612  }
6613 }
bool IsPostmasterEnvironment
Definition: globals.c:117
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3748
void WalSndWaitStopping(void)
Definition: walsender.c:3774
bool CreateRestartPoint(int flags)
Definition: xlog.c:7554
void CreateCheckPoint(int flags)
Definition: xlog.c:6854

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5399 of file xlog.c.

5400 {
5402  CheckPoint checkPoint;
5403  bool wasShutdown;
5404  bool didCrash;
5405  bool haveTblspcMap;
5406  bool haveBackupLabel;
5407  XLogRecPtr EndOfLog;
5408  TimeLineID EndOfLogTLI;
5409  TimeLineID newTLI;
5410  bool performedWalRecovery;
5411  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5414  TransactionId oldestActiveXID;
5415  bool promoted = false;
5416 
5417  /*
5418  * We should have an aux process resource owner to use, and we should not
5419  * be in a transaction that's installed some other resowner.
5420  */
5422  Assert(CurrentResourceOwner == NULL ||
5425 
5426  /*
5427  * Check that contents look valid.
5428  */
5430  ereport(FATAL,
5432  errmsg("control file contains invalid checkpoint location")));
5433 
5434  switch (ControlFile->state)
5435  {
5436  case DB_SHUTDOWNED:
5437 
5438  /*
5439  * This is the expected case, so don't be chatty in standalone
5440  * mode
5441  */
5443  (errmsg("database system was shut down at %s",
5444  str_time(ControlFile->time))));
5445  break;
5446 
5448  ereport(LOG,
5449  (errmsg("database system was shut down in recovery at %s",
5450  str_time(ControlFile->time))));
5451  break;
5452 
5453  case DB_SHUTDOWNING:
5454  ereport(LOG,
5455  (errmsg("database system shutdown was interrupted; last known up at %s",
5456  str_time(ControlFile->time))));
5457  break;
5458 
5459  case DB_IN_CRASH_RECOVERY:
5460  ereport(LOG,
5461  (errmsg("database system was interrupted while in recovery at %s",
5463  errhint("This probably means that some data is corrupted and"
5464  " you will have to use the last backup for recovery.")));
5465  break;
5466 
5468  ereport(LOG,
5469  (errmsg("database system was interrupted while in recovery at log time %s",
5471  errhint("If this has occurred more than once some data might be corrupted"
5472  " and you might need to choose an earlier recovery target.")));
5473  break;
5474 
5475  case DB_IN_PRODUCTION:
5476  ereport(LOG,
5477  (errmsg("database system was interrupted; last known up at %s",
5478  str_time(ControlFile->time))));
5479  break;
5480 
5481  default:
5482  ereport(FATAL,
5484  errmsg("control file contains invalid database cluster state")));
5485  }
5486 
5487  /* This is just to allow attaching to startup process with a debugger */
5488 #ifdef XLOG_REPLAY_DELAY
5490  pg_usleep(60000000L);
5491 #endif
5492 
5493  /*
5494  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5495  * In cases where someone has performed a copy for PITR, these directories
5496  * may have been excluded and need to be re-created.
5497  */
5499 
5500  /* Set up timeout handler needed to report startup progress. */
5504 
5505  /*----------
5506  * If we previously crashed, perform a couple of actions:
5507  *
5508  * - The pg_wal directory may still include some temporary WAL segments
5509  * used when creating a new segment, so perform some clean up to not
5510  * bloat this path. This is done first as there is no point to sync
5511  * this temporary data.
5512  *
5513  * - There might be data which we had written, intending to fsync it, but
5514  * which we had not actually fsync'd yet. Therefore, a power failure in
5515  * the near future might cause earlier unflushed writes to be lost, even
5516  * though more recent data written to disk from here on would be
5517  * persisted. To avoid that, fsync the entire data directory.
5518  */
5519  if (ControlFile->state != DB_SHUTDOWNED &&
5521  {
5524  didCrash = true;
5525  }
5526  else
5527  didCrash = false;
5528 
5529  /*
5530  * Prepare for WAL recovery if needed.
5531  *
5532  * InitWalRecovery analyzes the control file and the backup label file, if
5533  * any. It updates the in-memory ControlFile buffer according to the
5534  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5535  * It also applies the tablespace map file, if any.
5536  */
5537  InitWalRecovery(ControlFile, &wasShutdown,
5538  &haveBackupLabel, &haveTblspcMap);
5539  checkPoint = ControlFile->checkPointCopy;
5540 
5541  /* initialize shared memory variables from the checkpoint record */
5542  TransamVariables->nextXid = checkPoint.nextXid;
5543  TransamVariables->nextOid = checkPoint.nextOid;
5545  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5546  AdvanceOldestClogXid(checkPoint.oldestXid);
5547  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5548  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5550  checkPoint.newestCommitTsXid);
5551  XLogCtl->ckptFullXid = checkPoint.nextXid;
5552 
5553  /*
5554  * Clear out any old relcache cache files. This is *necessary* if we do
5555  * any WAL replay, since that would probably result in the cache files
5556  * being out of sync with database reality. In theory we could leave them
5557  * in place if the database had been cleanly shut down, but it seems
5558  * safest to just remove them always and let them be rebuilt during the
5559  * first backend startup. These files needs to be removed from all
5560  * directories including pg_tblspc, however the symlinks are created only
5561  * after reading tablespace_map file in case of archive recovery from
5562  * backup, so needs to clear old relcache files here after creating
5563  * symlinks.
5564  */
5566 
5567  /*
5568  * Initialize replication slots, before there's a chance to remove
5569  * required resources.
5570  */
5572 
5573  /*
5574  * Startup logical state, needs to be setup now so we have proper data
5575  * during crash recovery.
5576  */
5578 
5579  /*
5580  * Startup CLOG. This must be done after TransamVariables->nextXid has
5581  * been initialized and before we accept connections or begin WAL replay.
5582  */
5583  StartupCLOG();
5584 
5585  /*
5586  * Startup MultiXact. We need to do this early to be able to replay
5587  * truncations.
5588  */
5589  StartupMultiXact();
5590 
5591  /*
5592  * Ditto for commit timestamps. Activate the facility if the setting is
5593  * enabled in the control file, as there should be no tracking of commit
5594  * timestamps done when the setting was disabled. This facility can be
5595  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5596  */
5598  StartupCommitTs();
5599 
5600  /*
5601  * Recover knowledge about replay progress of known replication partners.
5602  */
5604 
5605  /*
5606  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5607  * control file. On recovery, all unlogged relations are blown away, so
5608  * the unlogged LSN counter can be reset too.
5609  */
5613  else
5616 
5617  /*
5618  * Copy any missing timeline history files between 'now' and the recovery
5619  * target timeline from archive to pg_wal. While we don't need those files
5620  * ourselves - the history file of the recovery target timeline covers all
5621  * the previous timelines in the history too - a cascading standby server
5622  * might be interested in them. Or, if you archive the WAL from this
5623  * server to a different archive than the primary, it'd be good for all
5624  * the history files to get archived there after failover, so that you can
5625  * use one of the old timelines as a PITR target. Timeline history files
5626  * are small, so it's better to copy them unnecessarily than not copy them
5627  * and regret later.
5628  */
5630 
5631  /*
5632  * Before running in recovery, scan pg_twophase and fill in its status to
5633  * be able to work on entries generated by redo. Doing a scan before
5634  * taking any recovery action has the merit to discard any 2PC files that
5635  * are newer than the first record to replay, saving from any conflicts at
5636  * replay. This also avoids any subsequent scans when doing recovery
5637  * of the on-disk two-phase data.
5638  */
5640 
5641  /*
5642  * When starting with crash recovery, reset pgstat data - it might not be
5643  * valid. Otherwise restore pgstat data. It's safe to do this here,
5644  * because postmaster will not yet have started any other processes.
5645  *
5646  * NB: Restoring replication slot stats relies on slot state to have
5647  * already been restored from disk.
5648  *
5649  * TODO: With a bit of extra work we could just start with a pgstat file
5650  * associated with the checkpoint redo location we're starting from.
5651  */
5652  if (didCrash)
5654  else
5656 
5657  lastFullPageWrites = checkPoint.fullPageWrites;
5658 
5661 
5662  /* REDO */
5663  if (InRecovery)
5664  {
5665  /* Initialize state for RecoveryInProgress() */
5667  if (InArchiveRecovery)
5669  else
5672 
5673  /*
5674  * Update pg_control to show that we are recovering and to show the
5675  * selected checkpoint as the place we are starting from. We also mark
5676  * pg_control with any minimum recovery stop point obtained from a
5677  * backup history file.
5678  *
5679  * No need to hold ControlFileLock yet, we aren't up far enough.
5680  */
5682 
5683  /*
5684  * If there was a backup label file, it's done its job and the info
5685  * has now been propagated into pg_control. We must get rid of the
5686  * label file so that if we crash during recovery, we'll pick up at
5687  * the latest recovery restartpoint instead of going all the way back
5688  * to the backup start point. It seems prudent though to just rename
5689  * the file out of the way rather than delete it completely.
5690  */
5691  if (haveBackupLabel)
5692  {
5693  unlink(BACKUP_LABEL_OLD);
5695  }
5696 
5697  /*
5698  * If there was a tablespace_map file, it's done its job and the
5699  * symlinks have been created. We must get rid of the map file so
5700  * that if we crash during recovery, we don't create symlinks again.
5701  * It seems prudent though to just rename the file out of the way
5702  * rather than delete it completely.
5703  */
5704  if (haveTblspcMap)
5705  {
5706  unlink(TABLESPACE_MAP_OLD);
5708  }
5709 
5710  /*
5711  * Initialize our local copy of minRecoveryPoint. When doing crash
5712  * recovery we want to replay up to the end of WAL. Particularly, in
5713  * the case of a promoted standby minRecoveryPoint value in the
5714  * control file is only updated after the first checkpoint. However,
5715  * if the instance crashes before the first post-recovery checkpoint
5716  * is completed then recovery will use a stale location causing the
5717  * startup process to think that there are still invalid page
5718  * references when checking for data consistency.
5719  */
5720  if (InArchiveRecovery)
5721  {
5724  }
5725  else
5726  {
5729  }
5730 
5731  /* Check that the GUCs used to generate the WAL allow recovery */
5733 
5734  /*
5735  * We're in recovery, so unlogged relations may be trashed and must be
5736  * reset. This should be done BEFORE allowing Hot Standby
5737  * connections, so that read-only backends don't try to read whatever
5738  * garbage is left over from before.
5739  */
5741 
5742  /*
5743  * Likewise, delete any saved transaction snapshot files that got left
5744  * behind by crashed backends.
5745  */
5747 
5748  /*
5749  * Initialize for Hot Standby, if enabled. We won't let backends in
5750  * yet, not until we've reached the min recovery point specified in
5751  * control file and we've established a recovery snapshot from a
5752  * running-xacts WAL record.
5753  */
5755  {
5756  TransactionId *xids;
5757  int nxids;
5758 
5759  ereport(DEBUG1,
5760  (errmsg_internal("initializing for hot standby")));
5761 
5763 
5764  if (wasShutdown)
5765  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5766  else
5767  oldestActiveXID = checkPoint.oldestActiveXid;
5768  Assert(TransactionIdIsValid(oldestActiveXID));
5769 
5770  /* Tell procarray about the range of xids it has to deal with */
5772 
5773  /*
5774  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5775  * have already been started up and other SLRUs are not maintained
5776  * during recovery and need not be started yet.
5777  */
5778  StartupSUBTRANS(oldestActiveXID);
5779 
5780  /*
5781  * If we're beginning at a shutdown checkpoint, we know that
5782  * nothing was running on the primary at this point. So fake-up an
5783  * empty running-xacts record and use that here and now. Recover
5784  * additional standby state for prepared transactions.
5785  */
5786  if (wasShutdown)
5787  {
5788  RunningTransactionsData running;
5789  TransactionId latestCompletedXid;
5790 
5791  /* Update pg_subtrans entries for any prepared transactions */
5793 
5794  /*
5795  * Construct a RunningTransactions snapshot representing a
5796  * shut down server, with only prepared transactions still
5797  * alive. We're never overflowed at this point because all
5798  * subxids are listed with their parent prepared transactions.
5799  */
5800  running.xcnt = nxids;
5801  running.subxcnt = 0;
5803  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5804  running.oldestRunningXid = oldestActiveXID;
5805  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5806  TransactionIdRetreat(latestCompletedXid);
5807  Assert(TransactionIdIsNormal(latestCompletedXid));
5808  running.latestCompletedXid = latestCompletedXid;
5809  running.xids = xids;
5810 
5811  ProcArrayApplyRecoveryInfo(&running);
5812  }
5813  }
5814 
5815  /*
5816  * We're all set for replaying the WAL now. Do it.
5817  */
5819  performedWalRecovery = true;
5820  }
5821  else
5822  performedWalRecovery = false;
5823 
5824  /*
5825  * Finish WAL recovery.
5826  */
5827  endOfRecoveryInfo = FinishWalRecovery();
5828  EndOfLog = endOfRecoveryInfo->endOfLog;
5829  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5830  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5831  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5832 
5833  /*
5834  * Reset ps status display, so that no information related to recovery shows
5835  * up.
5836  */
5837  set_ps_display("");
5838 
5839  /*
5840  * When recovering from a backup (we are in recovery, and archive recovery
5841  * was requested), complain if we did not roll forward far enough to reach
5842  * the point where the database is consistent. For regular online
5843  * backup-from-primary, that means reaching the end-of-backup WAL record
5844  * (at which point we reset backupStartPoint to be Invalid), for
5845  * backup-from-replica (which can't inject records into the WAL stream),
5846  * that point is when we reach the minRecoveryPoint in pg_control (which
5847  * we purposefully copy last when backing up from a replica). For
5848  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5849  * or snapshot-style backups (which don't), backupEndRequired will be set
5850  * to false.
5851  *
5852  * Note: it is indeed okay to look at the local variable
5853  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5854  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5855  * been advanced beyond the WAL we processed.
5856  */
5857  if (InRecovery &&
5858  (EndOfLog < LocalMinRecoveryPoint ||
5860  {
5861  /*
5862  * Ran off end of WAL before reaching end-of-backup WAL record, or
5863  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5864  * recover from an online backup but never called pg_backup_stop(), or
5865  * you didn't archive all the WAL needed.
5866  */
5868  {
5870  ereport(FATAL,
5871  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5872  errmsg("WAL ends before end of online backup"),
5873  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5874  else
5875  ereport(FATAL,
5876  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5877  errmsg("WAL ends before consistent recovery point")));
5878  }
5879  }
5880 
5881  /*
5882  * Reset unlogged relations to the contents of their INIT fork. This is
5883  * done AFTER recovery is complete so as to include any unlogged relations
5884  * created during recovery, but BEFORE recovery is marked as having
5885  * completed successfully. Otherwise we'd not retry if any of the post
5886  * end-of-recovery steps fail.
5887  */
5888  if (InRecovery)
5890 
5891  /*
5892  * Pre-scan prepared transactions to find out the range of XIDs present.
5893  * This information is not quite needed yet, but it is positioned here so
5894  * as potential problems are detected before any on-disk change is done.
5895  */
5896  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5897 
5898  /*
5899  * Allow ordinary WAL segment creation before possibly switching to a new
5900  * timeline, which creates a new segment, and after the last ReadRecord().
5901  */
5903 
5904  /*
5905  * Consider whether we need to assign a new timeline ID.
5906  *
5907  * If we did archive recovery, we always assign a new ID. This handles a
5908  * couple of issues. If we stopped short of the end of WAL during
5909  * recovery, then we are clearly generating a new timeline and must assign
5910  * it a unique new ID. Even if we ran to the end, modifying the current
5911  * last segment is problematic because it may result in trying to
5912  * overwrite an already-archived copy of that segment, and we encourage
5913  * DBAs to make their archive_commands reject that. We can dodge the
5914  * problem by making the new active segment have a new timeline ID.
5915  *
5916  * In a normal crash recovery, we can just extend the timeline we were in.
5917  */
5918  newTLI = endOfRecoveryInfo->lastRecTLI;
5920  {
5921  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5922  ereport(LOG,
5923  (errmsg("selected new timeline ID: %u", newTLI)));
5924 
5925  /*
5926  * Make a writable copy of the last WAL segment. (Note that we also
5927  * have a copy of the last block of the old WAL in
5928  * endOfRecoveryInfo->lastPage; we will use that below.)
5929  */
5930  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5931 
5932  /*
5933  * Remove the signal files out of the way, so that we don't
5934  * accidentally re-enter archive recovery mode in a subsequent crash.
5935  */
5936  if (endOfRecoveryInfo->standby_signal_file_found)
5938 
5939  if (endOfRecoveryInfo->recovery_signal_file_found)
5941 
5942  /*
5943  * Write the timeline history file, and have it archived. After this
5944  * point (or rather, as soon as the file is archived), the timeline
5945  * will appear as "taken" in the WAL archive and to any standby
5946  * servers. If we crash before actually switching to the new
5947  * timeline, standby servers will nevertheless think that we switched
5948  * to the new timeline, and will try to connect to the new timeline.
5949  * To minimize the window for that, try to do as little as possible
5950  * between here and writing the end-of-recovery record.
5951  */
5953  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5954 
5955  ereport(LOG,
5956  (errmsg("archive recovery complete")));
5957  }
5958 
5959  /* Save the selected TimeLineID in shared memory, too */
5961  XLogCtl->InsertTimeLineID = newTLI;
5962  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5964 
5965  /*
5966  * Actually, if WAL ended in an incomplete record, skip the parts that
5967  * made it through and start writing after the portion that persisted.
5968  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5969  * we'll do as soon as we're open for writing new WAL.)
5970  */
5972  {
5973  /*
5974  * We should only have a missingContrecPtr if we're not switching to a
5975  * new timeline. When a timeline switch occurs, WAL is copied from the
5976  * old timeline to the new only up to the end of the last complete
5977  * record, so there can't be an incomplete WAL record that we need to
5978  * disregard.
5979  */
5980  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
5982  EndOfLog = missingContrecPtr;
5983  }
5984 
5985  /*
5986  * Prepare to write WAL starting at EndOfLog location, and init xlog
5987  * buffer cache using the block containing the last record from the
5988  * previous incarnation.
5989  */
5990  Insert = &XLogCtl->Insert;
5991  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
5992  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
5993 
5994  /*
5995  * Tricky point here: lastPage contains the *last* block that the LastRec
5996  * record spans, not the one it starts in. The last block is indeed the
5997  * one we want to use.
5998  */
5999  if (EndOfLog % XLOG_BLCKSZ != 0)
6000  {
6001  char *page;
6002  int len;
6003  int firstIdx;
6004 
6005  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6006  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6007  Assert(len < XLOG_BLCKSZ);
6008 
6009  /* Copy the valid part of the last block, and zero the rest */
6010  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6011  memcpy(page, endOfRecoveryInfo->lastPage, len);
6012  memset(page + len, 0, XLOG_BLCKSZ - len);
6013 
6014  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6015  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6016  }
6017  else
6018  {
6019  /*
6020  * There is no partial block to copy. Just set InitializedUpTo, and
6021  * let the first attempt to insert a log record to initialize the next
6022  * buffer.
6023  */
6024  XLogCtl->InitializedUpTo = EndOfLog;
6025  }
6026 
6027  /*
6028  * Update local and shared status. This is OK to do without any locks
6029  * because no other process can be reading or writing WAL yet.
6030  */
6031  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6035  XLogCtl->LogwrtRqst.Write = EndOfLog;
6036  XLogCtl->LogwrtRqst.Flush = EndOfLog;
6037 
6038  /*
6039  * Preallocate additional log files, if wanted.
6040  */
6041  PreallocXlogFiles(EndOfLog, newTLI);
6042 
6043  /*
6044  * Okay, we're officially UP.
6045  */
6046  InRecovery = false;
6047 
6048  /* start the archive_timeout timer and LSN running */
6049  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6050  XLogCtl->lastSegSwitchLSN = EndOfLog;
6051 
6052  /* also initialize latestCompletedXid, to nextXid - 1 */
6053  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6056  LWLockRelease(ProcArrayLock);
6057 
6058  /*
6059  * Start up subtrans, if not already done for hot standby. (commit
6060  * timestamps are started below, if necessary.)
6061  */
6063  StartupSUBTRANS(oldestActiveXID);
6064 
6065  /*
6066  * Perform end of recovery actions for any SLRUs that need it.
6067  */
6068  TrimCLOG();
6069  TrimMultiXact();
6070 
6071  /*
6072  * Reload shared-memory state for prepared transactions. This needs to
6073  * happen before renaming the last partial segment of the old timeline as
6074  * it may be possible that we have to recover some transactions from it.
6075  */
6077 
6078  /* Shut down xlogreader */
6080 
6081  /* Enable WAL writes for this backend only. */
6083 
6084  /* If necessary, write overwrite-contrecord before doing anything else */
6086  {
6089  }
6090 
6091  /*
6092  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6093  * record before resource manager writes cleanup WAL records or checkpoint
6094  * record is written.
6095  */
6096  Insert->fullPageWrites = lastFullPageWrites;
6098 
6099  /*
6100  * Emit checkpoint or end-of-recovery record in XLOG, if required.
6101  */
6102  if (performedWalRecovery)
6103  promoted = PerformRecoveryXLogAction();
6104 
6105  /*
6106  * If any of the critical GUCs have changed, log them before we allow
6107  * backends to write WAL.
6108  */
6110 
6111  /* If this is archive recovery, perform post-recovery cleanup actions. */
6113  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6114 
6115  /*
6116  * Local WAL inserts enabled, so it's time to finish initialization of
6117  * commit timestamp.
6118  */
6120 
6121  /*
6122  * All done with end-of-recovery actions.
6123  *
6124  * Now allow backends to write WAL and update the control file status in
6125  * consequence. SharedRecoveryState, that controls if backends can write
6126  * WAL, is updated while holding ControlFileLock to prevent other backends
6127  * from looking at an inconsistent state of the control file in shared memory.
6128  * There is still a small window during which backends can write WAL and
6129  * the control file is still referring to a system not in DB_IN_PRODUCTION
6130  * state while looking at the on-disk control file.
6131  *
6132  * Also, we use info_lck to update SharedRecoveryState to ensure that
6133  * there are no race conditions concerning visibility of other recent
6134  * updates to shared memory.
6135  */
6136  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6138 
6142 
6144  LWLockRelease(ControlFileLock);
6145 
6146  /*
6147  * Shutdown the recovery environment. This must occur after
6148  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6149  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6150  * any session building a snapshot will not rely on KnownAssignedXids as
6151  * RecoveryInProgress() would return false at this stage. This is
6152  * particularly critical for prepared 2PC transactions, that would still
6153  * need to be included in snapshots once recovery has ended.
6154  */
6157 
6158  /*
6159  * If there were cascading standby servers connected to us, nudge any wal
6160  * sender processes to notice that we've been promoted.
6161  */
6162  WalSndWakeup(true, true);
6163 
6164  /*
6165  * If this was a promotion, request an (online) checkpoint now. This isn't
6166  * required for consistency, but the last restartpoint might be far back,
6167  * and in case of a crash, recovering from it might take longer than is
6168  * appropriate now that we're not in standby mode anymore.
6169  */
6170  if (promoted)
6172 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:478
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:487
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:652
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
#define FATAL
Definition: elog.h:41
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
void SyncDataDirectory(void)
Definition: fd.c:3544
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
void TrimMultiXact(void)
Definition: multixact.c:2170
void StartupMultiXact(void)
Definition: multixact.c:2145
void StartupReplicationOrigin(void)
Definition: origin.c:699
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:450
void pgstat_discard_stats(void)
Definition: pgstat.c:462
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6801
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1917
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
bool track_commit_timestamp
Definition: pg_control.h:185
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
XLogRecPtr InitializedUpTo
Definition: xlog.c:483
char * pages
Definition: xlog.c:490
pg_time_t lastSegSwitchTime
Definition: xlog.c:465
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:466
pg_atomic_uint64 * xlblocks
Definition: xlog.c:491
pg_atomic_uint64 logWriteResult
Definition: xlog.c:470
pg_atomic_uint64 logFlushResult
Definition: xlog.c:471
pg_atomic_uint64 logInsertResult
Definition: xlog.c:469
XLogRecPtr Flush
Definition: xlog.c:320
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2083
void restoreTwoPhaseData(void)
Definition: twophase.c:1898
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1962
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2042
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3669
void UpdateFullPageWrites(void)
Definition: xlog.c:8129
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5169
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4091
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7403
static void XLogReportParameters(void)
Definition: xlog.c:8066
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6254
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5259
static bool lastFullPageWrites
Definition: xlog.c:215
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1939
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5184
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5355
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:589
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3824
#define TABLESPACE_MAP_OLD
Definition: xlog.h:305
#define TABLESPACE_MAP
Definition: xlog.h:304
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:300
#define BACKUP_LABEL_OLD
Definition: xlog.h:302
#define BACKUP_LABEL_FILE
Definition: xlog.h:301
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:299
#define XRecOffIsValid(xlrp)
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
void ShutdownWalRecovery(void)
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:137
bool InArchiveRecovery
Definition: xlogrecovery.c:138
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:373
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:372
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:512
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:122
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6179 of file xlog.c.

/*
 * SwitchIntoArchiveRecovery (rendered listing of the function body)
 *
 * While holding ControlFileLock exclusively, advance
 * ControlFile->minRecoveryPoint and ControlFile->minRecoveryPointTLI to
 * EndRecPtr / replayTLI when the stored minRecoveryPoint is behind EndRecPtr,
 * and set updateMinRecoveryPoint so that the startup process may update its
 * local copy of minRecoveryPoint from here on.
 *
 * NOTE(review): the embedded line numbers skip 6183, 6190-6191, 6199 and
 * 6205-6207, so some statements of the real function are not shown in this
 * listing; confirm the omitted steps against xlog.c before relying on it.
 */
6180 {
6181  /* initialize minRecoveryPoint to this record (EndRecPtr), unless it is already at or beyond it */
6182  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6184  if (ControlFile->minRecoveryPoint < EndRecPtr)
6185  {
6186  ControlFile->minRecoveryPoint = EndRecPtr;
6187  ControlFile->minRecoveryPointTLI = replayTLI;
6188  }
6189  /* update local copy (the assignments themselves are not shown in this listing; presumably LocalMinRecoveryPoint and LocalMinRecoveryPointTLI -- TODO confirm) */
6192 
6193  /*
6194  * The startup process can update its local copy of minRecoveryPoint from
6195  * this point.
6196  */
6197  updateMinRecoveryPoint = true;
6198 
6200 
6201  /*
6202  * We update SharedRecoveryState while holding the lock on ControlFileLock
6203  * so both states are consistent in shared memory.
6204  */
6208 
6209  LWLockRelease(ControlFileLock);
6210 }
static bool updateMinRecoveryPoint
Definition: xlog.c:646

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8129 of file xlog.c.

8130 {
8131  XLogCtlInsert *Insert = &XLogCtl->Insert;
8132  bool recoveryInProgress;
8133 
8134  /*
8135  * Do nothing if full_page_writes has not been changed.
8136  *
8137  * It's safe to check the shared full_page_writes without the lock,
8138  * because we assume that there is no concurrently running process which
8139  * can update it.
8140  */
8141  if (fullPageWrites == Insert->fullPageWrites)
8142  return;
8143 
8144  /*
8145  * Perform this outside critical section so that the WAL insert
8146  * initialization done by RecoveryInProgress() doesn't trigger an
8147  * assertion failure.
8148  */
8149  recoveryInProgress = RecoveryInProgress();
8150 
8151  START_CRIT_SECTION();
8152 
8153  /*
8154  * It's always safe to take full page images, even when not strictly
8155  * required, but not the other way round. So if we're setting full_page_writes
8156  * to true, first set it true and then write the WAL record. If we're
8157  * setting it to false, first write the WAL record and then set the global
8158  * flag.
8159  */
8160  if (fullPageWrites)
8161  {
8162  WALInsertLockAcquireExclusive();
8163  Insert->fullPageWrites = true;
8164  WALInsertLockRelease();
8165  }
8166 
8167  /*
8168  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8169  * full_page_writes during archive recovery, if required.
8170  */
8171  if (XLogStandbyInfoActive() && !recoveryInProgress)
8172  {
8173  XLogBeginInsert();
8174  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8175 
8176  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8177  }
8178 
8179  if (!fullPageWrites)
8180  {
8181  WALInsertLockAcquireExclusive();
8182  Insert->fullPageWrites = false;
8183  WALInsertLockRelease();
8184  }
8185  END_CRIT_SECTION();
8186 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1746 of file xlog.c.

1748 {
1749  char *pdst = dstbuf;
1750  XLogRecPtr recptr = startptr;
1751  XLogRecPtr inserted;
1752  Size nbytes = count;
1753 
1754  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1755  return 0;
1756 
1757  Assert(!XLogRecPtrIsInvalid(startptr));
1758 
1759  /*
1760  * Caller should ensure that the requested data has been inserted into WAL
1761  * buffers before we try to read it.
1762  */
1763  inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
1764  if (startptr + count > inserted)
1765  ereport(ERROR,
1766  errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1767  LSN_FORMAT_ARGS(startptr + count),
1768  LSN_FORMAT_ARGS(inserted)));
1769 
1770  /*
1771  * Loop through the buffers without a lock. For each buffer, atomically
1772  * read and verify the end pointer, then copy the data out, and finally
1773  * re-read and re-verify the end pointer.
1774  *
1775  * Once a page is evicted, it never returns to the WAL buffers, so if the
1776  * end pointer matches the expected end pointer before and after we copy
1777  * the data, then the right page must have been present during the data
1778  * copy. Read barriers are necessary to ensure that the data copy actually
1779  * happens between the two verification steps.
1780  *
1781  * If either verification fails, we simply terminate the loop and return
1782  * with the data that had been already copied out successfully.
1783  */
1784  while (nbytes > 0)
1785  {
1786  uint32 offset = recptr % XLOG_BLCKSZ;
1787  int idx = XLogRecPtrToBufIdx(recptr);
1788  XLogRecPtr expectedEndPtr;
1789  XLogRecPtr endptr;
1790  const char *page;
1791  const char *psrc;
1792  Size npagebytes;
1793 
1794  /*
1795  * Calculate the end pointer we expect in the xlblocks array if the
1796  * correct page is present.
1797  */
1798  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1799 
1800  /*
1801  * First verification step: check that the correct page is present in
1802  * the WAL buffers.
1803  */
1804  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1805  if (expectedEndPtr != endptr)
1806  break;
1807 
1808  /*
1809  * The correct page is present (or was at the time the endptr was
1810  * read; must re-verify later). Calculate pointer to source data and
1811  * determine how much data to read from this page.
1812  */
1813  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1814  psrc = page + offset;
1815  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1816 
1817  /*
1818  * Ensure that the data copy and the first verification step are not
1819  * reordered.
1820  */
1821  pg_read_barrier();
1822 
1823  /* data copy */
1824  memcpy(pdst, psrc, npagebytes);
1825 
1826  /*
1827  * Ensure that the data copy and the second verification step are not
1828  * reordered.
1829  */
1830  pg_read_barrier();
1831 
1832  /*
1833  * Second verification step: check that the page we read from wasn't
1834  * evicted while we were copying the data.
1835  */
1836  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1837  if (expectedEndPtr != endptr)
1838  break;
1839 
1840  pdst += npagebytes;
1841  recptr += npagebytes;
1842  nbytes -= npagebytes;
1843  }
1844 
1845  Assert(pdst - dstbuf <= count);
1846 
1847  return pdst - dstbuf;
1848 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_read_barrier()
Definition: atomics.h:149
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:460
#define Min(x, y)
Definition: c.h:1004
size_t Size
Definition: c.h:605
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6490

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ xlog_desc()

void xlog_desc ( StringInfo  buf,
struct XLogReaderState record 
)

Definition at line 58 of file xlogdesc.c.

59 {
60  char *rec = XLogRecGetData(record);
61  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
62 
63  if (info == XLOG_CHECKPOINT_SHUTDOWN ||
64  info == XLOG_CHECKPOINT_ONLINE)
65  {
66  CheckPoint *checkpoint = (CheckPoint *) rec;
67 
68  appendStringInfo(buf, "redo %X/%X; "
69  "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
70  "oldest xid %u in DB %u; oldest multi %u in DB %u; "
71  "oldest/newest commit timestamp xid: %u/%u; "
72  "oldest running xid %u; %s",
73  LSN_FORMAT_ARGS(checkpoint->redo),
74  checkpoint->ThisTimeLineID,
75  checkpoint->PrevTimeLineID,
76  checkpoint->fullPageWrites ? "true" : "false",
77  get_wal_level_string(checkpoint->wal_level),
79  XidFromFullTransactionId(checkpoint->nextXid),
80  checkpoint->nextOid,
81  checkpoint->nextMulti,
82  checkpoint->nextMultiOffset,
83  checkpoint->oldestXid,
84  checkpoint->oldestXidDB,
85  checkpoint->oldestMulti,
86  checkpoint->oldestMultiDB,
87  checkpoint->oldestCommitTsXid,
88  checkpoint->newestCommitTsXid,
89  checkpoint->oldestActiveXid,
90  (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
91  }
92  else if (info == XLOG_NEXTOID)
93  {
94  Oid nextOid;
95 
96  memcpy(&nextOid, rec, sizeof(Oid));
97  appendStringInfo(buf, "%u", nextOid);
98  }
99  else if (info == XLOG_RESTORE_POINT)
100  {
101  xl_restore_point *xlrec = (xl_restore_point *) rec;
102 
104  }
105  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
106  {
107  /* no further information to print */
108  }
109  else if (info == XLOG_BACKUP_END)
110  {
111  XLogRecPtr startpoint;
112 
113  memcpy(&startpoint, rec, sizeof(XLogRecPtr));
114  appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint));
115  }
116  else if (info == XLOG_PARAMETER_CHANGE)
117  {
118  xl_parameter_change xlrec;
119  const char *wal_level_str;
120 
121  memcpy(&xlrec, rec, sizeof(xl_parameter_change));
123 
124  appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
125  "max_wal_senders=%d max_prepared_xacts=%d "
126  "max_locks_per_xact=%d wal_level=%s "
127  "wal_log_hints=%s track_commit_timestamp=%s",
128  xlrec.MaxConnections,
129  xlrec.max_worker_processes,
130  xlrec.max_wal_senders,
131  xlrec.max_prepared_xacts,
132  xlrec.max_locks_per_xact,
134  xlrec.wal_log_hints ? "on" : "off",
135  xlrec.track_commit_timestamp ? "on" : "off");
136  }
137  else if (info == XLOG_FPW_CHANGE)
138  {
139  bool fpw;
140 
141  memcpy(&fpw, rec, sizeof(bool));
142  appendStringInfoString(buf, fpw ? "true" : "false");
143  }
144  else if (info == XLOG_END_OF_RECOVERY)
145  {
146  xl_end_of_recovery xlrec;
147 
148  memcpy(&xlrec, rec, sizeof(xl_end_of_recovery));
149  appendStringInfo(buf, "tli %u; prev tli %u; time %s; wal_level %s",
150  xlrec.ThisTimeLineID, xlrec.PrevTimeLineID,
153  }
154  else if (info == XLOG_OVERWRITE_CONTRECORD)
155  {
157 
158  memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord));
159  appendStringInfo(buf, "lsn %X/%X; time %s",
162  }
163  else if (info == XLOG_CHECKPOINT_REDO)
164  {
165  int wal_level;
166 
167  memcpy(&wal_level, rec, sizeof(int));
169  }
170 }
static const char * wal_level_str(WalLevel wal_level)
unsigned char uint8
Definition: c.h:504
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
static char * buf
Definition: pg_test_fsync.c:73
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID
char rp_name[MAXFNAMELEN]
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
static const char * get_wal_level_string(int wal_level)
Definition: xlogdesc.c:40
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLR_INFO_MASK
Definition: xlogrecord.h:62

References appendStringInfo(), appendStringInfoString(), buf, xl_end_of_recovery::end_time, EpochFromFullTransactionId, CheckPoint::fullPageWrites, get_wal_level_string(), LSN_FORMAT_ARGS, xl_parameter_change::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, xl_parameter_change::max_wal_senders, xl_parameter_change::max_worker_processes, xl_parameter_change::MaxConnections, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, CheckPoint::nextXid, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, xl_end_of_recovery::PrevTimeLineID, CheckPoint::PrevTimeLineID, CheckPoint::redo, xl_restore_point::rp_name, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, timestamptz_to_str(), xl_parameter_change::track_commit_timestamp, wal_level, xl_parameter_change::wal_level, xl_end_of_recovery::wal_level, CheckPoint::wal_level, wal_level_str(), xl_parameter_change::wal_log_hints, XidFromFullTransactionId, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLogRecGetData, XLogRecGetInfo, and XLR_INFO_MASK.

◆ xlog_identify()

const char* xlog_identify ( uint8  info)

Definition at line 173 of file xlogdesc.c.

174 {
175  const char *id = NULL;
176 
177  switch (info & ~XLR_INFO_MASK)
178  {
179  case XLOG_CHECKPOINT_SHUTDOWN:
180  id = "CHECKPOINT_SHUTDOWN";
181  break;
182  case XLOG_CHECKPOINT_ONLINE:
183  id = "CHECKPOINT_ONLINE";
184  break;
185  case XLOG_NOOP:
186  id = "NOOP";
187  break;
188  case XLOG_NEXTOID:
189  id = "NEXTOID";
190  break;
191  case XLOG_SWITCH:
192  id = "SWITCH";
193  break;
194  case XLOG_BACKUP_END:
195  id = "BACKUP_END";
196  break;
197  case XLOG_PARAMETER_CHANGE:
198  id = "PARAMETER_CHANGE";
199  break;
200  case XLOG_RESTORE_POINT:
201  id = "RESTORE_POINT";
202  break;
203  case XLOG_FPW_CHANGE:
204  id = "FPW_CHANGE";
205  break;
206  case XLOG_END_OF_RECOVERY:
207  id = "END_OF_RECOVERY";
208  break;
209  case XLOG_OVERWRITE_CONTRECORD:
210  id = "OVERWRITE_CONTRECORD";
211  break;
212  case XLOG_FPI:
213  id = "FPI";
214  break;
215  case XLOG_FPI_FOR_HINT:
216  id = "FPI_FOR_HINT";
217  break;
218  case XLOG_CHECKPOINT_REDO:
219  id = "CHECKPOINT_REDO";
220  break;
221  }
222 
223  return id;
224 }
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_SWITCH
Definition: pg_control.h:72

References XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, and XLR_INFO_MASK.

◆ xlog_redo()

void xlog_redo ( struct XLogReaderState record)

Definition at line 8198 of file xlog.c.

8199 {
8200  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8201  XLogRecPtr lsn = record->EndRecPtr;
8202 
8203  /*
8204  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8205  * XLOG_FPI_FOR_HINT records.
8206  */
8207  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8208  !XLogRecHasAnyBlockRefs(record));
8209 
8210  if (info == XLOG_NEXTOID)
8211  {
8212  Oid nextOid;
8213 
8214  /*
8215  * We used to try to take the maximum of TransamVariables->nextOid and
8216  * the recorded nextOid, but that fails if the OID counter wraps
8217  * around. Since no OID allocation should be happening during replay
8218  * anyway, better to just believe the record exactly. We still take
8219  * OidGenLock while setting the variable, just in case.
8220  */
8221  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8222  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8223  TransamVariables->nextOid = nextOid;
8225  LWLockRelease(OidGenLock);
8226  }
8227  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8228  {
8229  CheckPoint checkPoint;
8230  TimeLineID replayTLI;
8231 
8232  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8233  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8234  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8235  TransamVariables->nextXid = checkPoint.nextXid;
8236  LWLockRelease(XidGenLock);
8237  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8238  TransamVariables->nextOid = checkPoint.nextOid;
8240  LWLockRelease(OidGenLock);
8241  MultiXactSetNextMXact(checkPoint.nextMulti,
8242  checkPoint.nextMultiOffset);
8243 
8245  checkPoint.oldestMultiDB);
8246 
8247  /*
8248  * No need to set oldestClogXid here as well; it'll be set when we
8249  * redo an xl_clog_truncate if it changed since initialization.
8250  */
8251  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8252 
8253  /*
8254  * If we see a shutdown checkpoint while waiting for an end-of-backup
8255  * record, the backup was canceled and the end-of-backup record will
8256  * never arrive.
8257  */
8261  ereport(PANIC,
8262  (errmsg("online backup was canceled, recovery cannot continue")));
8263 
8264  /*
8265  * If we see a shutdown checkpoint, we know that nothing was running
8266  * on the primary at this point. So fake-up an empty running-xacts
8267  * record and use that here and now. Recover additional standby state
8268  * for prepared transactions.
8269  */
8271  {
8272  TransactionId *xids;
8273  int nxids;
8274  TransactionId oldestActiveXID;
8275  TransactionId latestCompletedXid;
8276  RunningTransactionsData running;
8277 
8278  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8279 
8280  /* Update pg_subtrans entries for any prepared transactions */
8282 
8283  /*
8284  * Construct a RunningTransactions snapshot representing a shut
8285  * down server, with only prepared transactions still alive. We're
8286  * never overflowed at this point because all subxids are listed
8287  * with their parent prepared transactions.
8288  */
8289  running.xcnt = nxids;
8290  running.subxcnt = 0;
8292  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8293  running.oldestRunningXid = oldestActiveXID;
8294  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8295  TransactionIdRetreat(latestCompletedXid);
8296  Assert(TransactionIdIsNormal(latestCompletedXid));
8297  running.latestCompletedXid = latestCompletedXid;
8298  running.xids = xids;
8299 
8300  ProcArrayApplyRecoveryInfo(&running);
8301  }
8302 
8303  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8304  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8305  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8306  LWLockRelease(ControlFileLock);
8307 
8308  /* Update shared-memory copy of checkpoint XID/epoch */
8310  XLogCtl->ckptFullXid = checkPoint.nextXid;
8312 
8313  /*
8314  * We should've already switched to the new TLI before replaying this
8315  * record.
8316  */
8317  (void) GetCurrentReplayRecPtr(&replayTLI);
8318  if (checkPoint.ThisTimeLineID != replayTLI)
8319  ereport(PANIC,
8320  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8321  checkPoint.ThisTimeLineID, replayTLI)));
8322 
8323  RecoveryRestartPoint(&checkPoint, record);
8324  }
8325  else if (info == XLOG_CHECKPOINT_ONLINE)
8326  {
8327  CheckPoint checkPoint;
8328  TimeLineID replayTLI;
8329 
8330  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8331  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8332  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8334  checkPoint.nextXid))
8335  TransamVariables->nextXid = checkPoint.nextXid;
8336  LWLockRelease(XidGenLock);
8337 
8338  /*
8339  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8340  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8341  * counter is from the start of the checkpoint and might well be stale
8342  * compared to later XLOG_NEXTOID records. We could try to take the
8343  * maximum of the nextOid counter and our latest value, but since
8344  * there's no particular guarantee about the speed with which the OID
8345  * counter wraps around, that's a risky thing to do. In any case,
8346  * users of the nextOid counter are required to avoid assignment of
8347  * duplicates, so that a somewhat out-of-date value should be safe.
8348  */
8349 
8350  /* Handle multixact */
8352  checkPoint.nextMultiOffset);
8353 
8354  /*
8355  * NB: This may perform multixact truncation when replaying WAL
8356  * generated by an older primary.
8357  */
8359  checkPoint.oldestMultiDB);
8361  checkPoint.oldestXid))
8362  SetTransactionIdLimit(checkPoint.oldestXid,
8363  checkPoint.oldestXidDB);
8364  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8365  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8366  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8367  LWLockRelease(ControlFileLock);
8368 
8369  /* Update shared-memory copy of checkpoint XID/epoch */
8371  XLogCtl->ckptFullXid = checkPoint.nextXid;
8373 
8374  /* TLI should not change in an on-line checkpoint */
8375  (void) GetCurrentReplayRecPtr(&replayTLI);
8376  if (checkPoint.ThisTimeLineID != replayTLI)
8377  ereport(PANIC,
8378  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8379  checkPoint.ThisTimeLineID, replayTLI)));
8380 
8381  RecoveryRestartPoint(&checkPoint, record);
8382  }
8383  else if (info == XLOG_OVERWRITE_CONTRECORD)
8384  {
8385  /* nothing to do here, handled in xlogrecovery_redo() */
8386  }
8387  else if (info == XLOG_END_OF_RECOVERY)
8388  {
8389  xl_end_of_recovery xlrec;
8390  TimeLineID replayTLI;
8391 
8392  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8393 
8394  /*
8395  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8396  * but this case is rarer and harder to test, so the benefit doesn't
8397  * outweigh the potential extra cost of maintenance.
8398  */
8399 
8400  /*
8401  * We should've already switched to the new TLI before replaying this
8402  * record.
8403  */
8404  (void) GetCurrentReplayRecPtr(&replayTLI);
8405  if (xlrec.ThisTimeLineID != replayTLI)
8406  ereport(PANIC,
8407  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8408  xlrec.ThisTimeLineID, replayTLI)));
8409  }
8410  else if (info == XLOG_NOOP)
8411  {
8412  /* nothing to do here */
8413  }
8414  else if (info == XLOG_SWITCH)
8415  {
8416  /* nothing to do here */
8417  }
8418  else if (info == XLOG_RESTORE_POINT)
8419  {
8420  /* nothing to do here, handled in xlogrecovery.c */
8421  }
8422  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8423  {
8424  /*
8425  * XLOG_FPI records contain nothing else but one or more block
8426  * references. Every block reference must include a full-page image
8427  * even if full_page_writes was disabled when the record was generated
8428  * - otherwise there would be no point in this record.
8429  *
8430  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8431  * WAL-logged because of a hint bit update. They are only generated
8432  * when checksums and/or wal_log_hints are enabled. They may include
8433  * no full-page images if full_page_writes was disabled when they were
8434  * generated. In this case there is nothing to do here.
8435  *
8436  * No recovery conflicts are generated by these generic records - if a
8437  * resource manager needs to generate conflicts, it has to define a
8438  * separate WAL record type and redo routine.
8439  */
8440  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8441  {
8442  Buffer buffer;
8443 
8444  if (!XLogRecHasBlockImage(record, block_id))
8445  {
8446  if (info == XLOG_FPI)
8447  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8448  continue;
8449  }
8450 
8451  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8452  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8453  UnlockReleaseBuffer(buffer);
8454  }
8455  }
8456  else if (info == XLOG_BACKUP_END)
8457  {
8458  /* nothing to do here, handled in xlogrecovery_redo() */
8459  }
8460  else if (info == XLOG_PARAMETER_CHANGE)
8461  {
8462  xl_parameter_change xlrec;
8463 
8464  /* Update our copy of the parameters in pg_control */
8465  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8466 
8467  /*
8468  * Invalidate logical slots if we are in hot standby and the primary
8469  * does not have a WAL level sufficient for logical decoding. No need
8470  * to search for potentially conflicting logical slots if standby is
8471  * running with wal_level lower than logical, because in that case, we
8472  * would have either disallowed creation of logical slots or
8473  * invalidated existing ones.
8474  */
8475  if (InRecovery && InHotStandby &&
8476  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8479  0, InvalidOid,
8481 
8482  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8488  ControlFile->wal_level = xlrec.wal_level;
8490 
8491  /*
8492  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8493  * recover back up to this point before allowing hot standby again.
8494  * This is important if the max_* settings are decreased, to ensure
8495  * you don't run queries against the WAL preceding the change. The
8496  * local copies cannot be updated as long as crash recovery is
8497  * happening and we expect all the WAL to be replayed.
8498  */
8499  if (InArchiveRecovery)
8500  {
8503  }
8505  {
8506  TimeLineID replayTLI;
8507 
8508  (void) GetCurrentReplayRecPtr(&replayTLI);
8510  ControlFile->minRecoveryPointTLI = replayTLI;
8511  }
8512 
8516 
8518  LWLockRelease(ControlFileLock);
8519 
8520  /* Check to see if any parameter change gives a problem on recovery */
8522  }
8523  else if (info == XLOG_FPW_CHANGE)
8524  {
8525  bool fpw;
8526 
8527  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8528 
8529  /*
8530  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8531  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8532  * full_page_writes has been disabled during online backup.
8533  */
8534  if (!fpw)
8535  {
8537  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8540  }
8541 
8542  /* Keep track of full_page_writes */
8543  lastFullPageWrites = fpw;
8544  }
8545  else if (info == XLOG_CHECKPOINT_REDO)
8546  {
8547  /* nothing to do here, just for informational purposes */
8548  }
8549 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4953
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2528
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2503
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:55
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7513
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2982 of file xlog.c.

2983 {
2984  XLogwrtRqst WriteRqst;
2985  bool flexible = true;
2986  static TimestampTz lastflush;
2987  TimestampTz now;
2988  int flushblocks;
2989  TimeLineID insertTLI;
2990 
2991  /* XLOG doesn't need flushing during recovery */
2992  if (RecoveryInProgress())
2993  return false;
2994 
2995  /*
2996  * Since we're not in recovery, InsertTimeLineID is set and can't change,
2997  * so we can read it without a lock.
2998  */
2999  insertTLI = XLogCtl->InsertTimeLineID;
3000 
3001  /* read updated LogwrtRqst */
3003  WriteRqst = XLogCtl->LogwrtRqst;
3005 
3006  /* back off to last completed page boundary */
3007  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3008 
3009  /* if we have already flushed that far, consider async commit records */
3011  if (WriteRqst.Write <= LogwrtResult.Flush)
3012  {
3014  WriteRqst.Write = XLogCtl->asyncXactLSN;
3016  flexible = false; /* ensure it all gets written */
3017  }
3018 
3019  /*
3020  * If already known flushed, we're done. Just need to check if we are
3021  * holding an open file handle to a logfile that's no longer in use,
3022  * preventing the file from being deleted.
3023  */
3024  if (WriteRqst.Write <= LogwrtResult.Flush)
3025  {
3026  if (openLogFile >= 0)
3027  {
3030  {
3031  XLogFileClose();
3032  }
3033  }
3034  return false;
3035  }
3036 
3037  /*
3038  * Determine how far to flush WAL, based on the wal_writer_delay and
3039  * wal_writer_flush_after GUCs.
3040  *
3041  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3042  * wal_writer_flush_after, to decide when to wake us up. Make sure the
3043  * logic is the same in both places if you change this.
3044  */
3046  flushblocks =
3047  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3048 
3049  if (WalWriterFlushAfter == 0 || lastflush == 0)
3050  {
3051  /* first call, or block based limits disabled */
3052  WriteRqst.Flush = WriteRqst.Write;
3053  lastflush = now;
3054  }
3055  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3056  {
3057  /*
3058  * Flush the writes at least every WalWriterDelay ms. This is
3059  * important to bound the amount of time it takes for an asynchronous
3060  * commit to hit disk.
3061  */
3062  WriteRqst.Flush = WriteRqst.Write;
3063  lastflush = now;
3064  }
3065  else if (flushblocks >= WalWriterFlushAfter)
3066  {
3067  /* exceeded wal_writer_flush_after blocks, flush */
3068  WriteRqst.Flush = WriteRqst.Write;
3069  lastflush = now;
3070  }
3071  else
3072  {
3073  /* no flushing, this time round */
3074  WriteRqst.Flush = 0;
3075  }
3076 
3077 #ifdef WAL_DEBUG
3078  if (XLOG_DEBUG)
3079  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3080  LSN_FORMAT_ARGS(WriteRqst.Write),
3081  LSN_FORMAT_ARGS(WriteRqst.Flush),
3084 #endif
3085 
3087 
3088  /* now wait for any in-progress insertions to finish and get write lock */
3089  WaitXLogInsertionsToFinish(WriteRqst.Write);
3090  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3092  if (WriteRqst.Write > LogwrtResult.Write ||
3093  WriteRqst.Flush > LogwrtResult.Flush)
3094  {
3095  XLogWrite(WriteRqst, insertTLI, flexible);
3096  }
3097  LWLockRelease(WALWriteLock);
3098 
3099  END_CRIT_SECTION();
3100 
3101  /* wake up walsenders now that we've released heavily contended locks */
3103 
3104  /*
3105  * Great, done. To take some work off the critical path, try to initialize
3106  * as many of the no-longer-needed WAL buffers for future use as we can.
3107  */
3108  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3109 
3110  /*
3111  * If we determined that we need to write data, but somebody else
3112  * wrote/flushed already, it should be considered as being active, to
3113  * avoid hibernating too early.
3114  */
3115  return true;
3116 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1791
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1619
XLogRecPtr asyncXactLSN
Definition: xlog.c:456
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:66
int WalWriterFlushAfter
Definition: walwriter.c:72
int WalWriterDelay
Definition: walwriter.c:71
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1502
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1983
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2312
static void XLogFileClose(void)
Definition: xlog.c:3631
static XLogSegNo openLogSegNo
Definition: xlog.c:634
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2288 of file xlog.c.

2289 {
2290  XLogSegNo old_segno;
2291 
2292  XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2293 
2294  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2295  return true;
2296  return false;
2297 }
int CheckPointSegments
Definition: xlog.c:154

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3372 of file xlog.c.

3373 {
3374  bool ignore_added;
3375  char path[MAXPGPATH];
3376  int fd;
3377 
3378  Assert(logtli != 0);
3379 
3380  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3381  if (fd >= 0)
3382  return fd;
3383 
3384  /* Now open original target segment (might not be file I just made) */
3385  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3386  get_sync_bit(wal_sync_method));
3387  if (fd < 0)
3388  ereport(ERROR,
3389  (errcode_for_file_access(),
3390  errmsg("could not open file \"%s\": %m", path)));
3391  return fd;
3392 }
#define PG_BINARY
Definition: c.h:1273
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1087
#define O_CLOEXEC
Definition: win32_port.h:359
static int get_sync_bit(int method)
Definition: xlog.c:8556
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3202

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3610 of file xlog.c.

3611 {
3612  char path[MAXPGPATH];
3613  int fd;
3614 
3615  XLogFilePath(path, tli, segno, wal_segment_size);
3616 
3617  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3618  get_sync_bit(wal_sync_method));
3619  if (fd < 0)
3620  ereport(PANIC,
3621  (errcode_for_file_access(),
3622  errmsg("could not open file \"%s\": %m", path)));
3623 
3624  return fd;
3625 }
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2794 of file xlog.c.

2795 {
2796  XLogRecPtr WriteRqstPtr;
2797  XLogwrtRqst WriteRqst;
2798  TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2799 
2800  /*
2801  * During REDO, we are reading not writing WAL. Therefore, instead of
2802  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2803  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2804  * to act this way too, and because when it tries to write the
2805  * end-of-recovery checkpoint, it should indeed flush.
2806  */
2807  if (!XLogInsertAllowed())
2808  {
2809  UpdateMinRecoveryPoint(record, false);
2810  return;
2811  }
2812 
2813  /* Quick exit if already known flushed */
2814  if (record <= LogwrtResult.Flush)
2815  return;
2816 
2817 #ifdef WAL_DEBUG
2818  if (XLOG_DEBUG)
2819  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2820  LSN_FORMAT_ARGS(record),
2821  LSN_FORMAT_ARGS(LogwrtResult.Write),
2822  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2823 #endif
2824 
2825  START_CRIT_SECTION();
2826 
2827  /*
2828  * Since fsync is usually a horribly expensive operation, we try to
2829  * piggyback as much data as we can on each fsync: if we see any more data
2830  * entered into the xlog buffer, we'll write and fsync that too, so that
2831  * the final value of LogwrtResult.Flush is as large as possible. This
2832  * gives us some chance of avoiding another fsync immediately after.
2833  */
2834 
2835  /* initialize to given target; may increase below */
2836  WriteRqstPtr = record;
2837 
2838  /*
2839  * Now wait until we get the write lock, or someone else does the flush
2840  * for us.
2841  */
2842  for (;;)
2843  {
2844  XLogRecPtr insertpos;
2845 
2846  /* done already? */
2847  RefreshXLogWriteResult(LogwrtResult);
2848  if (record <= LogwrtResult.Flush)
2849  break;
2850 
2851  /*
2852  * Before actually performing the write, wait for all in-flight
2853  * insertions to the pages we're about to write to finish.
2854  */
2855  SpinLockAcquire(&XLogCtl->info_lck);
2856  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2857  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2858  SpinLockRelease(&XLogCtl->info_lck);
2859  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2860 
2861  /*
2862  * Try to get the write lock. If we can't get it immediately, wait
2863  * until it's released, and recheck if we still need to do the flush
2864  * or if the backend that held the lock did it for us already. This
2865  * helps to maintain a good rate of group committing when the system
2866  * is bottlenecked by the speed of fsyncing.
2867  */
2868  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2869  {
2870  /*
2871  * The lock is now free, but we didn't acquire it yet. Before we
2872  * do, loop back to check if someone else flushed the record for
2873  * us already.
2874  */
2875  continue;
2876  }
2877 
2878  /* Got the lock; recheck whether request is satisfied */
2879  RefreshXLogWriteResult(LogwrtResult);
2880  if (record <= LogwrtResult.Flush)
2881  {
2882  LWLockRelease(WALWriteLock);
2883  break;
2884  }
2885 
2886  /*
2887  * Sleep before flush! By adding a delay here, we may give further
2888  * backends the opportunity to join the backlog of group commit
2889  * followers; this can significantly improve transaction throughput,
2890  * at the risk of increasing transaction latency.
2891  *
2892  * We do not sleep if enableFsync is not turned on, nor if there are
2893  * fewer than CommitSiblings other backends with active transactions.
2894  */
2895  if (CommitDelay > 0 && enableFsync &&
2896  MinimumActiveBackends(CommitSiblings))
2897  {
2898  pg_usleep(CommitDelay);
2899 
2900  /*
2901  * Re-check how far we can now flush the WAL. It's generally not
2902  * safe to call WaitXLogInsertionsToFinish while holding
2903  * WALWriteLock, because an in-progress insertion might need to
2904  * also grab WALWriteLock to make progress. But we know that all
2905  * the insertions up to insertpos have already finished, because
2906  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2907  * We're only calling it again to allow insertpos to be moved
2908  * further forward, not to actually wait for anyone.
2909  */
2910  insertpos = WaitXLogInsertionsToFinish(insertpos);
2911  }
2912 
2913  /* try to write/flush later additions to XLOG as well */
2914  WriteRqst.Write = insertpos;
2915  WriteRqst.Flush = insertpos;
2916 
2917  XLogWrite(WriteRqst, insertTLI, false);
2918 
2919  LWLockRelease(WALWriteLock);
2920  /* done */
2921  break;
2922  }
2923 
2924  END_CRIT_SECTION();
2925 
2926  /* wake up walsenders now that we've released heavily contended locks */
2927  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2928 
2929  /*
2930  * If we still haven't flushed to the request point then we have a
2931  * problem; most likely, the requested flush point is past end of XLOG.
2932  * This has been seen to occur when a disk page has a corrupted LSN.
2933  *
2934  * Formerly we treated this as a PANIC condition, but that hurts the
2935  * system's robustness rather than helping it: we do not want to take down
2936  * the whole system due to corruption on one data page. In particular, if
2937  * the bad page is encountered again during recovery then we would be
2938  * unable to restart the database at all! (This scenario actually
2939  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2940  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2941  * the only time we can reach here during recovery is while flushing the
2942  * end-of-recovery checkpoint record, and we don't expect that to have a
2943  * bad LSN.
2944  *
2945  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2946  * since xact.c calls this routine inside a critical section. However,
2947  * calls from bufmgr.c are not within critical sections and so we will not
2948  * force a restart for a bad LSN on a data page.
2949  */
2950  if (LogwrtResult.Flush < record)
2951  elog(ERROR,
2952  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2953  LSN_FORMAT_ARGS(record),
2954  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2955 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1396
bool MinimumActiveBackends(int min)
Definition: procarray.c:3550
int CommitDelay
Definition: xlog.c:130
int CommitSiblings
Definition: xlog.c:131
bool XLogInsertAllowed(void)
Definition: xlog.c:6359

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3750 of file xlog.c.

3751 {
3752  XLogSegNo lastRemovedSegNo;
3753 
3754  SpinLockAcquire(&XLogCtl->info_lck);
3755  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3756  SpinLockRelease(&XLogCtl->info_lck);
3757 
3758  return lastRemovedSegNo;
3759 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3766 of file xlog.c.

3767 {
3768  DIR *xldir;
3769  struct dirent *xlde;
3770  XLogSegNo oldest_segno = 0;
3771 
3772  xldir = AllocateDir(XLOGDIR);
3773  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3774  {
3775  TimeLineID file_tli;
3776  XLogSegNo file_segno;
3777 
3778  /* Ignore files that are not XLOG segments. */
3779  if (!IsXLogFileName(xlde->d_name))
3780  continue;
3781 
3782  /* Parse filename to get TLI and segno. */
3783  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3784  wal_segment_size);
3785 
3786  /* Ignore anything that's not from the TLI of interest. */
3787  if (tli != file_tli)
3788  continue;
3789 
3790  /* If it's the oldest so far, update oldest_segno. */
3791  if (oldest_segno == 0 || file_segno < oldest_segno)
3792  oldest_segno = file_segno;
3793  }
3794 
3795  FreeDir(xldir);
3796  return oldest_segno;
3797 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6359 of file xlog.c.

6360 {
6361  /*
6362  * If value is "unconditionally true" or "unconditionally false", just
6363  * return it. This provides the normal fast path once recovery is known
6364  * done.
6365  */
6366  if (LocalXLogInsertAllowed >= 0)
6367  return (bool) LocalXLogInsertAllowed;
6368 
6369  /*
6370  * Else, must check to see if we're still in recovery.
6371  */
6372  if (RecoveryInProgress())
6373  return false;
6374 
6375  /*
6376  * On exit from recovery, reset to "unconditionally true", since there is
6377  * no need to keep checking.
6378  */
6379  LocalXLogInsertAllowed = 1;
6380  return true;
6381 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( struct XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 746 of file xlog.c.

751 {
752  XLogCtlInsert *Insert = &XLogCtl->Insert;
753  pg_crc32c rdata_crc;
754  bool inserted;
755  XLogRecord *rechdr = (XLogRecord *) rdata->data;
756  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
757  WalInsertClass class = WALINSERT_NORMAL;
758  XLogRecPtr StartPos;
759  XLogRecPtr EndPos;
760  bool prevDoPageWrites = doPageWrites;
761  TimeLineID insertTLI;
762 
763  /* Does this record type require special handling? */
764  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
765  {
766  if (info == XLOG_SWITCH)
767  class = WALINSERT_SPECIAL_SWITCH;
768  else if (info == XLOG_CHECKPOINT_REDO)
769  class = WALINSERT_SPECIAL_CHECKPOINT;
770  }
771 
772  /* we assume that all of the record header is in the first chunk */
773  Assert(rdata->len >= SizeOfXLogRecord);
774 
775  /* cross-check on whether we should be here or not */
776  if (!XLogInsertAllowed())
777  elog(ERROR, "cannot make new WAL entries during recovery");
778 
779  /*
780  * Given that we're not in recovery, InsertTimeLineID is set and can't
781  * change, so we can read it without a lock.
782  */
783  insertTLI = XLogCtl->InsertTimeLineID;
784 
785  /*----------
786  *
787  * We have now done all the preparatory work we can without holding a
788  * lock or modifying shared state. From here on, inserting the new WAL
789  * record to the shared WAL buffer cache is a two-step process:
790  *
791  * 1. Reserve the right amount of space from the WAL. The current head of
792  * reserved space is kept in Insert->CurrBytePos, and is protected by
793  * insertpos_lck.
794  *
795  * 2. Copy the record to the reserved WAL space. This involves finding the
796  * correct WAL buffer containing the reserved space, and copying the
797  * record in place. This can be done concurrently in multiple processes.
798  *
799  * To keep track of which insertions are still in-progress, each concurrent
800  * inserter acquires an insertion lock. In addition to just indicating that
801  * an insertion is in progress, the lock tells others how far the inserter
802  * has progressed. There is a small fixed number of insertion locks,
803  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
804  * boundary, it updates the value stored in the lock to the how far it has
805  * inserted, to allow the previous buffer to be flushed.
806  *
807  * Holding onto an insertion lock also protects RedoRecPtr and
808  * fullPageWrites from changing until the insertion is finished.
809  *
810  * Step 2 can usually be done completely in parallel. If the required WAL
811  * page is not initialized yet, you have to grab WALBufMappingLock to
812  * initialize it, but the WAL writer tries to do that ahead of insertions
813  * to avoid that from happening in the critical path.
814  *
815  *----------
816  */
817  START_CRIT_SECTION();
818 
819  if (likely(class == WALINSERT_NORMAL))
820  {
821  WALInsertLockAcquire();
822 
823  /*
824  * Check to see if my copy of RedoRecPtr is out of date. If so, may
825  * have to go back and have the caller recompute everything. This can
826  * only happen just after a checkpoint, so it's better to be slow in
827  * this case and fast otherwise.
828  *
829  * Also check to see if fullPageWrites was just turned on or there's a
830  * running backup (which forces full-page writes); if we weren't
831  * already doing full-page writes then go back and recompute.
832  *
833  * If we aren't doing full-page writes then RedoRecPtr doesn't
834  * actually affect the contents of the XLOG record, so we'll update
835  * our local copy but not force a recomputation. (If doPageWrites was
836  * just turned off, we could recompute the record without full pages,
837  * but we choose not to bother.)
838  */
839  if (RedoRecPtr != Insert->RedoRecPtr)
840  {
841  Assert(RedoRecPtr < Insert->RedoRecPtr);
842  RedoRecPtr = Insert->RedoRecPtr;
843  }
844  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
845 
846  if (doPageWrites &&
847  (!prevDoPageWrites ||
848  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
849  {
850  /*
851  * Oops, some buffer now needs to be backed up that the caller
852  * didn't back up. Start over.
853  */
854  WALInsertLockRelease();
855  END_CRIT_SECTION();
856  return InvalidXLogRecPtr;
857  }
858 
859  /*
860  * Reserve space for the record in the WAL. This also sets the xl_prev
861  * pointer.
862  */
863  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
864  &rechdr->xl_prev);
865 
866  /* Normal records are always inserted. */
867  inserted = true;
868  }
869  else if (class == WALINSERT_SPECIAL_SWITCH)
870  {
871  /*
872  * In order to insert an XLOG_SWITCH record, we need to hold all of
873  * the WAL insertion locks, not just one, so that no one else can
874  * begin inserting a record until we've figured out how much space
875  * remains in the current WAL segment and claimed all of it.
876  *
877  * Nonetheless, this case is simpler than the normal cases handled
878  * below, which must check for changes in doPageWrites and RedoRecPtr.
879  * Those checks are only needed for records that can contain buffer
880  * references, and an XLOG_SWITCH record never does.
881  */
882  Assert(fpw_lsn == InvalidXLogRecPtr);
883  WALInsertLockAcquireExclusive();
884  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
885  }
886  else
887  {
888  Assert(class == WALINSERT_SPECIAL_CHECKPOINT);
889 
890  /*
891  * We need to update both the local and shared copies of RedoRecPtr,
892  * which means that we need to hold all the WAL insertion locks.
893  * However, there can't be any buffer references, so as above, we need
894  * not check RedoRecPtr before inserting the record; we just need to
895  * update it afterwards.
896  */
897  Assert(fpw_lsn == InvalidXLogRecPtr);
898  WALInsertLockAcquireExclusive();
899  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
900  &rechdr->xl_prev);
901  RedoRecPtr = Insert->RedoRecPtr = StartPos;
902  inserted = true;
903  }
904 
905  if (inserted)
906  {
907  /*
908  * Now that xl_prev has been filled in, calculate CRC of the record
909  * header.
910  */
911  rdata_crc = rechdr->xl_crc;
912  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
913  FIN_CRC32C(rdata_crc);
914  rechdr->xl_crc = rdata_crc;
915 
916  /*
917  * All the record data, including the header, is now ready to be
918  * inserted. Copy the record in the space reserved.
919  */
920  CopyXLogRecordToWAL(rechdr->xl_tot_len,
921  class == WALINSERT_SPECIAL_SWITCH, rdata,
922  StartPos, EndPos, insertTLI);
923 
924  /*
925  * Unless record is flagged as not important, update LSN of last
926  * important record in the current slot. When holding all locks, just
927  * update the first one.
928  */
929  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
930  {
931  int lockno = holdingAllLocks ? 0 : MyLockNo;
932 
933  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
934  }
935  }
936  else
937  {
938  /*
939  * This was an xlog-switch record, but the current insert location was
940  * already exactly at the beginning of a segment, so there was no need
941  * to do anything.
942  */
943  }
944 
945  /*
946  * Done! Let others know that we're finished.
947  */
948  WALInsertLockRelease();
949 
950  END_CRIT_SECTION();
951 
952  MarkCurrentTransactionIdLoggedIfAny();
953 
954  /*
955  * Mark top transaction id is logged (if needed) so that we should not try
956  * to log it again with the next WAL record in the current subtransaction.
957  */
958  if (topxid_included)
959  MarkSubxactTopXidLogged();
960 
961  /*
962  * Update shared LogwrtRqst.Write, if we crossed page boundary.
963  */
964  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
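965  {
966  SpinLockAcquire(&XLogCtl->info_lck);
967  /* advance global request to include new block(s) */
968  if (XLogCtl->LogwrtRqst.Write < EndPos)
969  XLogCtl->LogwrtRqst.Write = EndPos;
970  SpinLockRelease(&XLogCtl->info_lck);
971  RefreshXLogWriteResult(LogwrtResult);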
972  }
973 
974  /*
975  * If this was an XLOG_SWITCH record, flush the record and the empty
976  * padding space that fills the rest of the segment, and perform
977  * end-of-segment actions (eg, notifying archiver).
978  */
979  if (class == WALINSERT_SPECIAL_SWITCH)
980  {
981  TRACE_POSTGRESQL_WAL_SWITCH();
982  XLogFlush(EndPos);
983 
984  /*
985  * Even though we reserved the rest of the segment for us, which is
986  * reflected in EndPos, we return a pointer to just the end of the
987  * xlog-switch record.
988  */
989  if (inserted)
990  {
991  EndPos = StartPos + SizeOfXLogRecord;
992  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
993  {
994  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
995 
996  if (offset == EndPos % XLOG_BLCKSZ)
997  EndPos += SizeOfXLogLongPHD;
998  else
999  EndPos += SizeOfXLogShortPHD;
1000  }
1001  }
1002  }
1003 
1004 #ifdef WAL_DEBUG
1005  if (XLOG_DEBUG)
1006  {
1007  static XLogReaderState *debug_reader = NULL;
1008  XLogRecord *record;
1009  DecodedXLogRecord *decoded;
1010  StringInfoData buf;
1011  StringInfoData recordBuf;
1012  char *errormsg = NULL;
1013  MemoryContext oldCxt;
1014 
1015  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1016 
1017  initStringInfo(&buf);
1018  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1019 
1020  /*
1021  * We have to piece together the WAL record data from the XLogRecData
1022  * entries, so that we can pass it to the rm_desc function as one
1023  * contiguous chunk.
1024  */
1025  initStringInfo(&recordBuf);
1026  for (; rdata != NULL; rdata = rdata->next)
1027  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1028 
1029  /* We also need temporary space to decode the record. */
1030  record = (XLogRecord *) recordBuf.data;
1031  decoded = (DecodedXLogRecord *)
1032  palloc(DecodeXLogRecordRequiredSpace(record->xl_tot_len));
1033 
1034  if (!debug_reader)
1035  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1036  XL_ROUTINE(.page_read = NULL,
1037  .segment_open = NULL,
1038  .segment_close = NULL),
1039  NULL);
1040  if (!debug_reader)
1041  {
1042  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1043  }
1044  else if (!DecodeXLogRecord(debug_reader,
1045  decoded,
1046  record,
1047  EndPos,
1048  &errormsg))
1049  {
1050  appendStringInfo(&buf, "error decoding record: %s",
1051  errormsg ? errormsg : "no error message");
1052  }
1053  else
1054  {
1055  appendStringInfoString(&buf, " - ");
1056 
1057  debug_reader->record = decoded;
1058  xlog_outdesc(&buf, debug_reader);
1059  debug_reader->record = NULL;
1060  }
1061  elog(LOG, "%s", buf.data);
1062 
1063  pfree(decoded);
1064  pfree(buf.data);
1065  pfree(recordBuf.data);
1066  MemoryContextSwitchTo(oldCxt);
1067  }
1068 #endif
1069 
1070  /*
1071  * Update our global variables
1072  */
1073  ProcLastRecPtr = StartPos;
1074  XactLastRecEnd = EndPos;
1075 
1076  /* Report WAL traffic to the instrumentation. */
1077  if (inserted)
1078  {
1079  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1080  pgWalUsage.wal_records++;
1081  pgWalUsage.wal_fpi += num_fpi;
1082  }
1083 
1084  return EndPos;
1085 }
#define likely(x)
Definition: c.h:310
#define unlikely(x)
Definition: c.h:311
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
MemoryContextSwitchTo(old_ctx)
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
struct XLogRecData * next
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:589
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:539
XLogRecPtr XactLastRecEnd
Definition: xlog.c:252
static void WALInsertLockAcquire(void)
Definition: xlog.c:1369
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1223
static bool holdingAllLocks
Definition: xlog.c:650
static int MyLockNo
Definition: xlog.c:649
WalInsertClass
Definition: xlog.c:558
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:560
@ WALINSERT_NORMAL
Definition: xlog.c:559
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:561
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1106
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1162
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3125 of file xlog.c.

3126 {
3127  /*
3128  * During recovery, we don't flush WAL but update minRecoveryPoint
3129  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3130  * would need to be updated.
3131  */
3132  if (RecoveryInProgress())
3133  {
3134  /*
3135  * An invalid minRecoveryPoint means that we need to recover all the
3136  * WAL, i.e., we're doing crash recovery. We never modify the control
3137  * file's value in that case, so we can short-circuit future checks
3138  * here too. This triggers a quick exit path for the startup process,
3139  * which cannot update its local copy of minRecoveryPoint as long as
3140  * it has not replayed all WAL available when doing crash recovery.
3141  */
3142  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3143  updateMinRecoveryPoint = false;
3144 
3145  /* Quick exit if already known to be updated or cannot be updated */
3146  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3147  return false;
3148 
3149  /*
3150  * Update local copy of minRecoveryPoint. But if the lock is busy,
3151  * just return a conservative guess.
3152  */
3153  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3154  return true;
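3155  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3156  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
3157  LWLockRelease(ControlFileLock);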
3158 
3159  /*
3160  * Check minRecoveryPoint for any other process than the startup
3161  * process doing crash recovery, which should not update the control
3162  * file value if crash recovery is still running.
3163  */
3164  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3165  updateMinRecoveryPoint = false;
3166 
3167  /* check again */
3168  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3169  return false;
3170  else
3171  return true;
3172  }
3173 
3174  /* Quick exit if already known flushed */
3175  if (record <= LogwrtResult.Flush)
3176  return false;
3177 
3178  /* read LogwrtResult and update local state */
3179  RefreshXLogWriteResult(LogwrtResult);
3180 
3181  /* check again */
3182  if (record <= LogwrtResult.Flush)
3183  return false;
3184 
3185  return true;
3186 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 7986 of file xlog.c.

7987 {
7988  XLogBeginInsert();
7989  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
7990  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
7991 
7992  /*
7993  * We need not flush the NEXTOID record immediately, because any of the
7994  * just-allocated OIDs could only reach disk as part of a tuple insert or
7995  * update that would have its own XLOG record that must follow the NEXTOID
7996  * record. Therefore, the standard buffer LSN interlock applied to those
7997  * records will ensure no such OID reaches disk before the NEXTOID record
7998  * does.
7999  *
8000  * Note, however, that the above statement only covers state "within" the
8001  * database. When we use a generated OID as a file or directory name, we
8002  * are in a sense violating the basic WAL rule, because that filesystem
8003  * change may reach disk before the NEXTOID WAL record does. The impact
8004  * of this is that if a database crash occurs immediately afterward, we
8005  * might after restart re-generate the same OID and find that it conflicts
8006  * with the leftover file or directory. But since for safety's sake we
8007  * always loop until finding a nonconflicting filename, this poses no real
8008  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8009  */
8010 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8041 of file xlog.c.

8042 {
8043  XLogRecPtr RecPtr;
8044  xl_restore_point xlrec;
8045 
8046  xlrec.rp_time = GetCurrentTimestamp();
8047  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8048 
8049  XLogBeginInsert();
8050  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
8051 
8052  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8053 
8054  ereport(LOG,
8055  (errmsg("restore point \"%s\" created at %X/%X",
8056  rpName, LSN_FORMAT_ARGS(RecPtr))));
8057 
8058  return RecPtr;
8059 }
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2629 of file xlog.c.

2630 {
2631  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2632  bool sleeping;
2633  bool wakeup = false;
2634  XLogRecPtr prevAsyncXactLSN;
2635 
2636  SpinLockAcquire(&XLogCtl->info_lck);
2637  sleeping = XLogCtl->WalWriterSleeping;
2638  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2639  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2640  XLogCtl->asyncXactLSN = asyncXactLSN;
2641  SpinLockRelease(&XLogCtl->info_lck);
2642 
2643  /*
2644  * If somebody else already called this function with a more aggressive
2645  * LSN, they will have done what we needed (and perhaps more).
2646  */
2647  if (asyncXactLSN <= prevAsyncXactLSN)
2648  return;
2649 
2650  /*
2651  * If the WALWriter is sleeping, kick it to make it come out of low-power
2652  * mode, so that this async commit will reach disk within the expected
2653  * amount of time. Otherwise, determine whether it has enough WAL
2654  * available to flush, the same way that XLogBackgroundFlush() does.
2655  */
2656  if (sleeping)
2657  wakeup = true;
2658  else
2659  {
2660  int flushblocks;
2661 
2662  RefreshXLogWriteResult(LogwrtResult);
2663 
2664  flushblocks =
2665  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2666 
2667  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2668  wakeup = true;
2669  }
2670 
2671  if (wakeup && ProcGlobal->walwriterLatch)
2672  SetLatch(ProcGlobal->walwriterLatch);
2673 }
void SetLatch(Latch *latch)
Definition: latch.c:632
PROC_HDR * ProcGlobal
Definition: proc.c:78
Latch * walwriterLatch
Definition: proc.h:411
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, XLogCtlData::info_lck, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterLatch, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4888 of file xlog.c.

4889 {
4890  bool foundCFile,
4891  foundXLog;
4892  char *allocptr;
4893  int i;
4894  ControlFileData *localControlFile;
4895 
4896 #ifdef WAL_DEBUG
4897 
4898  /*
4899  * Create a memory context for WAL debugging that's exempt from the normal
4900  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4901  * an allocation fails, but wal_debug is not for production use anyway.
4902  */
4903  if (walDebugCxt == NULL)
4904  {
4905  walDebugCxt = AllocSetContextCreate(TopMemoryContext,
4906  "WAL Debug",
4907  ALLOCSET_DEFAULT_SIZES);
4908  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4909  }
4910 #endif
4911 
4912 
4913  XLogCtl = (XLogCtlData *)
4914  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4915 
4916  localControlFile = ControlFile;
4917  ControlFile = (ControlFileData *)
4918  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4919 
4920  if (foundCFile || foundXLog)
4921  {
4922  /* both should be present or neither */
4923  Assert(foundCFile && foundXLog);
4924 
4925  /* Initialize local copy of WALInsertLocks */
4926  WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
4927 
4928  if (localControlFile)
4929  pfree(localControlFile);
4930  return;
4931  }
4932  memset(XLogCtl, 0, sizeof(XLogCtlData));
4933 
4934  /*
4935  * Already have read control file locally, unless in bootstrap mode. Move
4936  * contents into shared memory.
4937  */
4938  if (localControlFile)
4939  {
4940  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4941  pfree(localControlFile);
4942  }
4943 
4944  /*
4945  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4946  * multiple of the alignment for same, so no extra alignment padding is
4947  * needed here.
4948  */
4949  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4950  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4951  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4952 
4953  for (i = 0; i < XLOGbuffers; i++)
4954  {
4955  pg_atomic_init_u64(&XLogCtl->xlblocks[i], InvalidXLogRecPtr);
4956  }
4957 
4958  /* WAL insertion locks. Ensure they're aligned to the full padded size */
4959  allocptr += sizeof(WALInsertLockPadded) -
4960  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4961  WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
4962  (WALInsertLockPadded *) allocptr;
4963  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4964 
4965  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
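4966  {
4967  LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
4968  pg_atomic_init_u64(&WALInsertLocks[i].l.insertingAt, InvalidXLogRecPtr);
4969  WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;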
4970  }
4971 
4972  /*
4973  * Align the start of the page buffers to a full xlog block size boundary.
4974  * This simplifies some calculations in XLOG insertion. It is also
4975  * required for O_DIRECT.
4976  */
4977  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
4978  XLogCtl->pages = allocptr;
4979  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
4980 
4981  /*
4982  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
4983  * in additional info.)
4984  */
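4985  XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
4986  XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
4987  XLogCtl->InstallXLogFileSegmentActive = false;
4988  XLogCtl->WalWriterSleeping = false;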
4989 
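4990  SpinLockInit(&XLogCtl->Insert.insertpos_lck);
4991  SpinLockInit(&XLogCtl->info_lck);
4992  pg_atomic_init_u64(&XLogCtl->logInsertResult, InvalidXLogRecPtr);
4993  pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
4994  pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
4995  pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
4996 }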
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:446
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:60
int XLogCacheBlck
Definition: xlog.c:492
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:442
slock_t insertpos_lck
Definition: xlog.c:396
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4838
int XLOGbuffers
Definition: xlog.c:115
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4838 of file xlog.c.

4839 {
4840  Size size;
4841 
4842  /*
4843  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4844  * This isn't an amazingly clean place to do this, but we must wait till
4845  * NBuffers has received its final value, and must do it before using the
4846  * value of XLOGbuffers to do anything important.
4847  *
4848  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4849  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4850  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4851  * the matter with PGC_S_OVERRIDE.
4852  */
4853  if (XLOGbuffers == -1)
4854  {
4855  char buf[32];
4856 
4857  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4858  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4859  PGC_S_DYNAMIC_DEFAULT);
4860  if (XLOGbuffers == -1) /* failed to apply it? */
4861  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4862  PGC_S_OVERRIDE);
4863  }
4864  Assert(XLOGbuffers > 0);
4865 
4866  /* XLogCtl */
4867  size = sizeof(XLogCtlData);
4868 
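4869  /* WAL insertion locks, plus alignment */
4870  size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4871  /* xlblocks array */
4872  size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));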
4873  /* extra alignment padding for XLOG I/O buffers */
4874  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4875  /* and the buffers themselves */
4876  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4877 
4878  /*
4879  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4880  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4881  * routine again below to compute the actual allocation size.
4882  */
4883 
4884  return size;
4885 }
#define Max(x, y)
Definition: c.h:998
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4282
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4591

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9436 of file xlog.c.

9437 {
9438  ShutdownWalRcv();
9439 
9440  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9441  XLogCtl->InstallXLogFileSegmentActive = false;
9442  LWLockRelease(ControlFileLock);
9443 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

Variable Documentation

◆ CheckPointSegments

PGDLLIMPORT int CheckPointSegments
extern

◆ CheckpointStats

◆ CommitDelay

PGDLLIMPORT int CommitDelay
extern

Definition at line 130 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

PGDLLIMPORT int CommitSiblings
extern

Definition at line 131 of file xlog.c.

Referenced by XLogFlush().

◆ EnableHotStandby

◆ fullPageWrites

PGDLLIMPORT bool fullPageWrites
extern

Definition at line 120 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ log_checkpoints

PGDLLIMPORT bool log_checkpoints
extern

◆ max_slot_wal_keep_size_mb

PGDLLIMPORT int max_slot_wal_keep_size_mb
extern

Definition at line 133 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

PGDLLIMPORT int max_wal_size_mb
extern

◆ min_wal_size_mb

PGDLLIMPORT int min_wal_size_mb
extern

Definition at line 113 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ ProcLastRecPtr

PGDLLIMPORT XLogRecPtr ProcLastRecPtr
extern

◆ track_wal_io_timing

PGDLLIMPORT bool track_wal_io_timing
extern

Definition at line 135 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ wal_compression

PGDLLIMPORT int wal_compression
extern

Definition at line 122 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

PGDLLIMPORT bool* wal_consistency_checking
extern

Definition at line 124 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

PGDLLIMPORT char* wal_consistency_checking_string
extern

Definition at line 123 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

PGDLLIMPORT int wal_decode_buffer_size
extern

Definition at line 134 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

PGDLLIMPORT bool wal_init_zero
extern

Definition at line 125 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

PGDLLIMPORT int wal_keep_size_mb
extern

Definition at line 114 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

PGDLLIMPORT bool wal_log_hints
extern

Definition at line 121 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

PGDLLIMPORT bool wal_recycle
extern

Definition at line 126 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

PGDLLIMPORT int wal_retrieve_retry_interval
extern

◆ wal_segment_size

PGDLLIMPORT int wal_segment_size
extern

Definition at line 141 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

PGDLLIMPORT int wal_sync_method
extern

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

PGDLLIMPORT char* XLogArchiveCommand
extern

◆ XLogArchiveMode

◆ XLogArchiveTimeout

PGDLLIMPORT int XLogArchiveTimeout
extern

Definition at line 116 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

PGDLLIMPORT int XLOGbuffers
extern

Definition at line 115 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().