PostgreSQL Source Code  git master
xlog.h File Reference
#include "access/xlogbackup.h"
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
Include dependency graph for xlog.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CheckpointStatsData
 

Macros

#define XLogArchivingActive()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
 
#define XLogArchivingAlways()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
 
#define XLogIsNeeded()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogHintBitIsNeeded()   (DataChecksumsEnabled() || wal_log_hints)
 
#define XLogStandbyInfoActive()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogLogicalInfoActive()   (wal_level >= WAL_LEVEL_LOGICAL)
 
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
 
#define CHECKPOINT_END_OF_RECOVERY   0x0002 /* Like shutdown checkpoint, but issued at end of WAL recovery */
 
#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */
 
#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
 
#define CHECKPOINT_FLUSH_ALL   0x0010 /* Flush all pages, including those belonging to unlogged tables */
 
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
 
#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */
 
#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */
 
#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */
 
#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
 
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
 
#define RECOVERY_SIGNAL_FILE   "recovery.signal"
 
#define STANDBY_SIGNAL_FILE   "standby.signal"
 
#define BACKUP_LABEL_FILE   "backup_label"
 
#define BACKUP_LABEL_OLD   "backup_label.old"
 
#define TABLESPACE_MAP   "tablespace_map"
 
#define TABLESPACE_MAP_OLD   "tablespace_map.old"
 
#define PROMOTE_SIGNAL_FILE   "promote"
 

Typedefs

typedef enum ArchiveMode ArchiveMode
 
typedef enum WalLevel WalLevel
 
typedef enum WalCompression WalCompression
 
typedef enum RecoveryState RecoveryState
 
typedef struct CheckpointStatsData CheckpointStatsData
 
typedef enum WALAvailability WALAvailability
 
typedef enum SessionBackupState SessionBackupState
 

Enumerations

enum  WalSyncMethod {
  WAL_SYNC_METHOD_FSYNC = 0 , WAL_SYNC_METHOD_FDATASYNC , WAL_SYNC_METHOD_OPEN , WAL_SYNC_METHOD_FSYNC_WRITETHROUGH ,
  WAL_SYNC_METHOD_OPEN_DSYNC
}
 
enum  ArchiveMode { ARCHIVE_MODE_OFF = 0 , ARCHIVE_MODE_ON , ARCHIVE_MODE_ALWAYS }
 
enum  WalLevel { WAL_LEVEL_MINIMAL = 0 , WAL_LEVEL_REPLICA , WAL_LEVEL_LOGICAL }
 
enum  WalCompression { WAL_COMPRESSION_NONE = 0 , WAL_COMPRESSION_PGLZ , WAL_COMPRESSION_LZ4 , WAL_COMPRESSION_ZSTD }
 
enum  RecoveryState { RECOVERY_STATE_CRASH = 0 , RECOVERY_STATE_ARCHIVE , RECOVERY_STATE_DONE }
 
enum  WALAvailability {
  WALAVAIL_INVALID_LSN , WALAVAIL_RESERVED , WALAVAIL_EXTENDED , WALAVAIL_UNRESERVED ,
  WALAVAIL_REMOVED
}
 
enum  SessionBackupState { SESSION_BACKUP_NONE , SESSION_BACKUP_RUNNING }
 

Functions

XLogRecPtr XLogInsertRecord (struct XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void xlog_redo (struct XLogReaderState *record)
 
void xlog_desc (StringInfo buf, struct XLogReaderState *record)
 
const char * xlog_identify (uint8 info)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
void InitializeWalConsistencyChecking (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
void StartupXLOG (void)
 
void ShutdownXLOG (int code, Datum arg)
 
bool CreateCheckPoint (int flags)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetRedoRecPtr (void)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
void SetWalWriterSleeping (bool sleeping)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void XLogShutdownWalRcv (void)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
SessionBackupState get_backup_status (void)
 

Variables

PGDLLIMPORT int wal_sync_method
 
PGDLLIMPORT XLogRecPtr ProcLastRecPtr
 
PGDLLIMPORT XLogRecPtr XactLastRecEnd
 
PGDLLIMPORT XLogRecPtr XactLastCommitEnd
 
PGDLLIMPORT int wal_segment_size
 
PGDLLIMPORT int min_wal_size_mb
 
PGDLLIMPORT int max_wal_size_mb
 
PGDLLIMPORT int wal_keep_size_mb
 
PGDLLIMPORT int max_slot_wal_keep_size_mb
 
PGDLLIMPORT int XLOGbuffers
 
PGDLLIMPORT int XLogArchiveTimeout
 
PGDLLIMPORT int wal_retrieve_retry_interval
 
PGDLLIMPORT char * XLogArchiveCommand
 
PGDLLIMPORT bool EnableHotStandby
 
PGDLLIMPORT bool fullPageWrites
 
PGDLLIMPORT bool wal_log_hints
 
PGDLLIMPORT int wal_compression
 
PGDLLIMPORT bool wal_init_zero
 
PGDLLIMPORT bool wal_recycle
 
PGDLLIMPORT bool * wal_consistency_checking
 
PGDLLIMPORT char * wal_consistency_checking_string
 
PGDLLIMPORT bool log_checkpoints
 
PGDLLIMPORT int CommitDelay
 
PGDLLIMPORT int CommitSiblings
 
PGDLLIMPORT bool track_wal_io_timing
 
PGDLLIMPORT int wal_decode_buffer_size
 
PGDLLIMPORT int CheckPointSegments
 
PGDLLIMPORT int XLogArchiveMode
 
PGDLLIMPORT int wal_level
 
PGDLLIMPORT CheckpointStatsData CheckpointStats
 

Macro Definition Documentation

◆ BACKUP_LABEL_FILE

#define BACKUP_LABEL_FILE   "backup_label"

Definition at line 302 of file xlog.h.

◆ BACKUP_LABEL_OLD

#define BACKUP_LABEL_OLD   "backup_label.old"

Definition at line 303 of file xlog.h.

◆ CHECKPOINT_CAUSE_TIME

#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */

Definition at line 149 of file xlog.h.

◆ CHECKPOINT_CAUSE_XLOG

#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */

Definition at line 148 of file xlog.h.

◆ CHECKPOINT_END_OF_RECOVERY

#define CHECKPOINT_END_OF_RECOVERY
Value:
0x0002 /* Like shutdown checkpoint, but
* issued at end of WAL recovery */

Definition at line 140 of file xlog.h.

◆ CHECKPOINT_FLUSH_ALL

#define CHECKPOINT_FLUSH_ALL
Value:
0x0010 /* Flush all pages, including those
* belonging to unlogged tables */

Definition at line 143 of file xlog.h.

◆ CHECKPOINT_FORCE

#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */

Definition at line 142 of file xlog.h.

◆ CHECKPOINT_IMMEDIATE

#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */

Definition at line 141 of file xlog.h.

◆ CHECKPOINT_IS_SHUTDOWN

#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */

Definition at line 139 of file xlog.h.

◆ CHECKPOINT_REQUESTED

#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */

Definition at line 146 of file xlog.h.

◆ CHECKPOINT_WAIT

#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */

Definition at line 145 of file xlog.h.

◆ PROMOTE_SIGNAL_FILE

#define PROMOTE_SIGNAL_FILE   "promote"

Definition at line 309 of file xlog.h.

◆ RECOVERY_SIGNAL_FILE

#define RECOVERY_SIGNAL_FILE   "recovery.signal"

Definition at line 300 of file xlog.h.

◆ STANDBY_SIGNAL_FILE

#define STANDBY_SIGNAL_FILE   "standby.signal"

Definition at line 301 of file xlog.h.

◆ TABLESPACE_MAP

#define TABLESPACE_MAP   "tablespace_map"

Definition at line 305 of file xlog.h.

◆ TABLESPACE_MAP_OLD

#define TABLESPACE_MAP_OLD   "tablespace_map.old"

Definition at line 306 of file xlog.h.

◆ XLOG_INCLUDE_ORIGIN

#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */

Definition at line 154 of file xlog.h.

◆ XLOG_MARK_UNIMPORTANT

#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */

Definition at line 155 of file xlog.h.

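◆ XLogArchivingActive

#define XLogArchivingActive ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)

Definition at line 99 of file xlog.h.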

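◆ XLogArchivingAlways

#define XLogArchivingAlways ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)

Definition at line 102 of file xlog.h.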

◆ XLogHintBitIsNeeded

#define XLogHintBitIsNeeded ( )    (DataChecksumsEnabled() || wal_log_hints)

Definition at line 120 of file xlog.h.

◆ XLogIsNeeded

#define XLogIsNeeded ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 109 of file xlog.h.

◆ XLogLogicalInfoActive

#define XLogLogicalInfoActive ( )    (wal_level >= WAL_LEVEL_LOGICAL)

Definition at line 126 of file xlog.h.

◆ XLogStandbyInfoActive

#define XLogStandbyInfoActive ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 123 of file xlog.h.

Typedef Documentation

◆ ArchiveMode

typedef enum ArchiveMode ArchiveMode

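◆ CheckpointStatsData

typedef struct CheckpointStatsData CheckpointStatsData

◆ RecoveryState

typedef enum RecoveryState RecoveryState

◆ SessionBackupState

typedef enum SessionBackupState SessionBackupState

◆ WALAvailability

typedef enum WALAvailability WALAvailability

◆ WalCompression

typedef enum WalCompression WalCompression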

◆ WalLevel

typedef enum WalLevel WalLevel

Enumeration Type Documentation

◆ ArchiveMode

Enumerator
ARCHIVE_MODE_OFF 
ARCHIVE_MODE_ON 
ARCHIVE_MODE_ALWAYS 

Definition at line 63 of file xlog.h.

64 {
65  ARCHIVE_MODE_OFF = 0, /* disabled */
66  ARCHIVE_MODE_ON, /* enabled while server is running normally */
67  ARCHIVE_MODE_ALWAYS, /* enabled always (even during recovery) */
68 } ArchiveMode;
ArchiveMode
Definition: xlog.h:64
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

◆ RecoveryState

Enumerator
RECOVERY_STATE_CRASH 
RECOVERY_STATE_ARCHIVE 
RECOVERY_STATE_DONE 

Definition at line 89 of file xlog.h.

90 {
91  RECOVERY_STATE_CRASH = 0, /* crash recovery */
92  RECOVERY_STATE_ARCHIVE, /* archive recovery */
93  RECOVERY_STATE_DONE, /* currently in production */
94 } RecoveryState;
RecoveryState
Definition: xlog.h:90
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_DONE
Definition: xlog.h:93
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92

◆ SessionBackupState

Enumerator
SESSION_BACKUP_NONE 
SESSION_BACKUP_RUNNING 

Definition at line 285 of file xlog.h.

286 {
@ SESSION_BACKUP_NONE
Definition: xlog.h:287

◆ WALAvailability

Enumerator
WALAVAIL_INVALID_LSN 
WALAVAIL_RESERVED 
WALAVAIL_EXTENDED 
WALAVAIL_UNRESERVED 
WALAVAIL_REMOVED 

Definition at line 187 of file xlog.h.

188 {
189  WALAVAIL_INVALID_LSN, /* parameter error */
190  WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */
191  WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot or
192  * wal_keep_size */
193  WALAVAIL_UNRESERVED, /* no longer reserved, but not removed yet */
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

◆ WalCompression

Enumerator
WAL_COMPRESSION_NONE 
WAL_COMPRESSION_PGLZ 
WAL_COMPRESSION_LZ4 
WAL_COMPRESSION_ZSTD 

Definition at line 80 of file xlog.h.

81 {
WalCompression
Definition: xlog.h:81
@ WAL_COMPRESSION_NONE
Definition: xlog.h:82
@ WAL_COMPRESSION_LZ4
Definition: xlog.h:84
@ WAL_COMPRESSION_PGLZ
Definition: xlog.h:83
@ WAL_COMPRESSION_ZSTD
Definition: xlog.h:85

◆ WalLevel

enum WalLevel
Enumerator
WAL_LEVEL_MINIMAL 
WAL_LEVEL_REPLICA 
WAL_LEVEL_LOGICAL 

Definition at line 72 of file xlog.h.

73 {
77 } WalLevel;
WalLevel
Definition: xlog.h:73
@ WAL_LEVEL_REPLICA
Definition: xlog.h:75
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74

◆ WalSyncMethod

Enumerator
WAL_SYNC_METHOD_FSYNC 
WAL_SYNC_METHOD_FDATASYNC 
WAL_SYNC_METHOD_OPEN 
WAL_SYNC_METHOD_FSYNC_WRITETHROUGH 
WAL_SYNC_METHOD_OPEN_DSYNC 

Definition at line 22 of file xlog.h.

23 {
26  WAL_SYNC_METHOD_OPEN, /* for O_SYNC */
28  WAL_SYNC_METHOD_OPEN_DSYNC /* for O_DSYNC */
29 };
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

Function Documentation

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5026 of file xlog.c.

5027 {
5028  CheckPoint checkPoint;
5029  char *buffer;
5030  XLogPageHeader page;
5031  XLogLongPageHeader longpage;
5032  XLogRecord *record;
5033  char *recptr;
5034  uint64 sysidentifier;
5035  struct timeval tv;
5036  pg_crc32c crc;
5037 
5038  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5040 
5041  /*
5042  * Select a hopefully-unique system identifier code for this installation.
5043  * We use the result of gettimeofday(), including the fractional seconds
5044  * field, as being about as unique as we can easily get. (Think not to
5045  * use random(), since it hasn't been seeded and there's no portable way
5046  * to seed it other than the system clock value...) The upper half of the
5047  * uint64 value is just the tv_sec part, while the lower half contains the
5048  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5049  * PID for a little extra uniqueness. A person knowing this encoding can
5050  * determine the initialization time of the installation, which could
5051  * perhaps be useful sometimes.
5052  */
5053  gettimeofday(&tv, NULL);
5054  sysidentifier = ((uint64) tv.tv_sec) << 32;
5055  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5056  sysidentifier |= getpid() & 0xFFF;
5057 
5058  /* page buffer must be aligned suitably for O_DIRECT */
5059  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5060  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5061  memset(page, 0, XLOG_BLCKSZ);
5062 
5063  /*
5064  * Set up information for the initial checkpoint record
5065  *
5066  * The initial checkpoint record is written to the beginning of the WAL
5067  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5068  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5069  */
5070  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5071  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
5072  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
5073  checkPoint.fullPageWrites = fullPageWrites;
5074  checkPoint.wal_level = wal_level;
5075  checkPoint.nextXid =
5077  checkPoint.nextOid = FirstGenbkiObjectId;
5078  checkPoint.nextMulti = FirstMultiXactId;
5079  checkPoint.nextMultiOffset = 0;
5080  checkPoint.oldestXid = FirstNormalTransactionId;
5081  checkPoint.oldestXidDB = Template1DbOid;
5082  checkPoint.oldestMulti = FirstMultiXactId;
5083  checkPoint.oldestMultiDB = Template1DbOid;
5086  checkPoint.time = (pg_time_t) time(NULL);
5088 
5089  TransamVariables->nextXid = checkPoint.nextXid;
5090  TransamVariables->nextOid = checkPoint.nextOid;
5092  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5093  AdvanceOldestClogXid(checkPoint.oldestXid);
5094  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5095  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5097 
5098  /* Set up the XLOG page header */
5099  page->xlp_magic = XLOG_PAGE_MAGIC;
5100  page->xlp_info = XLP_LONG_HEADER;
5101  page->xlp_tli = BootstrapTimeLineID;
5103  longpage = (XLogLongPageHeader) page;
5104  longpage->xlp_sysid = sysidentifier;
5105  longpage->xlp_seg_size = wal_segment_size;
5106  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5107 
5108  /* Insert the initial checkpoint record */
5109  recptr = ((char *) page + SizeOfXLogLongPHD);
5110  record = (XLogRecord *) recptr;
5111  record->xl_prev = 0;
5112  record->xl_xid = InvalidTransactionId;
5113  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5115  record->xl_rmid = RM_XLOG_ID;
5116  recptr += SizeOfXLogRecord;
5117  /* fill the XLogRecordDataHeaderShort struct */
5118  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5119  *(recptr++) = sizeof(checkPoint);
5120  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5121  recptr += sizeof(checkPoint);
5122  Assert(recptr - (char *) record == record->xl_tot_len);
5123 
5124  INIT_CRC32C(crc);
5125  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5126  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5127  FIN_CRC32C(crc);
5128  record->xl_crc = crc;
5129 
5130  /* Create first XLOG segment file */
5133 
5134  /*
5135  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5136  * close the file again in a moment.
5137  */
5138 
5139  /* Write the first page with the initial record */
5140  errno = 0;
5141  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5142  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5143  {
5144  /* if write didn't set errno, assume problem is no disk space */
5145  if (errno == 0)
5146  errno = ENOSPC;
5147  ereport(PANIC,
5149  errmsg("could not write bootstrap write-ahead log file: %m")));
5150  }
5152 
5153  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5154  if (pg_fsync(openLogFile) != 0)
5155  ereport(PANIC,
5157  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5159 
5160  if (close(openLogFile) != 0)
5161  ereport(PANIC,
5163  errmsg("could not close bootstrap write-ahead log file: %m")));
5164 
5165  openLogFile = -1;
5166 
5167  /* Now create pg_control */
5168  InitControlFile(sysidentifier, data_checksum_version);
5169  ControlFile->time = checkPoint.time;
5170  ControlFile->checkPoint = checkPoint.redo;
5171  ControlFile->checkPointCopy = checkPoint;
5172 
5173  /* some additional ControlFile fields are set in WriteControlFile() */
5174  WriteControlFile();
5175 
5176  /* Bootstrap the commit log, too */
5177  BootStrapCLOG();
5181 
5182  pfree(buffer);
5183 
5184  /*
5185  * Force control file to be read - in contrast to normal processing we'd
5186  * otherwise never run the checks and GUC related initializations therein.
5187  */
5188  ReadControlFile();
5189 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:795
#define Assert(condition)
Definition: c.h:849
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:386
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2328
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2362
void BootStrapMultiXact(void)
Definition: multixact.c:2034
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3373
bool fullPageWrites
Definition: xlog.c:121
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4197
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9483
static int openLogFile
Definition: xlog.c:634
int wal_level
Definition: xlog.c:130
static void WriteControlFile(void)
Definition: xlog.c:4232
int wal_segment_size
Definition: xlog.c:142
static TimeLineID openLogTLI
Definition: xlog.c:636
static ControlFileData * ControlFile
Definition: xlog.c:573
#define BootstrapTimeLineID
Definition: xlog.c:110
static void ReadControlFile(void)
Definition: xlog.c:4314
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3720 of file xlog.c.

3721 {
3722  int save_errno = errno;
3723  XLogSegNo lastRemovedSegNo;
3724 
3726  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3728 
3729  if (segno <= lastRemovedSegNo)
3730  {
3731  char filename[MAXFNAMELEN];
3732 
3733  XLogFileName(filename, tli, segno, wal_segment_size);
3734  errno = save_errno;
3735  ereport(ERROR,
3737  errmsg("requested WAL segment %s has already been removed",
3738  filename)));
3739  }
3740  errno = save_errno;
3741 }
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:119
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
slock_t info_lck
Definition: xlog.c:552
XLogSegNo lastRemovedSegNo
Definition: xlog.c:460
static XLogCtlData * XLogCtl
Definition: xlog.c:565
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 6888 of file xlog.c.

6889 {
6890  bool shutdown;
6891  CheckPoint checkPoint;
6892  XLogRecPtr recptr;
6893  XLogSegNo _logSegNo;
6894  XLogCtlInsert *Insert = &XLogCtl->Insert;
6895  uint32 freespace;
6896  XLogRecPtr PriorRedoPtr;
6897  XLogRecPtr last_important_lsn;
6898  VirtualTransactionId *vxids;
6899  int nvxids;
6900  int oldXLogAllowed = 0;
6901 
6902  /*
6903  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6904  * issued at a different time.
6905  */
6906  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
6907  shutdown = true;
6908  else
6909  shutdown = false;
6910 
6911  /* sanity check */
6912  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6913  elog(ERROR, "can't create a checkpoint during recovery");
6914 
6915  /*
6916  * Prepare to accumulate statistics.
6917  *
6918  * Note: because it is possible for log_checkpoints to change while a
6919  * checkpoint proceeds, we always accumulate stats, even if
6920  * log_checkpoints is currently off.
6921  */
6922  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6923  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
6924 
6925  /*
6926  * Let smgr prepare for checkpoint; this has to happen outside the
6927  * critical section and before we determine the REDO pointer. Note that
6928  * smgr must not do anything that'd have to be undone if we decide no
6929  * checkpoint is needed.
6930  */
6931  SyncPreCheckpoint();
6932 
6933  /*
6934  * Use a critical section to force system panic if we have trouble.
6935  */
6936  START_CRIT_SECTION();
6937 
6938  if (shutdown)
6939  {
6940  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6941  ControlFile->state = DB_SHUTDOWNING;
6942  UpdateControlFile();
6943  LWLockRelease(ControlFileLock);
6944  }
6945 
6946  /* Begin filling in the checkpoint WAL record */
6947  MemSet(&checkPoint, 0, sizeof(checkPoint));
6948  checkPoint.time = (pg_time_t) time(NULL);
6949 
6950  /*
6951  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6952  * pointer. This allows us to begin accumulating changes to assemble our
6953  * starting snapshot of locks and transactions.
6954  */
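6955  if (!shutdown && XLogStandbyInfoActive())
6956  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
6957  else
6958  checkPoint.oldestActiveXid = InvalidTransactionId;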
6959 
6960  /*
6961  * Get location of last important record before acquiring insert locks (as
6962  * GetLastImportantRecPtr() also locks WAL locks).
6963  */
6964  last_important_lsn = GetLastImportantRecPtr();
6965 
6966  /*
6967  * If this isn't a shutdown or forced checkpoint, and if there has been no
6968  * WAL activity requiring a checkpoint, skip it. The idea here is to
6969  * avoid inserting duplicate checkpoints when the system is idle.
6970  */
6971  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6972  CHECKPOINT_FORCE)) == 0)
6973  {
6974  if (last_important_lsn == ControlFile->checkPoint)
6975  {
6976  END_CRIT_SECTION();
6977  ereport(DEBUG1,
6978  (errmsg_internal("checkpoint skipped because system is idle")));
6979  return false;
6980  }
6981  }
6982 
6983  /*
6984  * An end-of-recovery checkpoint is created before anyone is allowed to
6985  * write WAL. To allow us to write the checkpoint record, temporarily
6986  * enable XLogInsertAllowed.
6987  */
6988  if (flags & CHECKPOINT_END_OF_RECOVERY)
6989  oldXLogAllowed = LocalSetXLogInsertAllowed();
6990 
6991  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6992  if (flags & CHECKPOINT_END_OF_RECOVERY)
6993  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6994  else
6995  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6996 
6997  /*
6998  * We must block concurrent insertions while examining insert state.
6999  */
7000  WALInsertLockAcquireExclusive();
7001 
7002  checkPoint.fullPageWrites = Insert->fullPageWrites;
7003  checkPoint.wal_level = wal_level;
7004 
7005  if (shutdown)
7006  {
7007  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7008 
7009  /*
7010  * Compute new REDO record ptr = location of next XLOG record.
7011  *
7012  * Since this is a shutdown checkpoint, there can't be any concurrent
7013  * WAL insertion.
7014  */
7015  freespace = INSERT_FREESPACE(curInsert);
7016  if (freespace == 0)
7017  {
7018  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7019  curInsert += SizeOfXLogLongPHD;
7020  else
7021  curInsert += SizeOfXLogShortPHD;
7022  }
7023  checkPoint.redo = curInsert;
7024 
7025  /*
7026  * Here we update the shared RedoRecPtr for future XLogInsert calls;
7027  * this must be done while holding all the insertion locks.
7028  *
7029  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7030  * left pointing past where it really needs to point. This is okay;
7031  * the only consequence is that XLogInsert might back up whole buffers
7032  * that it didn't really need to. We can't postpone advancing
7033  * RedoRecPtr because XLogInserts that happen while we are dumping
7034  * buffers must assume that their buffer changes are not included in
7035  * the checkpoint.
7036  */
7037  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7038  }
7039 
7040  /*
7041  * Now we can release the WAL insertion locks, allowing other xacts to
7042  * proceed while we are flushing disk buffers.
7043  */
7044  WALInsertLockRelease();
7045 
7046  /*
7047  * If this is an online checkpoint, we have not yet determined the redo
7048  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7049  * record; the LSN at which it starts becomes the new redo pointer. We
7050  * don't do this for a shutdown checkpoint, because in that case no WAL
7051  * can be written between the redo point and the insertion of the
7052  * checkpoint record itself, so the checkpoint record itself serves to
7053  * mark the redo point.
7054  */
7055  if (!shutdown)
7056  {
7057  /* Include WAL level in record for WAL summarizer's benefit. */
7058  XLogBeginInsert();
7059  XLogRegisterData((char *) &wal_level, sizeof(wal_level));
7060  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7061 
7062  /*
7063  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7064  * shared memory and RedoRecPtr in backend-local memory, but we need
7065  * to copy that into the record that will be inserted when the
7066  * checkpoint is complete.
7067  */
7068  checkPoint.redo = RedoRecPtr;
7069  }
7070 
7071  /* Update the info_lck-protected copy of RedoRecPtr as well */
7072  SpinLockAcquire(&XLogCtl->info_lck);
7073  XLogCtl->RedoRecPtr = checkPoint.redo;
7074  SpinLockRelease(&XLogCtl->info_lck);
7075 
7076  /*
7077  * If enabled, log checkpoint start. We postpone this until now so as not
7078  * to log anything if we decided to skip the checkpoint.
7079  */
7080  if (log_checkpoints)
7081  LogCheckpointStart(flags, false);
7082 
7083  /* Update the process title */
7084  update_checkpoint_display(flags, false, false);
7085 
7086  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7087 
7088  /*
7089  * Get the other info we need for the checkpoint record.
7090  *
7091  * We don't need to save oldestClogXid in the checkpoint, it only matters
7092  * for the short period in which clog is being truncated, and if we crash
7093  * during that we'll redo the clog truncation and fix up oldestClogXid
7094  * there.
7095  */
7096  LWLockAcquire(XidGenLock, LW_SHARED);
7097  checkPoint.nextXid = TransamVariables->nextXid;
7098  checkPoint.oldestXid = TransamVariables->oldestXid;
7099  checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7100  LWLockRelease(XidGenLock);
7101 
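7102  LWLockAcquire(CommitTsLock, LW_SHARED);
7103  checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7104  checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;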
7105  LWLockRelease(CommitTsLock);
7106 
7107  LWLockAcquire(OidGenLock, LW_SHARED);
7108  checkPoint.nextOid = TransamVariables->nextOid;
7109  if (!shutdown)
7110  checkPoint.nextOid += TransamVariables->oidCount;
7111  LWLockRelease(OidGenLock);
7112 
7113  MultiXactGetCheckptMulti(shutdown,
7114  &checkPoint.nextMulti,
7115  &checkPoint.nextMultiOffset,
7116  &checkPoint.oldestMulti,
7117  &checkPoint.oldestMultiDB);
7118 
7119  /*
7120  * Having constructed the checkpoint record, ensure all shmem disk buffers
7121  * and commit-log buffers are flushed to disk.
7122  *
7123  * This I/O could fail for various reasons. If so, we will fail to
7124  * complete the checkpoint, but there is no reason to force a system
7125  * panic. Accordingly, exit critical section while doing it.
7126  */
7127  END_CRIT_SECTION();
7128 
7129  /*
7130  * In some cases there are groups of actions that must all occur on one
7131  * side or the other of a checkpoint record. Before flushing the
7132  * checkpoint record we must explicitly wait for any backend currently
7133  * performing those groups of actions.
7134  *
7135  * One example is end of transaction, so we must wait for any transactions
7136  * that are currently in commit critical sections. If an xact inserted
7137  * its commit record into XLOG just before the REDO point, then a crash
7138  * restart from the REDO point would not replay that record, which means
7139  * that our flushing had better include the xact's update of pg_xact. So
7140  * we wait till he's out of his commit critical section before proceeding.
7141  * See notes in RecordTransactionCommit().
7142  *
7143  * Because we've already released the insertion locks, this test is a bit
7144  * fuzzy: it is possible that we will wait for xacts we didn't really need
7145  * to wait for. But the delay should be short and it seems better to make
7146  * checkpoint take a bit longer than to hold off insertions longer than
7147  * necessary. (In fact, the whole reason we have this issue is that xact.c
7148  * does commit record XLOG insertion and clog update as two separate steps
7149  * protected by different locks, but again that seems best on grounds of
7150  * minimizing lock contention.)
7151  *
7152  * A transaction that has not yet set delayChkptFlags when we look cannot
7153  * be at risk, since it has not inserted its commit record yet; and one
7154  * that's already cleared it is not at risk either, since it's done fixing
7155  * clog and we will correctly flush the update below. So we cannot miss
7156  * any xacts we need to wait for.
7157  */
7158  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7159  if (nvxids > 0)
7160  {
7161  do
7162  {
7163  /*
7164  * Keep absorbing fsync requests while we wait. There could even
7165  * be a deadlock if we don't, if the process that prevents the
7166  * checkpoint is trying to add a request to the queue.
7167  */
7168  AbsorbSyncRequests();
7169 
7170  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7171  pg_usleep(10000L); /* wait for 10 msec */
7172  pgstat_report_wait_end();
7173  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7174  DELAY_CHKPT_START));
7175  }
7176  pfree(vxids);
7177 
7178  CheckPointGuts(checkPoint.redo, flags);
7179 
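7180  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7181  if (nvxids > 0)
7182  {
7183  do
7184  {
7185  AbsorbSyncRequests();
7186 
7187  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7188  pg_usleep(10000L); /* wait for 10 msec */
7189  pgstat_report_wait_end();
7190  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7191  DELAY_CHKPT_COMPLETE));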
7192  }
7193  pfree(vxids);
7194 
7195  /*
7196  * Take a snapshot of running transactions and write this to WAL. This
7197  * allows us to reconstruct the state of running transactions during
7198  * archive recovery, if required. Skip, if this info disabled.
7199  *
7200  * If we are shutting down, or Startup process is completing crash
7201  * recovery we don't need to write running xact data.
7202  */
7203  if (!shutdown && XLogStandbyInfoActive())
7204  LogStandbySnapshot();
7205 
7206  START_CRIT_SECTION();
7207 
7208  /*
7209  * Now insert the checkpoint record into XLOG.
7210  */
7211  XLogBeginInsert();
7212  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7213  recptr = XLogInsert(RM_XLOG_ID,
7214  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7215  XLOG_CHECKPOINT_ONLINE);
7216 
7217  XLogFlush(recptr);
7218 
7219  /*
7220  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7221  * overwritten at next startup. No-one should even try, this just allows
7222  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7223  * to just temporarily disable writing until the system has exited
7224  * recovery.
7225  */
7226  if (shutdown)
7227  {
7228  if (flags & CHECKPOINT_END_OF_RECOVERY)
7229  LocalXLogInsertAllowed = oldXLogAllowed;
7230  else
7231  LocalXLogInsertAllowed = 0; /* never again write WAL */
7232  }
7233 
7234  /*
7235  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7236  * = end of actual checkpoint record.
7237  */
7238  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7239  ereport(PANIC,
7240  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7241 
7242  /*
7243  * Remember the prior checkpoint's redo ptr for
7244  * UpdateCheckPointDistanceEstimate()
7245  */
7246  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7247 
7248  /*
7249  * Update the control file.
7250  */
7251  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
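7252  if (shutdown)
7253  ControlFile->state = DB_SHUTDOWNED;
7254  ControlFile->checkPoint = ProcLastRecPtr;
7255  ControlFile->checkPointCopy = checkPoint;
7256  /* crash recovery should always recover to the end of WAL */
7257  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7258  ControlFile->minRecoveryPointTLI = 0;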
7259 
7260  /*
7261  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7262  * unused on non-shutdown checkpoints, but seems useful to store it always
7263  * for debugging purposes.
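7264  */
7265  ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7266 
7267  UpdateControlFile();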
7268  LWLockRelease(ControlFileLock);
7269 
7270  /* Update shared-memory copy of checkpoint XID/epoch */
7271  SpinLockAcquire(&XLogCtl->info_lck);
7272  XLogCtl->ckptFullXid = checkPoint.nextXid;
7273  SpinLockRelease(&XLogCtl->info_lck);
7274 
7275  /*
7276  * We are now done with critical updates; no need for system panic if we
7277  * have trouble while fooling with old log segments.
7278  */
7279  END_CRIT_SECTION();
7280 
7281  /*
7282  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7283  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7284  * where (a) we're not inside of a critical section and (b) we can be
7285  * certain that the relevant record has been flushed to disk, which must
7286  * happen before it can be summarized.
7287  *
7288  * If this is a shutdown checkpoint, then this happens reasonably
7289  * promptly: we've only just inserted and flushed the
7290  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7291  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7292  * record was written before we began flushing data to disk, and that
7293  * could be many minutes ago at this point. However, we don't XLogFlush()
7294  * after inserting that record, so we're not guaranteed that it's on disk
7295  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7296  * record.
7297  */
7298  SetWalSummarizerLatch();
7299 
7300  /*
7301  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7302  */
7303  SyncPostCheckpoint();
7304 
7305  /*
7306  * Update the average distance between checkpoints if the prior checkpoint
7307  * exists.
7308  */
7309  if (PriorRedoPtr != InvalidXLogRecPtr)
7310  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7311 
7312  /*
7313  * Delete old log files, those no longer needed for last checkpoint to
7314  * prevent the disk holding the xlog from growing full.
7315  */
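7316  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7317  KeepLogSeg(recptr, &_logSegNo);
7318  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7319  _logSegNo, InvalidOid,
7320  InvalidTransactionId))
7321  {
7322  /*
7323  * Some slots have been invalidated; recalculate the old-segment
7324  * horizon, starting again from RedoRecPtr.
7325  */
7326  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7327  KeepLogSeg(recptr, &_logSegNo);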
7328  }
7329  _logSegNo--;
7330  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7331  checkPoint.ThisTimeLineID);
7332 
7333  /*
7334  * Make more log segments if needed. (Do this after recycling old log
7335  * segments, since that may supply some of the needed files.)
7336  */
7337  if (!shutdown)
7338  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7339 
7340  /*
7341  * Truncate pg_subtrans if possible. We can throw away all data before
7342  * the oldest XMIN of any running transaction. No future transaction will
7343  * attempt to reference any pg_subtrans entry older than that (see Asserts
7344  * in subtrans.c). During recovery, though, we mustn't do this because
7345  * StartupSUBTRANS hasn't been called yet.
7346  */
7347  if (!RecoveryInProgress())
7348  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7349 
7350  /* Real work is done; log and update stats. */
7351  LogCheckpointEnd(false);
7352 
7353  /* Reset the process title */
7354  update_checkpoint_display(flags, false, true);
7355 
7356  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
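7357  NBuffers,
7358  CheckpointStats.ckpt_segs_added,
7359  CheckpointStats.ckpt_segs_removed,
7360  CheckpointStats.ckpt_segs_recycled);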
7361 
7362  return true;
7363 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:476
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
unsigned int uint32
Definition: c.h:506
#define MemSet(start, val, len)
Definition: c.h:1011
void AbsorbSyncRequests(void)
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
static void Insert(File file)
Definition: fd.c:1313
int NBuffers
Definition: globals.c:141
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2282
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:119
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3047
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2884
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3093
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1811
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:54
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1281
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:456
TimeLineID InsertTimeLineID
Definition: xlog.c:508
XLogRecPtr RedoRecPtr
Definition: xlog.c:455
XLogCtlInsert Insert
Definition: xlog.c:451
TimeLineID PrevTimeLineID
Definition: xlog.c:509
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:463
XLogRecPtr RedoRecPtr
Definition: xlog.c:429
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void SetWalSummarizerLatch(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:252
bool RecoveryInProgress(void)
Definition: xlog.c:6333
static void WALInsertLockRelease(void)
Definition: xlog.c:1444
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1857
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1415
static void UpdateControlFile(void)
Definition: xlog.c:4552
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3858
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6648
static XLogRecPtr RedoRecPtr
Definition: xlog.c:272
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6680
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3683
bool log_checkpoints
Definition: xlog.c:128
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7952
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6421
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6555
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6785
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:579
static int LocalXLogInsertAllowed
Definition: xlog.c:235
CheckpointStatsData CheckpointStats
Definition: xlog.c:208
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2795
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7509
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6823
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SetWalSummarizerLatch(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7590 of file xlog.c.

7591 {
7592  XLogRecPtr lastCheckPointRecPtr;
7593  XLogRecPtr lastCheckPointEndPtr;
7594  CheckPoint lastCheckPoint;
7595  XLogRecPtr PriorRedoPtr;
7596  XLogRecPtr receivePtr;
7597  XLogRecPtr replayPtr;
7598  TimeLineID replayTLI;
7599  XLogRecPtr endptr;
7600  XLogSegNo _logSegNo;
7601  TimestampTz xtime;
7602 
7603  /* Concurrent checkpoint/restartpoint cannot happen */
7604  Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
7605 
7606  /* Get a local copy of the last safe checkpoint record. */
7607  SpinLockAcquire(&XLogCtl->info_lck);
7608  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7609  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7610  lastCheckPoint = XLogCtl->lastCheckPoint;
7611  SpinLockRelease(&XLogCtl->info_lck);
7612 
7613  /*
7614  * Check that we're still in recovery mode. It's ok if we exit recovery
7615  * mode after this check, the restart point is valid anyway.
7616  */
7617  if (!RecoveryInProgress())
7618  {
7619  ereport(DEBUG2,
7620  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7621  return false;
7622  }
7623 
7624  /*
7625  * If the last checkpoint record we've replayed is already our last
7626  * restartpoint, we can't perform a new restart point. We still update
7627  * minRecoveryPoint in that case, so that if this is a shutdown restart
7628  * point, we won't start up earlier than before. That's not strictly
7629  * necessary, but when hot standby is enabled, it would be rather weird if
7630  * the database opened up for read-only connections at a point-in-time
7631  * before the last shutdown. Such time travel is still possible in case of
7632  * immediate shutdown, though.
7633  *
7634  * We don't explicitly advance minRecoveryPoint when we do create a
7635  * restartpoint. It's assumed that flushing the buffers will do that as a
7636  * side-effect.
7637  */
7638  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7639  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7640  {
7641  ereport(DEBUG2,
7642  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7643  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7644 
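7645  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
7646  if (flags & CHECKPOINT_IS_SHUTDOWN)
7647  {
7648  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7649  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7650  UpdateControlFile();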
7651  LWLockRelease(ControlFileLock);
7652  }
7653  return false;
7654  }
7655 
7656  /*
7657  * Update the shared RedoRecPtr so that the startup process can calculate
7658  * the number of segments replayed since last restartpoint, and request a
7659  * restartpoint if it exceeds CheckPointSegments.
7660  *
7661  * Like in CreateCheckPoint(), hold off insertions to update it, although
7662  * during recovery this is just pro forma, because no WAL insertions are
7663  * happening.
7664  */
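7665  WALInsertLockAcquireExclusive();
7666  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7667  WALInsertLockRelease();
7668 
7669  /* Also update the info_lck-protected copy */
7670  SpinLockAcquire(&XLogCtl->info_lck);
7671  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7672  SpinLockRelease(&XLogCtl->info_lck);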
7673 
7674  /*
7675  * Prepare to accumulate statistics.
7676  *
7677  * Note: because it is possible for log_checkpoints to change while a
7678  * checkpoint proceeds, we always accumulate stats, even if
7679  * log_checkpoints is currently off.
7680  */
7681  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7682  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
7683 
7684  if (log_checkpoints)
7685  LogCheckpointStart(flags, true);
7686 
7687  /* Update the process title */
7688  update_checkpoint_display(flags, true, false);
7689 
7690  CheckPointGuts(lastCheckPoint.redo, flags);
7691 
7692  /*
7693  * This location needs to be after CheckPointGuts() to ensure that some
7694  * work has already happened during this checkpoint.
7695  */
7696  INJECTION_POINT("create-restart-point");
7697 
7698  /*
7699  * Remember the prior checkpoint's redo ptr for
7700  * UpdateCheckPointDistanceEstimate()
7701  */
7702  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7703 
7704  /*
7705  * Update pg_control, using current time. Check that it still shows an
7706  * older checkpoint, else do nothing; this is a quick hack to make sure
7707  * nothing really bad happens if somehow we get here after the
7708  * end-of-recovery checkpoint.
7709  */
7710  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7711  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7712  {
7713  /*
7714  * Update the checkpoint information. We do this even if the cluster
7715  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7716  * segments recycled below.
7717  */
7718  ControlFile->checkPoint = lastCheckPointRecPtr;
7719  ControlFile->checkPointCopy = lastCheckPoint;
7720 
7721  /*
7722  * Ensure minRecoveryPoint is past the checkpoint record and update it
7723  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7724  * this will have happened already while writing out dirty buffers,
7725  * but not necessarily - e.g. because no buffers were dirtied. We do
7726  * this because a backup performed in recovery uses minRecoveryPoint
7727  * to determine which WAL files must be included in the backup, and
7728  * the file (or files) containing the checkpoint record must be
7729  * included, at a minimum. Note that for an ordinary restart of
7730  * recovery there's no value in having the minimum recovery point any
7731  * earlier than this anyway, because redo will begin just after the
7732  * checkpoint record.
7733  */
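7734  if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
7735  {
7736  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7737  {
7738  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7739  ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
7740 
7741  /* update local copy */
7742  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
7743  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
7744  }
7745  if (flags & CHECKPOINT_IS_SHUTDOWN)
7746  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7747  }
7748  UpdateControlFile();
7749  }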
7750  LWLockRelease(ControlFileLock);
7751 
7752  /*
7753  * Update the average distance between checkpoints/restartpoints if the
7754  * prior checkpoint exists.
7755  */
7756  if (PriorRedoPtr != InvalidXLogRecPtr)
7757  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7758 
7759  /*
7760  * Delete old log files, those no longer needed for last restartpoint to
7761  * prevent the disk holding the xlog from growing full.
7762  */
7763  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7764 
7765  /*
7766  * Retreat _logSegNo using the current end of xlog replayed or received,
7767  * whichever is later.
7768  */
7769  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7770  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7771  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
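7772  KeepLogSeg(endptr, &_logSegNo);
7773  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7774  _logSegNo, InvalidOid,
7775  InvalidTransactionId))
7776  {
7777  /*
7778  * Some slots have been invalidated; recalculate the old-segment
7779  * horizon, starting again from RedoRecPtr.
7780  */
7781  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7782  KeepLogSeg(endptr, &_logSegNo);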
7783  }
7784  _logSegNo--;
7785 
7786  /*
7787  * Try to recycle segments on a useful timeline. If we've been promoted
7788  * since the beginning of this restartpoint, use the new timeline chosen
7789  * at end of recovery. If we're still in recovery, use the timeline we're
7790  * currently replaying.
7791  *
7792  * There is no guarantee that the WAL segments will be useful on the
7793  * current timeline; if recovery proceeds to a new timeline right after
7794  * this, the pre-allocated WAL segments on this timeline will not be used,
7795  * and will go wasted until recycled on the next restartpoint. We'll live
7796  * with that.
7797  */
7798  if (!RecoveryInProgress())
7799  replayTLI = XLogCtl->InsertTimeLineID;
7800 
7801  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7802 
7803  /*
7804  * Make more log segments if needed. (Do this after recycling old log
7805  * segments, since that may supply some of the needed files.)
7806  */
7807  PreallocXlogFiles(endptr, replayTLI);
7808 
7809  /*
7810  * Truncate pg_subtrans if possible. We can throw away all data before
7811  * the oldest XMIN of any running transaction. No future transaction will
7812  * attempt to reference any pg_subtrans entry older than that (see Asserts
7813  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7814  * this because StartupSUBTRANS hasn't been called yet.
7815  */
7816  if (EnableHotStandby)
7817  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7818 
7819  /* Real work is done; log and update stats. */
7820  LogCheckpointEnd(true);
7821 
7822  /* Reset the process title */
7823  update_checkpoint_display(flags, true, true);
7824 
7825  xtime = GetLatestXTime();
7826  ereport((log_checkpoints ? LOG : DEBUG2),
7827  (errmsg("recovery restart point at %X/%X",
7828  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7829  xtime ? errdetail("Last completed transaction was at log time %s.",
7830  timestamptz_to_str(xtime)) : 0));
7831 
7832  /*
7833  * Finally, execute archive_cleanup_command, if any.
7834  */
7835  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7836  ExecuteRecoveryCommand(archiveCleanupCommand,
7837  "archive_cleanup_command",
7838  false,
7839  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7840 
7841  return true;
7842 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1843
int64 TimestampTz
Definition: timestamp.h:39
int errdetail(const char *fmt,...)
Definition: elog.c:1203
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
bool IsUnderPostmaster
Definition: globals.c:119
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:354
BackendType MyBackendType
Definition: miscinit.c:63
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:544
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:542
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:543
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
bool EnableHotStandby
Definition: xlog.c:120
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2715
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:645
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:646
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:85
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9393 of file xlog.c.

9394 {
9395  bool during_backup_start = DatumGetBool(arg);
9396 
9397  /* If called during backup start, there shouldn't be one already running */
9398  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9399 
9400  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9401  {
9405 
9408 
9409  if (!during_backup_start)
9410  ereport(WARNING,
9411  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9412  }
9413 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:437
static SessionBackupState sessionBackupState
Definition: xlog.c:390

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8791 of file xlog.c.

8793 {
8795 
8796  Assert(state != NULL);
8798 
8799  /*
8800  * During recovery, we don't need to check the WAL level, because if the
8801  * WAL level is not sufficient, it's impossible to get here during recovery.
8802  */
8804  ereport(ERROR,
8805  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8806  errmsg("WAL level not sufficient for making an online backup"),
8807  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8808 
8809  if (strlen(backupidstr) > MAXPGPATH)
8810  ereport(ERROR,
8811  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8812  errmsg("backup label too long (max %d bytes)",
8813  MAXPGPATH)));
8814 
8815  strlcpy(state->name, backupidstr, sizeof(state->name));
8816 
8817  /*
8818  * Mark backup active in shared memory. We must do full-page WAL writes
8819  * during an on-line backup even if not doing so at other times, because
8820  * it's quite possible for the backup dump to obtain a "torn" (partially
8821  * written) copy of a database page if it reads the page concurrently with
8822  * our write to the same page. This can be fixed as long as the first
8823  * write to the page in the WAL sequence is a full-page write. Hence, we
8824  * increment runningBackups then force a CHECKPOINT, to ensure there are
8825  * no dirty pages in shared memory that might get dumped while the backup
8826  * is in progress without having a corresponding WAL record. (Once the
8827  * backup is complete, we need not force full-page writes anymore, since
8828  * we expect that any pages not modified during the backup interval must
8829  * have been correctly captured by the backup.)
8830  *
8831  * Note that forcing full-page writes has no effect during an online
8832  * backup from the standby.
8833  *
8834  * We must hold all the insertion locks to change the value of
8835  * runningBackups, to ensure adequate interlocking against
8836  * XLogInsertRecord().
8837  */
8841 
8842  /*
8843  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8844  * work correctly, it is critical that sessionBackupState is only updated
8845  * after this block is over.
8846  */
8848  {
8849  bool gotUniqueStartpoint = false;
8850  DIR *tblspcdir;
8851  struct dirent *de;
8852  tablespaceinfo *ti;
8853  int datadirpathlen;
8854 
8855  /*
8856  * Force an XLOG file switch before the checkpoint, to ensure that the
8857  * WAL segment the checkpoint is written to doesn't contain pages with
8858  * old timeline IDs. That would otherwise happen if you called
8859  * pg_backup_start() right after restoring from a PITR archive: the
8860  * first WAL segment containing the startup checkpoint has pages in
8861  * the beginning with the old timeline ID. That can cause trouble at
8862  * recovery: we won't have a history file covering the old timeline if
8863  * pg_wal directory was not included in the base backup and the WAL
8864  * archive was cleared too before starting the backup.
8865  *
8866  * This also ensures that we have emitted a WAL page header that has
8867  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8868  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8869  * compress out removable backup blocks, it won't remove any that
8870  * occur after this point.
8871  *
8872  * During recovery, we skip forcing XLOG file switch, which means that
8873  * the backup taken during recovery is not available for the special
8874  * recovery case described above.
8875  */
8877  RequestXLogSwitch(false);
8878 
8879  do
8880  {
8881  bool checkpointfpw;
8882 
8883  /*
8884  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8885  * page problems, this guarantees that two successive backup runs
8886  * will have different checkpoint positions and hence different
8887  * history file names, even if nothing happened in between.
8888  *
8889  * During recovery, establish a restartpoint if possible. We use
8890  * the last restartpoint as the backup starting checkpoint. This
8891  * means that two successive backup runs can have the same checkpoint
8892  * positions.
8893  *
8894  * Since the fact that we are executing do_pg_backup_start()
8895  * during recovery means that checkpointer is running, we can use
8896  * RequestCheckpoint() to establish a restartpoint.
8897  *
8898  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8899  * passing fast = true). Otherwise this can take a while.
8900  */
8902  (fast ? CHECKPOINT_IMMEDIATE : 0));
8903 
8904  /*
8905  * Now we need to fetch the checkpoint record location, and also
8906  * its REDO pointer. The oldest point in WAL that would be needed
8907  * to restore starting from the checkpoint is precisely the REDO
8908  * pointer.
8909  */
8910  LWLockAcquire(ControlFileLock, LW_SHARED);
8911  state->checkpointloc = ControlFile->checkPoint;
8912  state->startpoint = ControlFile->checkPointCopy.redo;
8914  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8915  LWLockRelease(ControlFileLock);
8916 
8918  {
8919  XLogRecPtr recptr;
8920 
8921  /*
8922  * Check to see if all WAL replayed during online backup
8923  * (i.e., since last restartpoint used as backup starting
8924  * checkpoint) contain full-page writes.
8925  */
8927  recptr = XLogCtl->lastFpwDisableRecPtr;
8929 
8930  if (!checkpointfpw || state->startpoint <= recptr)
8931  ereport(ERROR,
8932  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8933  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8934  "since last restartpoint"),
8935  errhint("This means that the backup being taken on the standby "
8936  "is corrupt and should not be used. "
8937  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
8938  "and then try an online backup again.")));
8939 
8940  /*
8941  * During recovery, since we don't use the end-of-backup WAL
8942  * record and don't write the backup history file, the
8943  * starting WAL location doesn't need to be unique. This means
8944  * that two base backups started at the same time might use
8945  * the same checkpoint as starting locations.
8946  */
8947  gotUniqueStartpoint = true;
8948  }
8949 
8950  /*
8951  * If two base backups are started at the same time (in WAL sender
8952  * processes), we need to make sure that they use different
8953  * checkpoints as starting locations, because we use the starting
8954  * WAL location as a unique identifier for the base backup in the
8955  * end-of-backup WAL record and when we write the backup history
8956  * file. Perhaps it would be better to generate a separate unique ID
8957  * for each backup instead of forcing another checkpoint, but
8958  * taking a checkpoint right after another is not that expensive
8959  * either because only few buffers have been dirtied yet.
8960  */
8962  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8963  {
8964  XLogCtl->Insert.lastBackupStart = state->startpoint;
8965  gotUniqueStartpoint = true;
8966  }
8968  } while (!gotUniqueStartpoint);
8969 
8970  /*
8971  * Construct tablespace_map file.
8972  */
8973  datadirpathlen = strlen(DataDir);
8974 
8975  /* Collect information about all tablespaces */
8976  tblspcdir = AllocateDir(PG_TBLSPC_DIR);
8977  while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
8978  {
8979  char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
8980  char linkpath[MAXPGPATH];
8981  char *relpath = NULL;
8982  char *s;
8983  PGFileType de_type;
8984  char *badp;
8985  Oid tsoid;
8986 
8987  /*
8988  * Try to parse the directory name as an unsigned integer.
8989  *
8990  * Tablespace directories should be positive integers that can be
8991  * represented in 32 bits, with no leading zeroes or trailing
8992  * garbage. If we come across a name that doesn't meet those
8993  * criteria, skip it.
8994  */
8995  if (de->d_name[0] < '1' || de->d_name[1] > '9')
8996  continue;
8997  errno = 0;
8998  tsoid = strtoul(de->d_name, &badp, 10);
8999  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9000  continue;
9001 
9002  snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9003 
9004  de_type = get_dirent_type(fullpath, de, false, ERROR);
9005 
9006  if (de_type == PGFILETYPE_LNK)
9007  {
9008  StringInfoData escapedpath;
9009  int rllen;
9010 
9011  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9012  if (rllen < 0)
9013  {
9014  ereport(WARNING,
9015  (errmsg("could not read symbolic link \"%s\": %m",
9016  fullpath)));
9017  continue;
9018  }
9019  else if (rllen >= sizeof(linkpath))
9020  {
9021  ereport(WARNING,
9022  (errmsg("symbolic link \"%s\" target is too long",
9023  fullpath)));
9024  continue;
9025  }
9026  linkpath[rllen] = '\0';
9027 
9028  /*
9029  * Relpath holds the relative path of the tablespace directory
9030  * when it's located within PGDATA, or NULL if it's located
9031  * elsewhere.
9032  */
9033  if (rllen > datadirpathlen &&
9034  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
9035  IS_DIR_SEP(linkpath[datadirpathlen]))
9036  relpath = pstrdup(linkpath + datadirpathlen + 1);
9037 
9038  /*
9039  * Add a backslash-escaped version of the link path to the
9040  * tablespace map file.
9041  */
9042  initStringInfo(&escapedpath);
9043  for (s = linkpath; *s; s++)
9044  {
9045  if (*s == '\n' || *s == '\r' || *s == '\\')
9046  appendStringInfoChar(&escapedpath, '\\');
9047  appendStringInfoChar(&escapedpath, *s);
9048  }
9049  appendStringInfo(tblspcmapfile, "%s %s\n",
9050  de->d_name, escapedpath.data);
9051  pfree(escapedpath.data);
9052  }
9053  else if (de_type == PGFILETYPE_DIR)
9054  {
9055  /*
9056  * It's possible to use allow_in_place_tablespaces to create
9057  * directories directly under pg_tblspc, for testing purposes
9058  * only.
9059  *
9060  * In this case, we store a relative path rather than an
9061  * absolute path into the tablespaceinfo.
9062  */
9063  snprintf(linkpath, sizeof(linkpath), "%s/%s",
9064  PG_TBLSPC_DIR, de->d_name);
9065  relpath = pstrdup(linkpath);
9066  }
9067  else
9068  {
9069  /* Skip any other file type that appears here. */
9070  continue;
9071  }
9072 
9073  ti = palloc(sizeof(tablespaceinfo));
9074  ti->oid = tsoid;
9075  ti->path = pstrdup(linkpath);
9076  ti->rpath = relpath;
9077  ti->size = -1;
9078 
9079  if (tablespaces)
9080  *tablespaces = lappend(*tablespaces, ti);
9081  }
9082  FreeDir(tblspcdir);
9083 
9084  state->starttime = (pg_time_t) time(NULL);
9085  }
9087 
9088  state->started_in_recovery = backup_started_in_recovery;
9089 
9090  /*
9091  * Mark that the start phase has correctly finished for the backup.
9092  */
9094 }
static bool backup_started_in_recovery
Definition: basebackup.c:124
void RequestCheckpoint(int flags)
Definition: checkpointer.c:952
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2932
int FreeDir(DIR *dir)
Definition: fd.c:2984
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2866
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:526
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:70
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
char * pstrdup(const char *in)
Definition: mcxt.c:1696
#define MAXPGPATH
#define snprintf
Definition: port.h:238
#define IS_DIR_SEP(ch)
Definition: port.h:102
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:102
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: dirent.c:26
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:550
XLogRecPtr lastBackupStart
Definition: xlog.c:438
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8059
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9393
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:288
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9119 of file xlog.c.

9120 {
9121  bool backup_stopped_in_recovery = false;
9122  char histfilepath[MAXPGPATH];
9123  char lastxlogfilename[MAXFNAMELEN];
9124  char histfilename[MAXFNAMELEN];
9125  XLogSegNo _logSegNo;
9126  FILE *fp;
9127  int seconds_before_warning;
9128  int waits = 0;
9129  bool reported_waiting = false;
9130 
9131  Assert(state != NULL);
9132 
9133  backup_stopped_in_recovery = RecoveryInProgress();
9134 
9135  /*
9136  * During recovery, we don't need to check the WAL level, because if the
9137  * WAL level is not sufficient, it's impossible to get here during recovery.
9138  */
9139  if (!backup_stopped_in_recovery && !XLogIsNeeded())
9140  ereport(ERROR,
9141  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9142  errmsg("WAL level not sufficient for making an online backup"),
9143  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9144 
9145  /*
9146  * OK to update backup counter and session-level lock.
9147  *
9148  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9149  * otherwise they can be updated inconsistently, which might cause
9150  * do_pg_abort_backup() to fail.
9151  */
9153 
9154  /*
9155  * It is expected that each do_pg_backup_start() call is matched by
9156  * exactly one do_pg_backup_stop() call.
9157  */
9160 
9161  /*
9162  * Clean up session-level lock.
9163  *
9164  * You might think that WALInsertLockRelease() can be called before
9165  * cleaning up session-level lock because session-level lock doesn't need
9166  * to be protected with WAL insertion lock. But since
9167  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9168  * cleaned up before it.
9169  */
9171 
9173 
9174  /*
9175  * If we are taking an online backup from the standby, we confirm that the
9176  * standby has not been promoted during the backup.
9177  */
9178  if (state->started_in_recovery && !backup_stopped_in_recovery)
9179  ereport(ERROR,
9180  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9181  errmsg("the standby was promoted during online backup"),
9182  errhint("This means that the backup being taken is corrupt "
9183  "and should not be used. "
9184  "Try taking another online backup.")));
9185 
9186  /*
9187  * During recovery, we don't write an end-of-backup record. We assume that
9188  * pg_control was backed up last and its minimum recovery point can be
9189  * available as the backup end location. Since we don't have an
9190  * end-of-backup record, we use the pg_control value to check whether
9191  * we've reached the end of backup when starting recovery from this
9192  * backup. We have no way of checking if pg_control wasn't backed up last
9193  * however.
9194  *
9195  * We don't force a switch to new WAL file but it is still possible to
9196  * wait for all the required files to be archived if waitforarchive is
9197  * true. This is okay if we use the backup to start a standby and fetch
9198  * the missing WAL using streaming replication. But in the case of an
9199  * archive recovery, a user should set waitforarchive to true and wait for
9200  * them to be archived to ensure that all the required files are
9201  * available.
9202  *
9203  * We return the current minimum recovery point as the backup end
9204  * location. Note that it can be greater than the exact backup end
9205  * location if the minimum recovery point is updated after the backup of
9206  * pg_control. This is harmless for current uses.
9207  *
9208  * XXX currently a backup history file is for informational and debug
9209  * purposes only. It's not essential for an online backup. Furthermore,
9210  * even if it's created, it will not be archived during recovery because
9211  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9212  * backup history file during recovery.
9213  */
9214  if (backup_stopped_in_recovery)
9215  {
9216  XLogRecPtr recptr;
9217 
9218  /*
9219  * Check to see if all WAL replayed during online backup contain
9220  * full-page writes.
9221  */
9223  recptr = XLogCtl->lastFpwDisableRecPtr;
9225 
9226  if (state->startpoint <= recptr)
9227  ereport(ERROR,
9228  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9229  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9230  "during online backup"),
9231  errhint("This means that the backup being taken on the standby "
9232  "is corrupt and should not be used. "
9233  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9234  "and then try an online backup again.")));
9235 
9236 
9237  LWLockAcquire(ControlFileLock, LW_SHARED);
9238  state->stoppoint = ControlFile->minRecoveryPoint;
9240  LWLockRelease(ControlFileLock);
9241  }
9242  else
9243  {
9244  char *history_file;
9245 
9246  /*
9247  * Write the backup-end xlog record
9248  */
9249  XLogBeginInsert();
9250  XLogRegisterData((char *) (&state->startpoint),
9251  sizeof(state->startpoint));
9252  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9253 
9254  /*
9255  * Given that we're not in recovery, InsertTimeLineID is set and can't
9256  * change, so we can read it without a lock.
9257  */
9258  state->stoptli = XLogCtl->InsertTimeLineID;
9259 
9260  /*
9261  * Force a switch to a new xlog segment file, so that the backup is
9262  * valid as soon as archiver moves out the current segment file.
9263  */
9264  RequestXLogSwitch(false);
9265 
9266  state->stoptime = (pg_time_t) time(NULL);
9267 
9268  /*
9269  * Write the backup history file
9270  */
9271  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9272  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9273  state->startpoint, wal_segment_size);
9274  fp = AllocateFile(histfilepath, "w");
9275  if (!fp)
9276  ereport(ERROR,
9278  errmsg("could not create file \"%s\": %m",
9279  histfilepath)));
9280 
9281  /* Build and save the contents of the backup history file */
9282  history_file = build_backup_content(state, true);
9283  fprintf(fp, "%s", history_file);
9284  pfree(history_file);
9285 
9286  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9287  ereport(ERROR,
9289  errmsg("could not write file \"%s\": %m",
9290  histfilepath)));
9291 
9292  /*
9293  * Clean out any no-longer-needed history files. As a side effect,
9294  * this will post a .ready file for the newly created history file,
9295  * notifying the archiver that history file may be archived
9296  * immediately.
9297  */
9299  }
9300 
9301  /*
9302  * If archiving is enabled, wait for all the required WAL files to be
9303  * archived before returning. If archiving isn't enabled, the required WAL
9304  * needs to be transported via streaming replication (hopefully with
9305  * wal_keep_size set high enough), or some more exotic mechanism like
9306  * polling and copying files from pg_wal with script. We have no knowledge
9307  * of those mechanisms, so it's up to the user to ensure that he gets all
9308  * the required WAL.
9309  *
9310  * We wait until both the last WAL file filled during backup and the
9311  * history file have been archived, and assume that the alphabetic sorting
9312  * property of the WAL files ensures any earlier WAL files are safely
9313  * archived as well.
9314  *
9315  * We wait forever, since archive_command is supposed to work and we
9316  * assume the admin wanted his backup to work completely. If you don't
9317  * wish to wait, then either waitforarchive should be passed in as false,
9318  * or you can set statement_timeout. Also, some notices are issued to
9319  * clue in anyone who might be doing this interactively.
9320  */
9321 
9322  if (waitforarchive &&
9323  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9324  (backup_stopped_in_recovery && XLogArchivingAlways())))
9325  {
9326  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9327  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9329 
9330  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9331  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9332  state->startpoint, wal_segment_size);
9333 
9334  seconds_before_warning = 60;
9335  waits = 0;
9336 
9337  while (XLogArchiveIsBusy(lastxlogfilename) ||
9338  XLogArchiveIsBusy(histfilename))
9339  {
9341 
9342  if (!reported_waiting && waits > 5)
9343  {
9344  ereport(NOTICE,
9345  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9346  reported_waiting = true;
9347  }
9348 
9349  (void) WaitLatch(MyLatch,
9351  1000L,
9352  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9354 
9355  if (++waits >= seconds_before_warning)
9356  {
9357  seconds_before_warning *= 2; /* This wraps in >10 years... */
9358  ereport(WARNING,
9359  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9360  waits),
9361  errhint("Check that your \"archive_command\" is executing properly. "
9362  "You can safely cancel this backup, "
9363  "but the database backup will not be usable without all the WAL segments.")));
9364  }
9365  }
9366 
9367  ereport(NOTICE,
9368  (errmsg("all required WAL segments have been archived")));
9369  }
9370  else if (waitforarchive)
9371  ereport(NOTICE,
9372  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9373 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2606
int FreeFile(FILE *file)
Definition: fd.c:2804
struct Latch * MyLatch
Definition: globals.c:62
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4154
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLogArchivingAlways()
Definition: xlog.h:102
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9100 of file xlog.c.

9101 {
9102  return sessionBackupState;
9103 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4852 of file xlog.c.

4853 {
4854  return ControlFile->wal_level;
4855 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4597 of file xlog.c.

4598 {
4600 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:522

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

Definition at line 6498 of file xlog.c.

6499 {
6501 
6503 
6504  /*
6505  * If we're writing and flushing WAL, the time line can't be changing, so
6506  * no lock is required.
6507  */
6508  if (insertTLI)
6509  *insertTLI = XLogCtl->InsertTimeLineID;
6510 
6511  return LogwrtResult.Flush;
6512 }
RecoveryState SharedRecoveryState
Definition: xlog.c:515
XLogRecPtr Flush
Definition: xlog.c:327
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:619
static XLogwrtResult LogwrtResult
Definition: xlog.c:611

References Assert, XLogwrtResult::Flush, XLogCtlData::InsertTimeLineID, LogwrtResult, RECOVERY_STATE_DONE, RefreshXLogWriteResult, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by binary_upgrade_logical_slot_has_caught_up(), get_flush_position(), GetCurrentLSN(), GetLatestLSN(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), pg_replication_slot_advance(), read_local_xlog_page_guts(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr * RedoRecPtr_p,
bool * doPageWrites_p 
)

Definition at line 6466 of file xlog.c.

6467 {
6468  *RedoRecPtr_p = RedoRecPtr;
6469  *doPageWrites_p = doPageWrites;
6470 }
static bool doPageWrites
Definition: xlog.c:285

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6481 of file xlog.c.

6482 {
6483  XLogRecPtr recptr;
6484 
6486  recptr = XLogCtl->LogwrtRqst.Write;
6488 
6489  return recptr;
6490 }
XLogwrtRqst LogwrtRqst
Definition: xlog.c:454
XLogRecPtr Write
Definition: xlog.c:320

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6555 of file xlog.c.

6556 {
6558  int i;
6559 
6560  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6561  {
6562  XLogRecPtr last_important;
6563 
6564  /*
6565  * Need to take a lock to prevent torn reads of the LSN, which are
6566  * possible on some of the supported platforms. WAL insert locks only
6567  * support exclusive mode, so we have to use that.
6568  */
6570  last_important = WALInsertLocks[i].l.lastImportantAt;
6571  LWLockRelease(&WALInsertLocks[i].l.lock);
6572 
6573  if (res < last_important)
6574  res = last_important;
6575  }
6576 
6577  return res;
6578 }
int i
Definition: isn.c:73
XLogRecPtr lastImportantAt
Definition: xlog.c:370
WALInsertLock l
Definition: xlog.c:382
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:568
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:149

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4571 of file xlog.c.

4572 {
4573  Assert(ControlFile != NULL);
4575 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6369 of file xlog.c.

6370 {
6371  RecoveryState retval;
6372 
6374  retval = XLogCtl->SharedRecoveryState;
6376 
6377  return retval;
6378 }

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6436 of file xlog.c.

6437 {
6438  XLogRecPtr ptr;
6439 
6440  /*
6441  * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6442  * grabbed a WAL insertion lock to read the authoritative value in
6443  * Insert->RedoRecPtr, someone might update it just after we've released
6444  * the lock.
6445  */
6447  ptr = XLogCtl->RedoRecPtr;
6449 
6450  if (RedoRecPtr < ptr)
6451  RedoRecPtr = ptr;
6452 
6453  return RedoRecPtr;
6454 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), pgstat_before_server_shutdown(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7868 of file xlog.c.

7869 {
7870  XLogRecPtr currpos; /* current write LSN */
7871  XLogSegNo currSeg; /* segid of currpos */
7872  XLogSegNo targetSeg; /* segid of targetLSN */
7873  XLogSegNo oldestSeg; /* actual oldest segid */
7874  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7875  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7876  uint64 keepSegs;
7877 
7878  /*
7879  * The slot does not reserve WAL: it is either deactivated or has never been active.
7880  */
7881  if (XLogRecPtrIsInvalid(targetLSN))
7882  return WALAVAIL_INVALID_LSN;
7883 
7884  /*
7885  * Calculate the oldest segment currently reserved by all slots,
7886  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7887  * oldestSlotSeg to the current segment.
7888  */
7889  currpos = GetXLogWriteRecPtr();
7890  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7891  KeepLogSeg(currpos, &oldestSlotSeg);
7892 
7893  /*
7894  * Find the oldest extant segment file. We get 1 until a checkpoint removes
7895  * the first WAL segment file since startup, which can make the status
7896  * wrong under certain abnormal conditions, but that does no actual harm.
7897  */
7898  oldestSeg = XLogGetLastRemovedSegno() + 1;
7899 
7900  /* calculate oldest segment by max_wal_size */
7901  XLByteToSeg(currpos, currSeg, wal_segment_size);
7903 
7904  if (currSeg > keepSegs)
7905  oldestSegMaxWalSize = currSeg - keepSegs;
7906  else
7907  oldestSegMaxWalSize = 1;
7908 
7909  /* the segment we care about */
7910  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7911 
7912  /*
7913  * No point in returning reserved or extended status values if the
7914  * targetSeg is known to be lost.
7915  */
7916  if (targetSeg >= oldestSlotSeg)
7917  {
7918  /* show "reserved" when targetSeg is within max_wal_size */
7919  if (targetSeg >= oldestSegMaxWalSize)
7920  return WALAVAIL_RESERVED;
7921 
7922  /* being retained by slots exceeding max_wal_size */
7923  return WALAVAIL_EXTENDED;
7924  }
7925 
7926  /* WAL segments are no longer retained but haven't been removed yet */
7927  if (targetSeg >= oldestSeg)
7928  return WALAVAIL_UNRESERVED;
7929 
7930  /* Definitely lost */
7931  return WALAVAIL_REMOVED;
7932 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3751
int max_wal_size_mb
Definition: xlog.c:113
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:602
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9450
@ WALAVAIL_REMOVED
Definition: xlog.h:194

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6519 of file xlog.c.

6520 {
6522 
6523  /* Since the value can't be changing, no lock is required. */
6524  return XLogCtl->InsertTimeLineID;
6525 }

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6535 of file xlog.c.

6536 {
6537  TimeLineID insertTLI;
6538 
6540  insertTLI = XLogCtl->InsertTimeLineID;
6542 
6543  return insertTLI;
6544 }

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9434 of file xlog.c.

9435 {
9437  uint64 current_bytepos;
9438 
9439  SpinLockAcquire(&Insert->insertpos_lck);
9440  current_bytepos = Insert->CurrBytePos;
9441  SpinLockRelease(&Insert->insertpos_lck);
9442 
9443  return XLogBytePosToRecPtr(current_bytepos);
9444 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

Definition at line 9450 of file xlog.c.

9451 {
9453 
9454  return LogwrtResult.Write;
9455 }
XLogRecPtr Write
Definition: xlog.c:326

References LogwrtResult, RefreshXLogWriteResult, and XLogwrtResult::Write.

Referenced by GetWALAvailability(), pg_attribute_noreturn(), pg_current_wal_lsn(), and pg_get_replication_slots().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4777 of file xlog.c.

4778 {
4780 
4782  {
4783  struct config_generic *guc;
4784 
4785  guc = find_option("wal_consistency_checking", false, false, ERROR);
4786 
4788 
4789  set_config_option_ext("wal_consistency_checking",
4791  guc->scontext, guc->source, guc->srole,
4792  GUC_ACTION_SET, true, ERROR, false);
4793 
4794  /* checking should not be deferred again */
4796  }
4797 }
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1234
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3381
@ GUC_ACTION_SET
Definition: guc.h:199
bool process_shared_preload_libraries_done
Definition: miscinit.c:1779
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:124
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:165

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9491 of file xlog.c.

9492 {
9493  bool result;
9494 
9495  LWLockAcquire(ControlFileLock, LW_SHARED);
9497  LWLockRelease(ControlFileLock);
9498 
9499  return result;
9500 }
bool InstallXLogFileSegmentActive
Definition: xlog.c:525

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8682 of file xlog.c.

8683 {
8684  char *msg = NULL;
8685  instr_time start;
8686 
8687  Assert(tli != 0);
8688 
8689  /*
8690  * Quick exit if fsync is disabled or write() has already synced the WAL
8691  * file.
8692  */
8693  if (!enableFsync ||
8696  return;
8697 
8698  /* Measure I/O timing to sync the WAL file */
8699  if (track_wal_io_timing)
8701  else
8703 
8704  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8705  switch (wal_sync_method)
8706  {
8707  case WAL_SYNC_METHOD_FSYNC:
8708  if (pg_fsync_no_writethrough(fd) != 0)
8709  msg = _("could not fsync file \"%s\": %m");
8710  break;
8711 #ifdef HAVE_FSYNC_WRITETHROUGH
8713  if (pg_fsync_writethrough(fd) != 0)
8714  msg = _("could not fsync write-through file \"%s\": %m");
8715  break;
8716 #endif
8718  if (pg_fdatasync(fd) != 0)
8719  msg = _("could not fdatasync file \"%s\": %m");
8720  break;
8721  case WAL_SYNC_METHOD_OPEN:
8723  /* not reachable */
8724  Assert(false);
8725  break;
8726  default:
8727  ereport(PANIC,
8728  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8729  errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8730  break;
8731  }
8732 
8733  /* PANIC if failed to fsync */
8734  if (msg)
8735  {
8736  char xlogfname[MAXFNAMELEN];
8737  int save_errno = errno;
8738 
8739  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8740  errno = save_errno;
8741  ereport(PANIC,
8743  errmsg(msg, xlogfname)));
8744  }
8745 
8747 
8748  /*
8749  * Increment the I/O timing and the number of times WAL files were synced.
8750  */
8751  if (track_wal_io_timing)
8752  {
8753  instr_time end;
8754 
8757  }
8758 
8760 }
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:441
int pg_fdatasync(int fd)
Definition: fd.c:480
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
bool enableFsync
Definition: globals.c:128
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:491
PgStat_Counter wal_sync
Definition: pgstat.h:489
int wal_sync_method
Definition: xlog.c:129
bool track_wal_io_timing
Definition: xlog.c:136

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4839 of file xlog.c.

4840 {
4841  Assert(reset || ControlFile == NULL);
4842  ControlFile = palloc(sizeof(ControlFileData));
4843  ReadControlFile();
4844 }
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6246 of file xlog.c.

6247 {
6248  /*
6249  * We have reached the end of base backup, as indicated by pg_control. The
6250  * data on disk is now consistent (unless minRecoveryPoint is further
6251  * ahead, which can happen if we crashed during previous recovery). Reset
6252  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6253  * make sure we don't allow starting up at an earlier point even if
6254  * recovery is stopped and restarted soon after this.
6255  */
6256  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6257 
6258  if (ControlFile->minRecoveryPoint < EndRecPtr)
6259  {
6260  ControlFile->minRecoveryPoint = EndRecPtr;
6262  }
6263 
6266  ControlFile->backupEndRequired = false;
6268 
6269  LWLockRelease(ControlFileLock);
6270 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6333 of file xlog.c.

6334 {
6335  /*
6336  * We check shared state each time only until we leave recovery mode. We
6337  * can't re-enter recovery, so there's no need to keep checking after the
6338  * shared variable has once been seen false.
6339  */
6341  return false;
6342  else
6343  {
6344  /*
6345  * use volatile pointer to make sure we make a fresh read of the
6346  * shared variable.
6347  */
6348  volatile XLogCtlData *xlogctl = XLogCtl;
6349 
6351 
6352  /*
6353  * Note: We don't need a memory barrier when we're still in recovery.
6354  * We might exit recovery immediately after return, so the caller
6355  * can't rely on 'true' meaning that we're still in recovery anyway.
6356  */
6357 
6358  return LocalRecoveryInProgress;
6359  }
6360 }
static bool LocalRecoveryInProgress
Definition: xlog.c:223

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), 
ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WaitForLSNReplay(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9420 of file xlog.c.

9421 {
9422  static bool already_done = false;
9423 
9424  if (already_done)
9425  return;
9427  already_done = true;
9428 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3933 of file xlog.c.

3934 {
3935  DIR *xldir;
3936  struct dirent *xlde;
3937  char switchseg[MAXFNAMELEN];
3938  XLogSegNo endLogSegNo;
3939  XLogSegNo switchLogSegNo;
3940  XLogSegNo recycleSegNo;
3941 
3942  /*
3943  * Initialize info about where to begin the work. This will recycle,
3944  * somewhat arbitrarily, 10 future segments.
3945  */
3946  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3947  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3948  recycleSegNo = endLogSegNo + 10;
3949 
3950  /*
3951  * Construct a filename of the last segment to be kept.
3952  */
3953  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3954 
3955  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3956  switchseg);
3957 
3958  xldir = AllocateDir(XLOGDIR);
3959 
3960  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3961  {
3962  /* Ignore files that are not XLOG segments */
3963  if (!IsXLogFileName(xlde->d_name))
3964  continue;
3965 
3966  /*
3967  * Remove files that are on a timeline older than the new one we're
3968  * switching to, but with a segment number >= the first segment on the
3969  * new timeline.
3970  */
3971  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3972  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3973  {
3974  /*
3975  * If the file has already been marked as .ready, however, don't
3976  * remove it yet. It should be OK to remove it - files that are
3977  * not part of our timeline history are not required for recovery
3978  * - but seems safer to let them be archived and removed later.
3979  */
3980  if (!XLogArchiveIsReady(xlde->d_name))
3981  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3982  }
3983  }
3984 
3985  FreeDir(xldir);
3986 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4002
static bool IsXLogFileName(const char *fname)
#define XLOGDIR
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9483 of file xlog.c.

9484 {
9485  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9487  LWLockRelease(ControlFileLock);
9488 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9506 of file xlog.c.

9507 {
9509  XLogCtl->WalWriterSleeping = sleeping;
9511 }
bool WalWriterSleeping
Definition: xlog.c:532

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6601 of file xlog.c.

6602 {
6603  /*
6604  * We should have an aux process resource owner to use, and we should not
6605  * be in a transaction that's installed some other resowner.
6606  */
6608  Assert(CurrentResourceOwner == NULL ||
6611 
6612  /* Don't be chatty in standalone mode */
6614  (errmsg("shutting down")));
6615 
6616  /*
6617  * Signal walsenders to move to stopping state.
6618  */
6620 
6621  /*
6622  * Wait for WAL senders to be in stopping state. This prevents commands
6623  * from writing new WAL.
6624  */
6626 
6627  if (RecoveryInProgress())
6629  else
6630  {
6631  /*
6632  * If archiving is enabled, rotate the last XLOG file so that all the
6633  * remaining records are archived (postmaster wakes up the archiver
6634  * process one more time at the end of shutdown). The checkpoint
6635  * record will go to the next XLOG file and won't be archived (yet).
6636  */
6637  if (XLogArchivingActive())
6638  RequestXLogSwitch(false);
6639 
6641  }
6642 }
bool IsPostmasterEnvironment
Definition: globals.c:118
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3717
void WalSndWaitStopping(void)
Definition: walsender.c:3743
bool CreateRestartPoint(int flags)
Definition: xlog.c:7590
bool CreateCheckPoint(int flags)
Definition: xlog.c:6888

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5422 of file xlog.c.

5423 {
5425  CheckPoint checkPoint;
5426  bool wasShutdown;
5427  bool didCrash;
5428  bool haveTblspcMap;
5429  bool haveBackupLabel;
5430  XLogRecPtr EndOfLog;
5431  TimeLineID EndOfLogTLI;
5432  TimeLineID newTLI;
5433  bool performedWalRecovery;
5434  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5437  TransactionId oldestActiveXID;
5438  bool promoted = false;
5439 
5440  /*
5441  * We should have an aux process resource owner to use, and we should not
5442  * be in a transaction that's installed some other resowner.
5443  */
5445  Assert(CurrentResourceOwner == NULL ||
5448 
5449  /*
5450  * Check that contents look valid.
5451  */
5453  ereport(FATAL,
5455  errmsg("control file contains invalid checkpoint location")));
5456 
5457  switch (ControlFile->state)
5458  {
5459  case DB_SHUTDOWNED:
5460 
5461  /*
5462  * This is the expected case, so don't be chatty in standalone
5463  * mode
5464  */
5466  (errmsg("database system was shut down at %s",
5467  str_time(ControlFile->time))));
5468  break;
5469 
5471  ereport(LOG,
5472  (errmsg("database system was shut down in recovery at %s",
5473  str_time(ControlFile->time))));
5474  break;
5475 
5476  case DB_SHUTDOWNING:
5477  ereport(LOG,
5478  (errmsg("database system shutdown was interrupted; last known up at %s",
5479  str_time(ControlFile->time))));
5480  break;
5481 
5482  case DB_IN_CRASH_RECOVERY:
5483  ereport(LOG,
5484  (errmsg("database system was interrupted while in recovery at %s",
5486  errhint("This probably means that some data is corrupted and"
5487  " you will have to use the last backup for recovery.")));
5488  break;
5489 
5491  ereport(LOG,
5492  (errmsg("database system was interrupted while in recovery at log time %s",
5494  errhint("If this has occurred more than once some data might be corrupted"
5495  " and you might need to choose an earlier recovery target.")));
5496  break;
5497 
5498  case DB_IN_PRODUCTION:
5499  ereport(LOG,
5500  (errmsg("database system was interrupted; last known up at %s",
5501  str_time(ControlFile->time))));
5502  break;
5503 
5504  default:
5505  ereport(FATAL,
5507  errmsg("control file contains invalid database cluster state")));
5508  }
5509 
5510  /* This is just to allow attaching to startup process with a debugger */
5511 #ifdef XLOG_REPLAY_DELAY
5513  pg_usleep(60000000L);
5514 #endif
5515 
5516  /*
5517  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5518  * In cases where someone has performed a copy for PITR, these directories
5519  * may have been excluded and need to be re-created.
5520  */
5522 
5523  /* Set up timeout handler needed to report startup progress. */
5527 
5528  /*----------
5529  * If we previously crashed, perform a couple of actions:
5530  *
5531  * - The pg_wal directory may still include some temporary WAL segments
5532  * used when creating a new segment, so perform some clean up to not
5533  * bloat this path. This is done first as there is no point to sync
5534  * this temporary data.
5535  *
5536  * - There might be data which we had written, intending to fsync it, but
5537  * which we had not actually fsync'd yet. Therefore, a power failure in
5538  * the near future might cause earlier unflushed writes to be lost, even
5539  * though more recent data written to disk from here on would be
5540  * persisted. To avoid that, fsync the entire data directory.
5541  */
5542  if (ControlFile->state != DB_SHUTDOWNED &&
5544  {
5547  didCrash = true;
5548  }
5549  else
5550  didCrash = false;
5551 
5552  /*
5553  * Prepare for WAL recovery if needed.
5554  *
5555  * InitWalRecovery analyzes the control file and the backup label file, if
5556  * any. It updates the in-memory ControlFile buffer according to the
5557  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5558  * It also applies the tablespace map file, if any.
5559  */
5560  InitWalRecovery(ControlFile, &wasShutdown,
5561  &haveBackupLabel, &haveTblspcMap);
5562  checkPoint = ControlFile->checkPointCopy;
5563 
5564  /* initialize shared memory variables from the checkpoint record */
5565  TransamVariables->nextXid = checkPoint.nextXid;
5566  TransamVariables->nextOid = checkPoint.nextOid;
5568  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5569  AdvanceOldestClogXid(checkPoint.oldestXid);
5570  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5571  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5573  checkPoint.newestCommitTsXid);
5574  XLogCtl->ckptFullXid = checkPoint.nextXid;
5575 
5576  /*
5577  * Clear out any old relcache cache files. This is *necessary* if we do
5578  * any WAL replay, since that would probably result in the cache files
5579  * being out of sync with database reality. In theory we could leave them
5580  * in place if the database had been cleanly shut down, but it seems
5581  * safest to just remove them always and let them be rebuilt during the
5582  * first backend startup. These files need to be removed from all
5583  * directories including pg_tblspc; however, the symlinks are created only
5584  * after reading the tablespace_map file in case of archive recovery from
5585  * backup, so we need to clear old relcache files here, after creating
5586  * the symlinks.
5587  */
5589 
5590  /*
5591  * Initialize replication slots, before there's a chance to remove
5592  * required resources.
5593  */
5595 
5596  /*
5597  * Startup logical state, needs to be setup now so we have proper data
5598  * during crash recovery.
5599  */
5601 
5602  /*
5603  * Startup CLOG. This must be done after TransamVariables->nextXid has
5604  * been initialized and before we accept connections or begin WAL replay.
5605  */
5606  StartupCLOG();
5607 
5608  /*
5609  * Startup MultiXact. We need to do this early to be able to replay
5610  * truncations.
5611  */
5612  StartupMultiXact();
5613 
5614  /*
5615  * Ditto for commit timestamps. Activate the facility if the setting is
5616  * enabled in the control file, as there should be no tracking of commit
5617  * timestamps done when the setting was disabled. This facility can be
5618  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5619  */
5621  StartupCommitTs();
5622 
5623  /*
5624  * Recover knowledge about replay progress of known replication partners.
5625  */
5627 
5628  /*
5629  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5630  * control file. On recovery, all unlogged relations are blown away, so
5631  * the unlogged LSN counter can be reset too.
5632  */
5636  else
5639 
5640  /*
5641  * Copy any missing timeline history files between 'now' and the recovery
5642  * target timeline from archive to pg_wal. While we don't need those files
5643  * ourselves - the history file of the recovery target timeline covers all
5644  * the previous timelines in the history too - a cascading standby server
5645  * might be interested in them. Or, if you archive the WAL from this
5646  * server to a different archive than the primary, it'd be good for all
5647  * the history files to get archived there after failover, so that you can
5648  * use one of the old timelines as a PITR target. Timeline history files
5649  * are small, so it's better to copy them unnecessarily than not copy them
5650  * and regret later.
5651  */
5653 
5654  /*
5655  * Before running in recovery, scan pg_twophase and fill in its status to
5656  * be able to work on entries generated by redo. Doing a scan before
5657  * taking any recovery action has the merit to discard any 2PC files that
5658  * are newer than the first record to replay, saving from any conflicts at
5659  * replay. This avoids as well any subsequent scans when doing recovery
5660  * of the on-disk two-phase data.
5661  */
5663 
5664  /*
5665  * When starting with crash recovery, reset pgstat data - it might not be
5666  * valid. Otherwise restore pgstat data. It's safe to do this here,
5667  * because postmaster will not yet have started any other processes.
5668  *
5669  * NB: Restoring replication slot stats relies on slot state to have
5670  * already been restored from disk.
5671  *
5672  * TODO: With a bit of extra work we could just start with a pgstat file
5673  * associated with the checkpoint redo location we're starting from.
5674  */
5675  if (didCrash)
5677  else
5678  pgstat_restore_stats(checkPoint.redo);
5679 
5680  lastFullPageWrites = checkPoint.fullPageWrites;
5681 
5684 
5685  /* REDO */
5686  if (InRecovery)
5687  {
5688  /* Initialize state for RecoveryInProgress() */
5690  if (InArchiveRecovery)
5692  else
5695 
5696  /*
5697  * Update pg_control to show that we are recovering and to show the
5698  * selected checkpoint as the place we are starting from. We also mark
5699  * pg_control with any minimum recovery stop point obtained from a
5700  * backup history file.
5701  *
5702  * No need to hold ControlFileLock yet, we aren't up far enough.
5703  */
5705 
5706  /*
5707  * If there was a backup label file, it's done its job and the info
5708  * has now been propagated into pg_control. We must get rid of the
5709  * label file so that if we crash during recovery, we'll pick up at
5710  * the latest recovery restartpoint instead of going all the way back
5711  * to the backup start point. It seems prudent though to just rename
5712  * the file out of the way rather than delete it completely.
5713  */
5714  if (haveBackupLabel)
5715  {
5716  unlink(BACKUP_LABEL_OLD);
5718  }
5719 
5720  /*
5721  * If there was a tablespace_map file, it's done its job and the
5722  * symlinks have been created. We must get rid of the map file so
5723  * that if we crash during recovery, we don't create symlinks again.
5724  * It seems prudent though to just rename the file out of the way
5725  * rather than delete it completely.
5726  */
5727  if (haveTblspcMap)
5728  {
5729  unlink(TABLESPACE_MAP_OLD);
5731  }
5732 
5733  /*
5734  * Initialize our local copy of minRecoveryPoint. When doing crash
5735  * recovery we want to replay up to the end of WAL. Particularly, in
5736  * the case of a promoted standby minRecoveryPoint value in the
5737  * control file is only updated after the first checkpoint. However,
5738  * if the instance crashes before the first post-recovery checkpoint
5739  * is completed then recovery will use a stale location causing the
5740  * startup process to think that there are still invalid page
5741  * references when checking for data consistency.
5742  */
5743  if (InArchiveRecovery)
5744  {
5747  }
5748  else
5749  {
5752  }
5753 
5754  /* Check that the GUCs used to generate the WAL allow recovery */
5756 
5757  /*
5758  * We're in recovery, so unlogged relations may be trashed and must be
5759  * reset. This should be done BEFORE allowing Hot Standby
5760  * connections, so that read-only backends don't try to read whatever
5761  * garbage is left over from before.
5762  */
5764 
5765  /*
5766  * Likewise, delete any saved transaction snapshot files that got left
5767  * behind by crashed backends.
5768  */
5770 
5771  /*
5772  * Initialize for Hot Standby, if enabled. We won't let backends in
5773  * yet, not until we've reached the min recovery point specified in
5774  * control file and we've established a recovery snapshot from a
5775  * running-xacts WAL record.
5776  */
5778  {
5779  TransactionId *xids;
5780  int nxids;
5781 
5782  ereport(DEBUG1,
5783  (errmsg_internal("initializing for hot standby")));
5784 
5786 
5787  if (wasShutdown)
5788  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5789  else
5790  oldestActiveXID = checkPoint.oldestActiveXid;
5791  Assert(TransactionIdIsValid(oldestActiveXID));
5792 
5793  /* Tell procarray about the range of xids it has to deal with */
5795 
5796  /*
5797  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5798  * have already been started up and other SLRUs are not maintained
5799  * during recovery and need not be started yet.
5800  */
5801  StartupSUBTRANS(oldestActiveXID);
5802 
5803  /*
5804  * If we're beginning at a shutdown checkpoint, we know that
5805  * nothing was running on the primary at this point. So fake-up an
5806  * empty running-xacts record and use that here and now. Recover
5807  * additional standby state for prepared transactions.
5808  */
5809  if (wasShutdown)
5810  {
5811  RunningTransactionsData running;
5812  TransactionId latestCompletedXid;
5813 
5814  /* Update pg_subtrans entries for any prepared transactions */
5816 
5817  /*
5818  * Construct a RunningTransactions snapshot representing a
5819  * shut down server, with only prepared transactions still
5820  * alive. We're never overflowed at this point because all
5821  * subxids are listed with their parent prepared transactions.
5822  */
5823  running.xcnt = nxids;
5824  running.subxcnt = 0;
5826  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5827  running.oldestRunningXid = oldestActiveXID;
5828  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5829  TransactionIdRetreat(latestCompletedXid);
5830  Assert(TransactionIdIsNormal(latestCompletedXid));
5831  running.latestCompletedXid = latestCompletedXid;
5832  running.xids = xids;
5833 
5834  ProcArrayApplyRecoveryInfo(&running);
5835  }
5836  }
5837 
5838  /*
5839  * We're all set for replaying the WAL now. Do it.
5840  */
5842  performedWalRecovery = true;
5843  }
5844  else
5845  performedWalRecovery = false;
5846 
5847  /*
5848  * Finish WAL recovery.
5849  */
5850  endOfRecoveryInfo = FinishWalRecovery();
5851  EndOfLog = endOfRecoveryInfo->endOfLog;
5852  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5853  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5854  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5855 
5856  /*
5857  * Reset ps status display, so that no information related to recovery shows
5858  * up.
5859  */
5860  set_ps_display("");
5861 
5862  /*
5863  * When recovering from a backup (we are in recovery, and archive recovery
5864  * was requested), complain if we did not roll forward far enough to reach
5865  * the point where the database is consistent. For regular online
5866  * backup-from-primary, that means reaching the end-of-backup WAL record
5867  * (at which point we reset backupStartPoint to be Invalid), for
5868  * backup-from-replica (which can't inject records into the WAL stream),
5869  * that point is when we reach the minRecoveryPoint in pg_control (which
5870  * we purposefully copy last when backing up from a replica). For
5871  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5872  * or snapshot-style backups (which don't), backupEndRequired will be set
5873  * to false.
5874  *
5875  * Note: it is indeed okay to look at the local variable
5876  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5877  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5878  * been advanced beyond the WAL we processed.
5879  */
5880  if (InRecovery &&
5881  (EndOfLog < LocalMinRecoveryPoint ||
5883  {
5884  /*
5885  * Ran off end of WAL before reaching end-of-backup WAL record, or
5886  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5887  * recover from an online backup but never called pg_backup_stop(), or
5888  * you didn't archive all the WAL needed.
5889  */
5891  {
5893  ereport(FATAL,
5894  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5895  errmsg("WAL ends before end of online backup"),
5896  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5897  else
5898  ereport(FATAL,
5899  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5900  errmsg("WAL ends before consistent recovery point")));
5901  }
5902  }
5903 
5904  /*
5905  * Reset unlogged relations to the contents of their INIT fork. This is
5906  * done AFTER recovery is complete so as to include any unlogged relations
5907  * created during recovery, but BEFORE recovery is marked as having
5908  * completed successfully. Otherwise we'd not retry if any of the post
5909  * end-of-recovery steps fail.
5910  */
5911  if (InRecovery)
5913 
5914  /*
5915  * Pre-scan prepared transactions to find out the range of XIDs present.
5916  * This information is not quite needed yet, but it is positioned here so
5917  * as potential problems are detected before any on-disk change is done.
5918  */
5919  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5920 
5921  /*
5922  * Allow ordinary WAL segment creation before possibly switching to a new
5923  * timeline, which creates a new segment, and after the last ReadRecord().
5924  */
5926 
5927  /*
5928  * Consider whether we need to assign a new timeline ID.
5929  *
5930  * If we did archive recovery, we always assign a new ID. This handles a
5931  * couple of issues. If we stopped short of the end of WAL during
5932  * recovery, then we are clearly generating a new timeline and must assign
5933  * it a unique new ID. Even if we ran to the end, modifying the current
5934  * last segment is problematic because it may result in trying to
5935  * overwrite an already-archived copy of that segment, and we encourage
5936  * DBAs to make their archive_commands reject that. We can dodge the
5937  * problem by making the new active segment have a new timeline ID.
5938  *
5939  * In a normal crash recovery, we can just extend the timeline we were in.
5940  */
5941  newTLI = endOfRecoveryInfo->lastRecTLI;
5943  {
5944  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5945  ereport(LOG,
5946  (errmsg("selected new timeline ID: %u", newTLI)));
5947 
5948  /*
5949  * Make a writable copy of the last WAL segment. (Note that we also
5950  * have a copy of the last block of the old WAL in
5951  * endOfRecoveryInfo->lastPage; we will use that below.)
5952  */
5953  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5954 
5955  /*
5956  * Remove the signal files out of the way, so that we don't
5957  * accidentally re-enter archive recovery mode in a subsequent crash.
5958  */
5959  if (endOfRecoveryInfo->standby_signal_file_found)
5961 
5962  if (endOfRecoveryInfo->recovery_signal_file_found)
5964 
5965  /*
5966  * Write the timeline history file, and have it archived. After this
5967  * point (or rather, as soon as the file is archived), the timeline
5968  * will appear as "taken" in the WAL archive and to any standby
5969  * servers. If we crash before actually switching to the new
5970  * timeline, standby servers will nevertheless think that we switched
5971  * to the new timeline, and will try to connect to the new timeline.
5972  * To minimize the window for that, try to do as little as possible
5973  * between here and writing the end-of-recovery record.
5974  */
5976  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5977 
5978  ereport(LOG,
5979  (errmsg("archive recovery complete")));
5980  }
5981 
5982  /* Save the selected TimeLineID in shared memory, too */
5984  XLogCtl->InsertTimeLineID = newTLI;
5985  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5987 
5988  /*
5989  * Actually, if WAL ended in an incomplete record, skip the parts that
5990  * made it through and start writing after the portion that persisted.
5991  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5992  * we'll do as soon as we're open for writing new WAL.)
5993  */
5995  {
5996  /*
5997  * We should only have a missingContrecPtr if we're not switching to a
5998  * new timeline. When a timeline switch occurs, WAL is copied from the
5999  * old timeline to the new only up to the end of the last complete
6000  * record, so there can't be an incomplete WAL record that we need to
6001  * disregard.
6002  */
6003  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6005  EndOfLog = missingContrecPtr;
6006  }
6007 
6008  /*
6009  * Prepare to write WAL starting at EndOfLog location, and init xlog
6010  * buffer cache using the block containing the last record from the
6011  * previous incarnation.
6012  */
6013  Insert = &XLogCtl->Insert;
6014  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6015  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6016 
6017  /*
6018  * Tricky point here: lastPage contains the *last* block that the LastRec
6019  * record spans, not the one it starts in. The last block is indeed the
6020  * one we want to use.
6021  */
6022  if (EndOfLog % XLOG_BLCKSZ != 0)
6023  {
6024  char *page;
6025  int len;
6026  int firstIdx;
6027 
6028  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6029  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6030  Assert(len < XLOG_BLCKSZ);
6031 
6032  /* Copy the valid part of the last block, and zero the rest */
6033  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6034  memcpy(page, endOfRecoveryInfo->lastPage, len);
6035  memset(page + len, 0, XLOG_BLCKSZ - len);
6036 
6037  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6038  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6039  }
6040  else
6041  {
6042  /*
6043  * There is no partial block to copy. Just set InitializedUpTo, and
6044  * let the first attempt to insert a log record to initialize the next
6045  * buffer.
6046  */
6047  XLogCtl->InitializedUpTo = EndOfLog;
6048  }
6049 
6050  /*
6051  * Update local and shared status. This is OK to do without any locks
6052  * because no other process can be reading or writing WAL yet.
6053  */
6054  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6058  XLogCtl->LogwrtRqst.Write = EndOfLog;
6059  XLogCtl->LogwrtRqst.Flush = EndOfLog;
6060 
6061  /*
6062  * Preallocate additional log files, if wanted.
6063  */
6064  PreallocXlogFiles(EndOfLog, newTLI);
6065 
6066  /*
6067  * Okay, we're officially UP.
6068  */
6069  InRecovery = false;
6070 
6071  /* start the archive_timeout timer and LSN running */
6072  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6073  XLogCtl->lastSegSwitchLSN = EndOfLog;
6074 
6075  /* also initialize latestCompletedXid, to nextXid - 1 */
6076  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6079  LWLockRelease(ProcArrayLock);
6080 
6081  /*
6082  * Start up subtrans, if not already done for hot standby. (commit
6083  * timestamps are started below, if necessary.)
6084  */
6086  StartupSUBTRANS(oldestActiveXID);
6087 
6088  /*
6089  * Perform end of recovery actions for any SLRUs that need it.
6090  */
6091  TrimCLOG();
6092  TrimMultiXact();
6093 
6094  /*
6095  * Reload shared-memory state for prepared transactions. This needs to
6096  * happen before renaming the last partial segment of the old timeline as
6097  * it may be possible that we have to recover some transactions from it.
6098  */
6100 
6101  /* Shut down xlogreader */
6103 
6104  /* Enable WAL writes for this backend only. */
6106 
6107  /* If necessary, write overwrite-contrecord before doing anything else */
6109  {
6112  }
6113 
6114  /*
6115  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6116  * record before resource manager writes cleanup WAL records or checkpoint
6117  * record is written.
6118  */
6119  Insert->fullPageWrites = lastFullPageWrites;
6121 
6122  /*
6123  * Emit checkpoint or end-of-recovery record in XLOG, if required.
6124  */
6125  if (performedWalRecovery)
6126  promoted = PerformRecoveryXLogAction();
6127 
6128  /*
6129  * If any of the critical GUCs have changed, log them before we allow
6130  * backends to write WAL.
6131  */
6133 
6134  /* If this is archive recovery, perform post-recovery cleanup actions. */
6136  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6137 
6138  /*
6139  * Local WAL inserts enabled, so it's time to finish initialization of
6140  * commit timestamp.
6141  */
6143 
6144  /*
6145  * All done with end-of-recovery actions.
6146  *
6147  * Now allow backends to write WAL and update the control file status in
6148  * consequence. SharedRecoveryState, that controls if backends can write
6149  * WAL, is updated while holding ControlFileLock to prevent other backends
6150  * from looking at an inconsistent state of the control file in shared memory.
6151  * There is still a small window during which backends can write WAL and
6152  * the control file is still referring to a system not in DB_IN_PRODUCTION
6153  * state while looking at the on-disk control file.
6154  *
6155  * Also, we use info_lck to update SharedRecoveryState to ensure that
6156  * there are no race conditions concerning visibility of other recent
6157  * updates to shared memory.
6158  */
6159  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6161 
6165 
6167  LWLockRelease(ControlFileLock);
6168 
6169  /*
6170  * Wake up all waiters for replay LSN. They need to report an error that
6171  * recovery was ended before reaching the target LSN.
6172  */
6174 
6175  /*
6176  * Shutdown the recovery environment. This must occur after
6177  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6178  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so as
6179  * any session building a snapshot will not rely on KnownAssignedXids as
6180  * RecoveryInProgress() would return false at this stage. This is
6181  * particularly critical for prepared 2PC transactions, that would still
6182  * need to be included in snapshots once recovery has ended.
6183  */
6186 
6187  /*
6188  * If there were cascading standby servers connected to us, nudge any wal
6189  * sender processes to notice that we've been promoted.
6190  */
6191  WalSndWakeup(true, true);
6192 
6193  /*
6194  * If this was a promotion, request an (online) checkpoint now. This isn't
6195  * required for consistency, but the last restartpoint might be far back,
6196  * and in case of a crash, recovering from it might take longer than is
6197  * appropriate now that we're not in standby mode anymore.
6198  */
6199  if (promoted)
6201 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:494
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:643
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
#define FATAL
Definition: elog.h:41
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:782
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:872
void SyncDataDirectory(void)
Definition: fd.c:3568
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:451
void TrimMultiXact(void)
Definition: multixact.c:2178
void StartupMultiXact(void)
Definition: multixact.c:2153
void StartupReplicationOrigin(void)
Definition: origin.c:703
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(XLogRecPtr redo)
Definition: pgstat.c:477
void pgstat_discard_stats(void)
Definition: pgstat.c:489
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6810
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1924
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
bool track_commit_timestamp
Definition: pg_control.h:185
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
XLogRecPtr InitializedUpTo
Definition: xlog.c:484
char * pages
Definition: xlog.c:491
pg_time_t lastSegSwitchTime
Definition: xlog.c:466
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:467
pg_atomic_uint64 * xlblocks
Definition: xlog.c:492
pg_atomic_uint64 logWriteResult
Definition: xlog.c:471
pg_atomic_uint64 logFlushResult
Definition: xlog.c:472
pg_atomic_uint64 logInsertResult
Definition: xlog.c:470
XLogRecPtr Flush
Definition: xlog.c:321
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2090
void restoreTwoPhaseData(void)
Definition: twophase.c:1905
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1969
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2049
void WaitLSNSetLatches(XLogRecPtr currentLSN)
Definition: waitlsn.c:155
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3638
void UpdateFullPageWrites(void)
Definition: xlog.c:8165
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5192
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4092
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7439
static void XLogReportParameters(void)
Definition: xlog.c:8102
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6283
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5282
static bool lastFullPageWrites
Definition: xlog.c:216
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1940
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5207
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5378
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:590
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3825
#define TABLESPACE_MAP_OLD
Definition: xlog.h:306
#define TABLESPACE_MAP
Definition: xlog.h:305
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:301
#define BACKUP_LABEL_OLD
Definition: xlog.h:303
#define BACKUP_LABEL_FILE
Definition: xlog.h:302
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:300
#define XRecOffIsValid(xlrp)
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
void ShutdownWalRecovery(void)
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:138
bool InArchiveRecovery
Definition: xlogrecovery.c:139
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:374
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:373
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:513
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:123
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WaitLSNSetLatches(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6208 of file xlog.c.

6209 {
6210  /* initialize minRecoveryPoint to this record */
6211  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6213  if (ControlFile->minRecoveryPoint < EndRecPtr)
6214  {
6215  ControlFile->minRecoveryPoint = EndRecPtr;
6216  ControlFile->minRecoveryPointTLI = replayTLI;
6217  }
6218  /* update local copy */
6221 
6222  /*
6223  * The startup process can update its local copy of minRecoveryPoint from
6224  * this point.
6225  */
6226  updateMinRecoveryPoint = true;
6227 
6229 
6230  /*
6231  * We update SharedRecoveryState while holding the lock on ControlFileLock
6232  * so both states are consistent in shared memory.
6233  */
6237 
6238  LWLockRelease(ControlFileLock);
6239 }
static bool updateMinRecoveryPoint
Definition: xlog.c:647

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8165 of file xlog.c.

8166 {
8168  bool recoveryInProgress;
8169 
8170  /*
8171  * Do nothing if full_page_writes has not been changed.
8172  *
8173  * It's safe to check the shared full_page_writes without the lock,
8174  * because we assume that there is no concurrently running process which
8175  * can update it.
8176  */
8177  if (fullPageWrites == Insert->fullPageWrites)
8178  return;
8179 
8180  /*
8181  * Perform this outside critical section so that the WAL insert
8182  * initialization done by RecoveryInProgress() doesn't trigger an
8183  * assertion failure.
8184  */
8185  recoveryInProgress = RecoveryInProgress();
8186 
8188 
8189  /*
8190  * It's always safe to take full page images, even when not strictly
8191  * required, but not the other way round. So if we're setting full_page_writes
8192  * to true, first set it true and then write the WAL record. If we're
8193  * setting it to false, first write the WAL record and then set the global
8194  * flag.
8195  */
8196  if (fullPageWrites)
8197  {
8199  Insert->fullPageWrites = true;
8201  }
8202 
8203  /*
8204  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8205  * full_page_writes during archive recovery, if required.
8206  */
8207  if (XLogStandbyInfoActive() && !recoveryInProgress)
8208  {
8209  XLogBeginInsert();
8210  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8211 
8212  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8213  }
8214 
8215  if (!fullPageWrites)
8216  {
8218  Insert->fullPageWrites = false;
8220  }
8221  END_CRIT_SECTION();
8222 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1747 of file xlog.c.

1749 {
1750  char *pdst = dstbuf;
1751  XLogRecPtr recptr = startptr;
1752  XLogRecPtr inserted;
1753  Size nbytes = count;
1754 
1755  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1756  return 0;
1757 
1758  Assert(!XLogRecPtrIsInvalid(startptr));
1759 
1760  /*
1761  * Caller should ensure that the requested data has been inserted into WAL
1762  * buffers before we try to read it.
1763  */
1765  if (startptr + count > inserted)
1766  ereport(ERROR,
1767  errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1768  LSN_FORMAT_ARGS(startptr + count),
1769  LSN_FORMAT_ARGS(inserted)));
1770 
1771  /*
1772  * Loop through the buffers without a lock. For each buffer, atomically
1773  * read and verify the end pointer, then copy the data out, and finally
1774  * re-read and re-verify the end pointer.
1775  *
1776  * Once a page is evicted, it never returns to the WAL buffers, so if the
1777  * end pointer matches the expected end pointer before and after we copy
1778  * the data, then the right page must have been present during the data
1779  * copy. Read barriers are necessary to ensure that the data copy actually
1780  * happens between the two verification steps.
1781  *
1782  * If either verification fails, we simply terminate the loop and return
1783  * with the data that had been already copied out successfully.
1784  */
1785  while (nbytes > 0)
1786  {
1787  uint32 offset = recptr % XLOG_BLCKSZ;
1788  int idx = XLogRecPtrToBufIdx(recptr);
1789  XLogRecPtr expectedEndPtr;
1790  XLogRecPtr endptr;
1791  const char *page;
1792  const char *psrc;
1793  Size npagebytes;
1794 
1795  /*
1796  * Calculate the end pointer we expect in the xlblocks array if the
1797  * correct page is present.
1798  */
1799  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1800 
1801  /*
1802  * First verification step: check that the correct page is present in
1803  * the WAL buffers.
1804  */
1805  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1806  if (expectedEndPtr != endptr)
1807  break;
1808 
1809  /*
1810  * The correct page is present (or was at the time the endptr was
1811  * read; must re-verify later). Calculate pointer to source data and
1812  * determine how much data to read from this page.
1813  */
1814  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1815  psrc = page + offset;
1816  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1817 
1818  /*
1819  * Ensure that the data copy and the first verification step are not
1820  * reordered.
1821  */
1822  pg_read_barrier();
1823 
1824  /* data copy */
1825  memcpy(pdst, psrc, npagebytes);
1826 
1827  /*
1828  * Ensure that the data copy and the second verification step are not
1829  * reordered.
1830  */
1831  pg_read_barrier();
1832 
1833  /*
1834  * Second verification step: check that the page we read from wasn't
1835  * evicted while we were copying the data.
1836  */
1837  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1838  if (expectedEndPtr != endptr)
1839  break;
1840 
1841  pdst += npagebytes;
1842  recptr += npagebytes;
1843  nbytes -= npagebytes;
1844  }
1845 
1846  Assert(pdst - dstbuf <= count);
1847 
1848  return pdst - dstbuf;
1849 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_read_barrier()
Definition: atomics.h:156
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
#define Min(x, y)
Definition: c.h:995
size_t Size
Definition: c.h:596
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6519

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ xlog_desc()

void xlog_desc ( StringInfo  buf,
struct XLogReaderState record 
)

Definition at line 58 of file xlogdesc.c.

59 {
60  char *rec = XLogRecGetData(record);
61  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
62 
63  if (info == XLOG_CHECKPOINT_SHUTDOWN ||
64  info == XLOG_CHECKPOINT_ONLINE)
65  {
66  CheckPoint *checkpoint = (CheckPoint *) rec;
67 
68  appendStringInfo(buf, "redo %X/%X; "
69  "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
70  "oldest xid %u in DB %u; oldest multi %u in DB %u; "
71  "oldest/newest commit timestamp xid: %u/%u; "
72  "oldest running xid %u; %s",
73  LSN_FORMAT_ARGS(checkpoint->redo),
74  checkpoint->ThisTimeLineID,
75  checkpoint->PrevTimeLineID,
76  checkpoint->fullPageWrites ? "true" : "false",
77  get_wal_level_string(checkpoint->wal_level),
79  XidFromFullTransactionId(checkpoint->nextXid),
80  checkpoint->nextOid,
81  checkpoint->nextMulti,
82  checkpoint->nextMultiOffset,
83  checkpoint->oldestXid,
84  checkpoint->oldestXidDB,
85  checkpoint->oldestMulti,
86  checkpoint->oldestMultiDB,
87  checkpoint->oldestCommitTsXid,
88  checkpoint->newestCommitTsXid,
89  checkpoint->oldestActiveXid,
90  (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
91  }
92  else if (info == XLOG_NEXTOID)
93  {
94  Oid nextOid;
95 
96  memcpy(&nextOid, rec, sizeof(Oid));
97  appendStringInfo(buf, "%u", nextOid);
98  }
99  else if (info == XLOG_RESTORE_POINT)
100  {
101  xl_restore_point *xlrec = (xl_restore_point *) rec;
102 
104  }
105  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
106  {
107  /* no further information to print */
108  }
109  else if (info == XLOG_BACKUP_END)
110  {
111  XLogRecPtr startpoint;
112 
113  memcpy(&startpoint, rec, sizeof(XLogRecPtr));
114  appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint));
115  }
116  else if (info == XLOG_PARAMETER_CHANGE)
117  {
118  xl_parameter_change xlrec;
119  const char *wal_level_str;
120 
121  memcpy(&xlrec, rec, sizeof(xl_parameter_change));
123 
124  appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
125  "max_wal_senders=%d max_prepared_xacts=%d "
126  "max_locks_per_xact=%d wal_level=%s "
127  "wal_log_hints=%s track_commit_timestamp=%s",
128  xlrec.MaxConnections,
129  xlrec.max_worker_processes,
130  xlrec.max_wal_senders,
131  xlrec.max_prepared_xacts,
132  xlrec.max_locks_per_xact,
134  xlrec.wal_log_hints ? "on" : "off",
135  xlrec.track_commit_timestamp ? "on" : "off");
136  }
137  else if (info == XLOG_FPW_CHANGE)
138  {
139  bool fpw;
140 
141  memcpy(&fpw, rec, sizeof(bool));
142  appendStringInfoString(buf, fpw ? "true" : "false");
143  }
144  else if (info == XLOG_END_OF_RECOVERY)
145  {
146  xl_end_of_recovery xlrec;
147 
148  memcpy(&xlrec, rec, sizeof(xl_end_of_recovery));
149  appendStringInfo(buf, "tli %u; prev tli %u; time %s; wal_level %s",
150  xlrec.ThisTimeLineID, xlrec.PrevTimeLineID,
153  }
154  else if (info == XLOG_OVERWRITE_CONTRECORD)
155  {
157 
158  memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord));
159  appendStringInfo(buf, "lsn %X/%X; time %s",
162  }
163  else if (info == XLOG_CHECKPOINT_REDO)
164  {
165  int wal_level;
166 
167  memcpy(&wal_level, rec, sizeof(int));
169  }
170 }
static const char * wal_level_str(WalLevel wal_level)
unsigned char uint8
Definition: c.h:504
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
static char * buf
Definition: pg_test_fsync.c:73
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID
char rp_name[MAXFNAMELEN]
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
static const char * get_wal_level_string(int wal_level)
Definition: xlogdesc.c:40
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLR_INFO_MASK
Definition: xlogrecord.h:62

References appendStringInfo(), appendStringInfoString(), buf, xl_end_of_recovery::end_time, EpochFromFullTransactionId, CheckPoint::fullPageWrites, get_wal_level_string(), LSN_FORMAT_ARGS, xl_parameter_change::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, xl_parameter_change::max_wal_senders, xl_parameter_change::max_worker_processes, xl_parameter_change::MaxConnections, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, CheckPoint::nextXid, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, xl_end_of_recovery::PrevTimeLineID, CheckPoint::PrevTimeLineID, CheckPoint::redo, xl_restore_point::rp_name, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, timestamptz_to_str(), xl_parameter_change::track_commit_timestamp, wal_level, xl_parameter_change::wal_level, xl_end_of_recovery::wal_level, CheckPoint::wal_level, wal_level_str(), xl_parameter_change::wal_log_hints, XidFromFullTransactionId, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLogRecGetData, XLogRecGetInfo, and XLR_INFO_MASK.

◆ xlog_identify()

const char* xlog_identify ( uint8  info)

Definition at line 173 of file xlogdesc.c.

174 {
175  const char *id = NULL;
176 
177  switch (info & ~XLR_INFO_MASK)
178  {
180  id = "CHECKPOINT_SHUTDOWN";
181  break;
183  id = "CHECKPOINT_ONLINE";
184  break;
185  case XLOG_NOOP:
186  id = "NOOP";
187  break;
188  case XLOG_NEXTOID:
189  id = "NEXTOID";
190  break;
191  case XLOG_SWITCH:
192  id = "SWITCH";
193  break;
194  case XLOG_BACKUP_END:
195  id = "BACKUP_END";
196  break;
198  id = "PARAMETER_CHANGE";
199  break;
200  case XLOG_RESTORE_POINT:
201  id = "RESTORE_POINT";
202  break;
203  case XLOG_FPW_CHANGE:
204  id = "FPW_CHANGE";
205  break;
207  id = "END_OF_RECOVERY";
208  break;
210  id = "OVERWRITE_CONTRECORD";
211  break;
212  case XLOG_FPI:
213  id = "FPI";
214  break;
215  case XLOG_FPI_FOR_HINT:
216  id = "FPI_FOR_HINT";
217  break;
219  id = "CHECKPOINT_REDO";
220  break;
221  }
222 
223  return id;
224 }
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_SWITCH
Definition: pg_control.h:72

References XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, and XLR_INFO_MASK.

◆ xlog_redo()

void xlog_redo ( struct XLogReaderState record)

Definition at line 8234 of file xlog.c.

8235 {
8236  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8237  XLogRecPtr lsn = record->EndRecPtr;
8238 
8239  /*
8240  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8241  * XLOG_FPI_FOR_HINT records.
8242  */
8243  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8244  !XLogRecHasAnyBlockRefs(record));
8245 
8246  if (info == XLOG_NEXTOID)
8247  {
8248  Oid nextOid;
8249 
8250  /*
8251  * We used to try to take the maximum of TransamVariables->nextOid and
8252  * the recorded nextOid, but that fails if the OID counter wraps
8253  * around. Since no OID allocation should be happening during replay
8254  * anyway, better to just believe the record exactly. We still take
8255  * OidGenLock while setting the variable, just in case.
8256  */
8257  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8258  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8259  TransamVariables->nextOid = nextOid;
8261  LWLockRelease(OidGenLock);
8262  }
8263  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8264  {
8265  CheckPoint checkPoint;
8266  TimeLineID replayTLI;
8267 
8268  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8269  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8270  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8271  TransamVariables->nextXid = checkPoint.nextXid;
8272  LWLockRelease(XidGenLock);
8273  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8274  TransamVariables->nextOid = checkPoint.nextOid;
8276  LWLockRelease(OidGenLock);
8277  MultiXactSetNextMXact(checkPoint.nextMulti,
8278  checkPoint.nextMultiOffset);
8279 
8281  checkPoint.oldestMultiDB);
8282 
8283  /*
8284  * No need to set oldestClogXid here as well; it'll be set when we
8285  * redo an xl_clog_truncate if it changed since initialization.
8286  */
8287  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8288 
8289  /*
8290  * If we see a shutdown checkpoint while waiting for an end-of-backup
8291  * record, the backup was canceled and the end-of-backup record will
8292  * never arrive.
8293  */
8297  ereport(PANIC,
8298  (errmsg("online backup was canceled, recovery cannot continue")));
8299 
8300  /*
8301  * If we see a shutdown checkpoint, we know that nothing was running
8302  * on the primary at this point. So fake-up an empty running-xacts
8303  * record and use that here and now. Recover additional standby state
8304  * for prepared transactions.
8305  */
8307  {
8308  TransactionId *xids;
8309  int nxids;
8310  TransactionId oldestActiveXID;
8311  TransactionId latestCompletedXid;
8312  RunningTransactionsData running;
8313 
8314  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8315 
8316  /* Update pg_subtrans entries for any prepared transactions */
8318 
8319  /*
8320  * Construct a RunningTransactions snapshot representing a shut
8321  * down server, with only prepared transactions still alive. We're
8322  * never overflowed at this point because all subxids are listed
8323  * with their parent prepared transactions.
8324  */
8325  running.xcnt = nxids;
8326  running.subxcnt = 0;
8328  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8329  running.oldestRunningXid = oldestActiveXID;
8330  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8331  TransactionIdRetreat(latestCompletedXid);
8332  Assert(TransactionIdIsNormal(latestCompletedXid));
8333  running.latestCompletedXid = latestCompletedXid;
8334  running.xids = xids;
8335 
8336  ProcArrayApplyRecoveryInfo(&running);
8337  }
8338 
8339  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8340  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8341  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8342  LWLockRelease(ControlFileLock);
8343 
8344  /* Update shared-memory copy of checkpoint XID/epoch */
8346  XLogCtl->ckptFullXid = checkPoint.nextXid;
8348 
8349  /*
8350  * We should've already switched to the new TLI before replaying this
8351  * record.
8352  */
8353  (void) GetCurrentReplayRecPtr(&replayTLI);
8354  if (checkPoint.ThisTimeLineID != replayTLI)
8355  ereport(PANIC,
8356  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8357  checkPoint.ThisTimeLineID, replayTLI)));
8358 
8359  RecoveryRestartPoint(&checkPoint, record);
8360  }
8361  else if (info == XLOG_CHECKPOINT_ONLINE)
8362  {
8363  CheckPoint checkPoint;
8364  TimeLineID replayTLI;
8365 
8366  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8367  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8368  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8370  checkPoint.nextXid))
8371  TransamVariables->nextXid = checkPoint.nextXid;
8372  LWLockRelease(XidGenLock);
8373 
8374  /*
8375  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8376  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8377  * counter is from the start of the checkpoint and might well be stale
8378  * compared to later XLOG_NEXTOID records. We could try to take the
8379  * maximum of the nextOid counter and our latest value, but since
8380  * there's no particular guarantee about the speed with which the OID
8381  * counter wraps around, that's a risky thing to do. In any case,
8382  * users of the nextOid counter are required to avoid assignment of
8383  * duplicates, so that a somewhat out-of-date value should be safe.
8384  */
8385 
8386  /* Handle multixact */
8388  checkPoint.nextMultiOffset);
8389 
8390  /*
8391  * NB: This may perform multixact truncation when replaying WAL
8392  * generated by an older primary.
8393  */
8395  checkPoint.oldestMultiDB);
8397  checkPoint.oldestXid))
8398  SetTransactionIdLimit(checkPoint.oldestXid,
8399  checkPoint.oldestXidDB);
8400  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8401  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8402  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8403  LWLockRelease(ControlFileLock);
8404 
8405  /* Update shared-memory copy of checkpoint XID/epoch */
8407  XLogCtl->ckptFullXid = checkPoint.nextXid;
8409 
8410  /* TLI should not change in an on-line checkpoint */
8411  (void) GetCurrentReplayRecPtr(&replayTLI);
8412  if (checkPoint.ThisTimeLineID != replayTLI)
8413  ereport(PANIC,
8414  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8415  checkPoint.ThisTimeLineID, replayTLI)));
8416 
8417  RecoveryRestartPoint(&checkPoint, record);
8418  }
8419  else if (info == XLOG_OVERWRITE_CONTRECORD)
8420  {
8421  /* nothing to do here, handled in xlogrecovery_redo() */
8422  }
8423  else if (info == XLOG_END_OF_RECOVERY)
8424  {
8425  xl_end_of_recovery xlrec;
8426  TimeLineID replayTLI;
8427 
8428  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8429 
8430  /*
8431  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8432  * but this case is rarer and harder to test, so the benefit doesn't
8433  * outweigh the potential extra cost of maintenance.
8434  */
8435 
8436  /*
8437  * We should've already switched to the new TLI before replaying this
8438  * record.
8439  */
8440  (void) GetCurrentReplayRecPtr(&replayTLI);
8441  if (xlrec.ThisTimeLineID != replayTLI)
8442  ereport(PANIC,
8443  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8444  xlrec.ThisTimeLineID, replayTLI)));
8445  }
8446  else if (info == XLOG_NOOP)
8447  {
8448  /* nothing to do here */
8449  }
8450  else if (info == XLOG_SWITCH)
8451  {
8452  /* nothing to do here */
8453  }
8454  else if (info == XLOG_RESTORE_POINT)
8455  {
8456  /* nothing to do here, handled in xlogrecovery.c */
8457  }
8458  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8459  {
8460  /*
8461  * XLOG_FPI records contain nothing else but one or more block
8462  * references. Every block reference must include a full-page image
8463  * even if full_page_writes was disabled when the record was generated
8464  * - otherwise there would be no point in this record.
8465  *
8466  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8467  * WAL-logged because of a hint bit update. They are only generated
8468  * when checksums and/or wal_log_hints are enabled. They may include
8469  * no full-page images if full_page_writes was disabled when they were
8470  * generated. In this case there is nothing to do here.
8471  *
8472  * No recovery conflicts are generated by these generic records - if a
8473  * resource manager needs to generate conflicts, it has to define a
8474  * separate WAL record type and redo routine.
8475  */
8476  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8477  {
8478  Buffer buffer;
8479 
8480  if (!XLogRecHasBlockImage(record, block_id))
8481  {
8482  if (info == XLOG_FPI)
8483  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8484  continue;
8485  }
8486 
8487  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8488  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8489  UnlockReleaseBuffer(buffer);
8490  }
8491  }
8492  else if (info == XLOG_BACKUP_END)
8493  {
8494  /* nothing to do here, handled in xlogrecovery_redo() */
8495  }
8496  else if (info == XLOG_PARAMETER_CHANGE)
8497  {
8498  xl_parameter_change xlrec;
8499 
8500  /* Update our copy of the parameters in pg_control */
8501  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8502 
8503  /*
8504  * Invalidate logical slots if we are in hot standby and the primary
8505  * does not have a WAL level sufficient for logical decoding. No need
8506  * to search for potentially conflicting logical slots if standby is
8507  * running with wal_level lower than logical, because in that case, we
8508  * would have either disallowed creation of logical slots or
8509  * invalidated existing ones.
8510  */
8511  if (InRecovery && InHotStandby &&
8512  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8515  0, InvalidOid,
8517 
8518  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8524  ControlFile->wal_level = xlrec.wal_level;
8526 
8527  /*
8528  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8529  * recover back up to this point before allowing hot standby again.
8530  * This is important if the max_* settings are decreased, to ensure
8531  * you don't run queries against the WAL preceding the change. The
8532  * local copies cannot be updated as long as crash recovery is
8533  * happening and we expect all the WAL to be replayed.
8534  */
8535  if (InArchiveRecovery)
8536  {
8539  }
8541  {
8542  TimeLineID replayTLI;
8543 
8544  (void) GetCurrentReplayRecPtr(&replayTLI);
8546  ControlFile->minRecoveryPointTLI = replayTLI;
8547  }
8548 
8552 
8554  LWLockRelease(ControlFileLock);
8555 
8556  /* Check to see if any parameter change gives a problem on recovery */
8558  }
8559  else if (info == XLOG_FPW_CHANGE)
8560  {
8561  bool fpw;
8562 
8563  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8564 
8565  /*
8566  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8567  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8568  * full_page_writes has been disabled during online backup.
8569  */
8570  if (!fpw)
8571  {
8573  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8576  }
8577 
8578  /* Keep track of full_page_writes */
8579  lastFullPageWrites = fpw;
8580  }
8581  else if (info == XLOG_CHECKPOINT_REDO)
8582  {
8583  /* nothing to do here, just for informational purposes */
8584  }
8585 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2536
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2511
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:58
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7549
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2983 of file xlog.c.

2984 {
2985  XLogwrtRqst WriteRqst;
2986  bool flexible = true;
2987  static TimestampTz lastflush;
2988  TimestampTz now;
2989  int flushblocks;
2990  TimeLineID insertTLI;
2991 
2992  /* XLOG doesn't need flushing during recovery */
2993  if (RecoveryInProgress())
2994  return false;
2995 
2996  /*
2997  * Since we're not in recovery, InsertTimeLineID is set and can't change,
2998  * so we can read it without a lock.
2999  */
3000  insertTLI = XLogCtl->InsertTimeLineID;
3001 
3002  /* read updated LogwrtRqst */
3004  WriteRqst = XLogCtl->LogwrtRqst;
3006 
3007  /* back off to last completed page boundary */
3008  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3009 
3010  /* if we have already flushed that far, consider async commit records */
3012  if (WriteRqst.Write <= LogwrtResult.Flush)
3013  {
3015  WriteRqst.Write = XLogCtl->asyncXactLSN;
3017  flexible = false; /* ensure it all gets written */
3018  }
3019 
3020  /*
3021  * If already known flushed, we're done. Just need to check if we are
3022  * holding an open file handle to a logfile that's no longer in use,
3023  * preventing the file from being deleted.
3024  */
3025  if (WriteRqst.Write <= LogwrtResult.Flush)
3026  {
3027  if (openLogFile >= 0)
3028  {
3031  {
3032  XLogFileClose();
3033  }
3034  }
3035  return false;
3036  }
3037 
3038  /*
3039  * Determine how far to flush WAL, based on the wal_writer_delay and
3040  * wal_writer_flush_after GUCs.
3041  *
3042  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3043  * wal_writer_flush_after, to decide when to wake us up. Make sure the
3044  * logic is the same in both places if you change this.
3045  */
3047  flushblocks =
3048  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3049 
3050  if (WalWriterFlushAfter == 0 || lastflush == 0)
3051  {
3052  /* first call, or block based limits disabled */
3053  WriteRqst.Flush = WriteRqst.Write;
3054  lastflush = now;
3055  }
3056  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3057  {
3058  /*
3059  * Flush the writes at least every WalWriterDelay ms. This is
3060  * important to bound the amount of time it takes for an asynchronous
3061  * commit to hit disk.
3062  */
3063  WriteRqst.Flush = WriteRqst.Write;
3064  lastflush = now;
3065  }
3066  else if (flushblocks >= WalWriterFlushAfter)
3067  {
3068  /* exceeded wal_writer_flush_after blocks, flush */
3069  WriteRqst.Flush = WriteRqst.Write;
3070  lastflush = now;
3071  }
3072  else
3073  {
3074  /* no flushing, this time round */
3075  WriteRqst.Flush = 0;
3076  }
3077 
3078 #ifdef WAL_DEBUG
3079  if (XLOG_DEBUG)
3080  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3081  LSN_FORMAT_ARGS(WriteRqst.Write),
3082  LSN_FORMAT_ARGS(WriteRqst.Flush),
3085 #endif
3086 
3088 
3089  /* now wait for any in-progress insertions to finish and get write lock */
3090  WaitXLogInsertionsToFinish(WriteRqst.Write);
3091  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3093  if (WriteRqst.Write > LogwrtResult.Write ||
3094  WriteRqst.Flush > LogwrtResult.Flush)
3095  {
3096  XLogWrite(WriteRqst, insertTLI, flexible);
3097  }
3098  LWLockRelease(WALWriteLock);
3099 
3100  END_CRIT_SECTION();
3101 
3102  /* wake up walsenders now that we've released heavily contended locks */
3104 
3105  /*
3106  * Great, done. To take some work off the critical path, try to initialize
3107  * as many of the no-longer-needed WAL buffers for future use as we can.
3108  */
3109  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3110 
3111  /*
3112  * If we determined that we need to write data, but somebody else
3113  * wrote/flushed already, it should be considered as being active, to
3114  * avoid hibernating too early.
3115  */
3116  return true;
3117 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
XLogRecPtr asyncXactLSN
Definition: xlog.c:457
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:72
int WalWriterDelay
Definition: walwriter.c:71
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1503
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1984
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2313
static void XLogFileClose(void)
Definition: xlog.c:3632
static XLogSegNo openLogSegNo
Definition: xlog.c:635
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2289 of file xlog.c.

2290 {
2291  XLogSegNo old_segno; /* segment number derived from RedoRecPtr below */
2292 
2293  XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2294 
2295  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2296  return true; /* new_segno is at least CheckPointSegments - 1 segments past old_segno */
2297  return false;
2298 }
int CheckPointSegments
Definition: xlog.c:155

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3373 of file xlog.c.

3374 {
3375  bool ignore_added; /* receives XLogFileInitInternal's "added" output; not examined here */
3376  char path[MAXPGPATH];
3377  int fd;
3378 
3379  Assert(logtli != 0);
3380 
3381  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3382  if (fd >= 0)
3383  return fd; /* the helper already handed back a usable descriptor */
3384 
3385  /* Now open original target segment (might not be file I just made) */
3386  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3387  get_sync_bit(wal_sync_method));
3388  if (fd < 0)
3389  ereport(ERROR,
3390  (errcode_for_file_access(),
3391  errmsg("could not open file \"%s\": %m", path)));
3392  return fd;
3393 }
#define PG_BINARY
Definition: c.h:1264
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1087
#define O_CLOEXEC
Definition: win32_port.h:359
static int get_sync_bit(int method)
Definition: xlog.c:8592
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3203

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3611 of file xlog.c.

3612 {
3613  char path[MAXPGPATH];
3614  int fd;
3615 
3616  XLogFilePath(path, tli, segno, wal_segment_size);
3617 
3618  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3619  get_sync_bit(wal_sync_method));
3620  if (fd < 0)
3621  ereport(PANIC, /* unlike XLogFileInit, a failed open here is reported at PANIC level */
3622  (errcode_for_file_access(),
3623  errmsg("could not open file \"%s\": %m", path)));
3624 
3625  return fd;
3626 }
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2795 of file xlog.c.

2796 {
2797  XLogRecPtr WriteRqstPtr; /* write target; starts at 'record' and may be raised in the loop */
2798  XLogwrtRqst WriteRqst;
2799  TimeLineID insertTLI = XLogCtl->InsertTimeLineID; /* timeline handed to XLogWrite() */
2800 
2801  /*
2802  * During REDO, we are reading not writing WAL. Therefore, instead of
2803  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2804  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2805  * to act this way too, and because when it tries to write the
2806  * end-of-recovery checkpoint, it should indeed flush.
2807  */
2808  if (!XLogInsertAllowed())
2809  {
2810  UpdateMinRecoveryPoint(record, false);
2811  return;
2812  }
2813 
2814  /* Quick exit if already known flushed */
2815  if (record <= LogwrtResult.Flush)
2816  return;
2817 
2818 #ifdef WAL_DEBUG
2819  if (XLOG_DEBUG)
2820  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
2821  LSN_FORMAT_ARGS(record),
2822  LSN_FORMAT_ARGS(LogwrtResult.Write),
2823  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2824 #endif
2825 
2826  START_CRIT_SECTION();
2827 
2828  /*
2829  * Since fsync is usually a horribly expensive operation, we try to
2830  * piggyback as much data as we can on each fsync: if we see any more data
2831  * entered into the xlog buffer, we'll write and fsync that too, so that
2832  * the final value of LogwrtResult.Flush is as large as possible. This
2833  * gives us some chance of avoiding another fsync immediately after.
2834  */
2835 
2836  /* initialize to given target; may increase below */
2837  WriteRqstPtr = record;
2838 
2839  /*
2840  * Now wait until we get the write lock, or someone else does the flush
2841  * for us.
2842  */
2843  for (;;)
2844  {
2845  XLogRecPtr insertpos;
2846 
2847  /* done already? */
2848  RefreshXLogWriteResult(LogwrtResult);
2849  if (record <= LogwrtResult.Flush)
2850  break;
2851 
2852  /*
2853  * Before actually performing the write, wait for all in-flight
2854  * insertions to the pages we're about to write to finish.
2855  */
2856  SpinLockAcquire(&XLogCtl->info_lck);
2857  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2858  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2859  SpinLockRelease(&XLogCtl->info_lck);
2860  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2861 
2862  /*
2863  * Try to get the write lock. If we can't get it immediately, wait
2864  * until it's released, and recheck if we still need to do the flush
2865  * or if the backend that held the lock did it for us already. This
2866  * helps to maintain a good rate of group committing when the system
2867  * is bottlenecked by the speed of fsyncing.
2868  */
2869  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2870  {
2871  /*
2872  * The lock is now free, but we didn't acquire it yet. Before we
2873  * do, loop back to check if someone else flushed the record for
2874  * us already.
2875  */
2876  continue;
2877  }
2878 
2879  /* Got the lock; recheck whether request is satisfied */
2880  RefreshXLogWriteResult(LogwrtResult);
2881  if (record <= LogwrtResult.Flush)
2882  {
2883  LWLockRelease(WALWriteLock);
2884  break;
2885  }
2886 
2887  /*
2888  * Sleep before flush! By adding a delay here, we may give further
2889  * backends the opportunity to join the backlog of group commit
2890  * followers; this can significantly improve transaction throughput,
2891  * at the risk of increasing transaction latency.
2892  *
2893  * We do not sleep if enableFsync is not turned on, nor if there are
2894  * fewer than CommitSiblings other backends with active transactions.
2895  */
2896  if (CommitDelay > 0 && enableFsync &&
2897  MinimumActiveBackends(CommitSiblings))
2898  {
2899  pg_usleep(CommitDelay);
2900 
2901  /*
2902  * Re-check how far we can now flush the WAL. It's generally not
2903  * safe to call WaitXLogInsertionsToFinish while holding
2904  * WALWriteLock, because an in-progress insertion might need to
2905  * also grab WALWriteLock to make progress. But we know that all
2906  * the insertions up to insertpos have already finished, because
2907  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2908  * We're only calling it again to allow insertpos to be moved
2909  * further forward, not to actually wait for anyone.
2910  */
2911  insertpos = WaitXLogInsertionsToFinish(insertpos);
2912  }
2913 
2914  /* try to write/flush later additions to XLOG as well */
2915  WriteRqst.Write = insertpos;
2916  WriteRqst.Flush = insertpos;
2917 
2918  XLogWrite(WriteRqst, insertTLI, false);
2919 
2920  LWLockRelease(WALWriteLock);
2921  /* done */
2922  break;
2923  }
2924 
2925  END_CRIT_SECTION();
2926 
2927  /* wake up walsenders now that we've released heavily contended locks */
2928  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2929 
2930  /*
2931  * If we still haven't flushed to the request point then we have a
2932  * problem; most likely, the requested flush point is past end of XLOG.
2933  * This has been seen to occur when a disk page has a corrupted LSN.
2934  *
2935  * Formerly we treated this as a PANIC condition, but that hurts the
2936  * system's robustness rather than helping it: we do not want to take down
2937  * the whole system due to corruption on one data page. In particular, if
2938  * the bad page is encountered again during recovery then we would be
2939  * unable to restart the database at all! (This scenario actually
2940  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2941  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2942  * the only time we can reach here during recovery is while flushing the
2943  * end-of-recovery checkpoint record, and we don't expect that to have a
2944  * bad LSN.
2945  *
2946  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2947  * since xact.c calls this routine inside a critical section. However,
2948  * calls from bufmgr.c are not within critical sections and so we will not
2949  * force a restart for a bad LSN on a data page.
2950  */
2951  if (LogwrtResult.Flush < record)
2952  elog(ERROR,
2953  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2954  LSN_FORMAT_ARGS(record),
2955  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2956 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1396
bool MinimumActiveBackends(int min)
Definition: procarray.c:3550
int CommitDelay
Definition: xlog.c:131
int CommitSiblings
Definition: xlog.c:132
bool XLogInsertAllowed(void)
Definition: xlog.c:6388

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3751 of file xlog.c.

3752 {
3753  XLogSegNo lastRemovedSegNo; /* local copy taken while info_lck is held */
3754 
3755  SpinLockAcquire(&XLogCtl->info_lck);
3756  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3757  SpinLockRelease(&XLogCtl->info_lck);
3758 
3759  return lastRemovedSegNo;
3760 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3767 of file xlog.c.

3768 {
3769  DIR *xldir;
3770  struct dirent *xlde;
3771  XLogSegNo oldest_segno = 0; /* 0 doubles as "nothing found yet" and is returned if no file matches */
3772 
3773  xldir = AllocateDir(XLOGDIR);
3774  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3775  {
3776  TimeLineID file_tli;
3777  XLogSegNo file_segno;
3778 
3779  /* Ignore files that are not XLOG segments. */
3780  if (!IsXLogFileName(xlde->d_name))
3781  continue;
3782 
3783  /* Parse filename to get TLI and segno. */
3784  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3785  wal_segment_size);
3786 
3787  /* Ignore anything that's not from the TLI of interest. */
3788  if (tli != file_tli)
3789  continue;
3790 
3791  /* If it's the oldest so far, update oldest_segno. */
3792  if (oldest_segno == 0 || file_segno < oldest_segno)
3793  oldest_segno = file_segno;
3794  }
3795 
3796  FreeDir(xldir);
3797  return oldest_segno;
3798 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6388 of file xlog.c.

6389 {
6390  /*
6391  * If value is "unconditionally true" or "unconditionally false", just
6392  * return it. This provides the normal fast path once recovery is known
6393  * done.
6394  */
6395  if (LocalXLogInsertAllowed >= 0)
6396  return (bool) LocalXLogInsertAllowed;
6397 
6398  /*
6399  * Else, must check to see if we're still in recovery.
6400  */
6401  if (RecoveryInProgress())
6402  return false;
6403 
6404  /*
6405  * On exit from recovery, reset to "unconditionally true", since there is
6406  * no need to keep checking.
6407  */
6408  LocalXLogInsertAllowed = 1;
6409  return true;
6410 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( struct XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 747 of file xlog.c.

752 {
754  pg_crc32c rdata_crc;
755  bool inserted;
756  XLogRecord *rechdr = (XLogRecord *) rdata->data;
757  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
759  XLogRecPtr StartPos;
760  XLogRecPtr EndPos;
761  bool prevDoPageWrites = doPageWrites;
762  TimeLineID insertTLI;
763 
764  /* Does this record type require special handling? */
765  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
766  {
767  if (info == XLOG_SWITCH)
768  class = WALINSERT_SPECIAL_SWITCH;
769  else if (info == XLOG_CHECKPOINT_REDO)
771  }
772 
773  /* we assume that all of the record header is in the first chunk */
774  Assert(rdata->len >= SizeOfXLogRecord);
775 
776  /* cross-check on whether we should be here or not */
777  if (!XLogInsertAllowed())
778  elog(ERROR, "cannot make new WAL entries during recovery");
779 
780  /*
781  * Given that we're not in recovery, InsertTimeLineID is set and can't
782  * change, so we can read it without a lock.
783  */
784  insertTLI = XLogCtl->InsertTimeLineID;
785 
786  /*----------
787  *
788  * We have now done all the preparatory work we can without holding a
789  * lock or modifying shared state. From here on, inserting the new WAL
790  * record to the shared WAL buffer cache is a two-step process:
791  *
792  * 1. Reserve the right amount of space from the WAL. The current head of
793  * reserved space is kept in Insert->CurrBytePos, and is protected by
794  * insertpos_lck.
795  *
796  * 2. Copy the record to the reserved WAL space. This involves finding the
797  * correct WAL buffer containing the reserved space, and copying the
798  * record in place. This can be done concurrently in multiple processes.
799  *
800  * To keep track of which insertions are still in-progress, each concurrent
801  * inserter acquires an insertion lock. In addition to just indicating that
802  * an insertion is in progress, the lock tells others how far the inserter
803  * has progressed. There is a small fixed number of insertion locks,
804  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
805  * boundary, it updates the value stored in the lock to the how far it has
806  * inserted, to allow the previous buffer to be flushed.
807  *
808  * Holding onto an insertion lock also protects RedoRecPtr and
809  * fullPageWrites from changing until the insertion is finished.
810  *
811  * Step 2 can usually be done completely in parallel. If the required WAL
812  * page is not initialized yet, you have to grab WALBufMappingLock to
813  * initialize it, but the WAL writer tries to do that ahead of insertions
814  * to avoid that from happening in the critical path.
815  *
816  *----------
817  */
819 
820  if (likely(class == WALINSERT_NORMAL))
821  {
823 
824  /*
825  * Check to see if my copy of RedoRecPtr is out of date. If so, may
826  * have to go back and have the caller recompute everything. This can
827  * only happen just after a checkpoint, so it's better to be slow in
828  * this case and fast otherwise.
829  *
830  * Also check to see if fullPageWrites was just turned on or there's a
831  * running backup (which forces full-page writes); if we weren't
832  * already doing full-page writes then go back and recompute.
833  *
834  * If we aren't doing full-page writes then RedoRecPtr doesn't
835  * actually affect the contents of the XLOG record, so we'll update
836  * our local copy but not force a recomputation. (If doPageWrites was
837  * just turned off, we could recompute the record without full pages,
838  * but we choose not to bother.)
839  */
840  if (RedoRecPtr != Insert->RedoRecPtr)
841  {
842  Assert(RedoRecPtr < Insert->RedoRecPtr);
843  RedoRecPtr = Insert->RedoRecPtr;
844  }
845  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
846 
847  if (doPageWrites &&
848  (!prevDoPageWrites ||
849  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
850  {
851  /*
852  * Oops, some buffer now needs to be backed up that the caller
853  * didn't back up. Start over.
854  */
857  return InvalidXLogRecPtr;
858  }
859 
860  /*
861  * Reserve space for the record in the WAL. This also sets the xl_prev
862  * pointer.
863  */
864  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
865  &rechdr->xl_prev);
866 
867  /* Normal records are always inserted. */
868  inserted = true;
869  }
870  else if (class == WALINSERT_SPECIAL_SWITCH)
871  {
872  /*
873  * In order to insert an XLOG_SWITCH record, we need to hold all of
874  * the WAL insertion locks, not just one, so that no one else can
875  * begin inserting a record until we've figured out how much space
876  * remains in the current WAL segment and claimed all of it.
877  *
878  * Nonetheless, this case is simpler than the normal cases handled
879  * below, which must check for changes in doPageWrites and RedoRecPtr.
880  * Those checks are only needed for records that can contain buffer
881  * references, and an XLOG_SWITCH record never does.
882  */
883  Assert(fpw_lsn == InvalidXLogRecPtr);
885  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
886  }
887  else
888  {
890 
891  /*
892  * We need to update both the local and shared copies of RedoRecPtr,
893  * which means that we need to hold all the WAL insertion locks.
894  * However, there can't be any buffer references, so as above, we need
895  * not check RedoRecPtr before inserting the record; we just need to
896  * update it afterwards.
897  */
898  Assert(fpw_lsn == InvalidXLogRecPtr);
900  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
901  &rechdr->xl_prev);
902  RedoRecPtr = Insert->RedoRecPtr = StartPos;
903  inserted = true;
904  }
905 
906  if (inserted)
907  {
908  /*
909  * Now that xl_prev has been filled in, calculate CRC of the record
910  * header.
911  */
912  rdata_crc = rechdr->xl_crc;
913  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
914  FIN_CRC32C(rdata_crc);
915  rechdr->xl_crc = rdata_crc;
916 
917  /*
918  * All the record data, including the header, is now ready to be
919  * inserted. Copy the record in the space reserved.
920  */
922  class == WALINSERT_SPECIAL_SWITCH, rdata,
923  StartPos, EndPos, insertTLI);
924 
925  /*
926  * Unless record is flagged as not important, update LSN of last
927  * important record in the current slot. When holding all locks, just
928  * update the first one.
929  */
930  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
931  {
932  int lockno = holdingAllLocks ? 0 : MyLockNo;
933 
934  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
935  }
936  }
937  else
938  {
939  /*
940  * This was an xlog-switch record, but the current insert location was
941  * already exactly at the beginning of a segment, so there was no need
942  * to do anything.
943  */
944  }
945 
946  /*
947  * Done! Let others know that we're finished.
948  */
950 
952 
954 
955  /*
956  * Mark top transaction id is logged (if needed) so that we should not try
957  * to log it again with the next WAL record in the current subtransaction.
958  */
959  if (topxid_included)
961 
962  /*
963  * Update shared LogwrtRqst.Write, if we crossed page boundary.
964  */
965  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
966  {
968  /* advance global request to include new block(s) */
969  if (XLogCtl->LogwrtRqst.Write < EndPos)
970  XLogCtl->LogwrtRqst.Write = EndPos;
973  }
974 
975  /*
976  * If this was an XLOG_SWITCH record, flush the record and the empty
977  * padding space that fills the rest of the segment, and perform
978  * end-of-segment actions (eg, notifying archiver).
979  */
980  if (class == WALINSERT_SPECIAL_SWITCH)
981  {
982  TRACE_POSTGRESQL_WAL_SWITCH();
983  XLogFlush(EndPos);
984 
985  /*
986  * Even though we reserved the rest of the segment for us, which is
987  * reflected in EndPos, we return a pointer to just the end of the
988  * xlog-switch record.
989  */
990  if (inserted)
991  {
992  EndPos = StartPos + SizeOfXLogRecord;
993  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
994  {
995  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
996 
997  if (offset == EndPos % XLOG_BLCKSZ)
998  EndPos += SizeOfXLogLongPHD;
999  else
1000  EndPos += SizeOfXLogShortPHD;
1001  }
1002  }
1003  }
1004 
1005 #ifdef WAL_DEBUG
1006  if (XLOG_DEBUG)
1007  {
1008  static XLogReaderState *debug_reader = NULL;
1009  XLogRecord *record;
1010  DecodedXLogRecord *decoded;
1012  StringInfoData recordBuf;
1013  char *errormsg = NULL;
1014  MemoryContext oldCxt;
1015 
1016  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1017 
1018  initStringInfo(&buf);
1019  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1020 
1021  /*
1022  * We have to piece together the WAL record data from the XLogRecData
1023  * entries, so that we can pass it to the rm_desc function as one
1024  * contiguous chunk.
1025  */
1026  initStringInfo(&recordBuf);
1027  for (; rdata != NULL; rdata = rdata->next)
1028  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1029 
1030  /* We also need temporary space to decode the record. */
1031  record = (XLogRecord *) recordBuf.data;
1032  decoded = (DecodedXLogRecord *)
1034 
1035  if (!debug_reader)
1036  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1037  XL_ROUTINE(.page_read = NULL,
1038  .segment_open = NULL,
1039  .segment_close = NULL),
1040  NULL);
1041  if (!debug_reader)
1042  {
1043  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1044  }
1045  else if (!DecodeXLogRecord(debug_reader,
1046  decoded,
1047  record,
1048  EndPos,
1049  &errormsg))
1050  {
1051  appendStringInfo(&buf, "error decoding record: %s",
1052  errormsg ? errormsg : "no error message");
1053  }
1054  else
1055  {
1056  appendStringInfoString(&buf, " - ");
1057 
1058  debug_reader->record = decoded;
1059  xlog_outdesc(&buf, debug_reader);
1060  debug_reader->record = NULL;
1061  }
1062  elog(LOG, "%s", buf.data);
1063 
1064  pfree(decoded);
1065  pfree(buf.data);
1066  pfree(recordBuf.data);
1067  MemoryContextSwitchTo(oldCxt);
1068  }
1069 #endif
1070 
1071  /*
1072  * Update our global variables
1073  */
1074  ProcLastRecPtr = StartPos;
1075  XactLastRecEnd = EndPos;
1076 
1077  /* Report WAL traffic to the instrumentation. */
1078  if (inserted)
1079  {
1080  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1082  pgWalUsage.wal_fpi += num_fpi;
1083  }
1084 
1085  return EndPos;
1086 }
#define likely(x)
Definition: c.h:313
#define unlikely(x)
Definition: c.h:314
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
MemoryContextSwitchTo(old_ctx)
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
const char * data
struct XLogRecData * next
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:590
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:540
XLogRecPtr XactLastRecEnd
Definition: xlog.c:253
static void WALInsertLockAcquire(void)
Definition: xlog.c:1370
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1224
static bool holdingAllLocks
Definition: xlog.c:651
static int MyLockNo
Definition: xlog.c:650
WalInsertClass
Definition: xlog.c:559
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:561
@ WALINSERT_NORMAL
Definition: xlog.c:560
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:562
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1107
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1163
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3126 of file xlog.c.

3127 {
3128  /*
3129  * During recovery, we don't flush WAL but update minRecoveryPoint
3130  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3131  * would need to be updated.
3132  */
3133  if (RecoveryInProgress())
3134  {
3135  /*
3136  * An invalid minRecoveryPoint means that we need to recover all the
3137  * WAL, i.e., we're doing crash recovery. We never modify the control
3138  * file's value in that case, so we can short-circuit future checks
3139  * here too. This triggers a quick exit path for the startup process,
3140  * which cannot update its local copy of minRecoveryPoint as long as
3141  * it has not replayed all WAL available when doing crash recovery.
3142  */
3143  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3144  updateMinRecoveryPoint = false;
3145 
3146  /* Quick exit if already known to be updated or cannot be updated */
3147  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3148  return false;
3149 
3150  /*
3151  * Update local copy of minRecoveryPoint. But if the lock is busy,
3152  * just return a conservative guess.
3153  */
3154  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3155  return true;
3156  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3157  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
3158  LWLockRelease(ControlFileLock);
3159 
3160  /*
3161  * Check minRecoveryPoint for any other process than the startup
3162  * process doing crash recovery, which should not update the control
3163  * file value if crash recovery is still running.
3164  */
3165  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3166  updateMinRecoveryPoint = false;
3167 
3168  /* check again */
3169  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3170  return false;
3171  else
3172  return true;
3173  }
3174 
3175  /* Quick exit if already known flushed */
3176  if (record <= LogwrtResult.Flush)
3177  return false;
3178 
3179  /* read LogwrtResult and update local state */
3180  RefreshXLogWriteResult(LogwrtResult);
3181 
3182  /* check again */
3183  if (record <= LogwrtResult.Flush)
3184  return false;
3185 
3186  return true;
3187 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8022 of file xlog.c.

8023 {
8024  XLogBeginInsert();
8025  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
8026  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8027 
8028  /*
8029  * We need not flush the NEXTOID record immediately, because any of the
8030  * just-allocated OIDs could only reach disk as part of a tuple insert or
8031  * update that would have its own XLOG record that must follow the NEXTOID
8032  * record. Therefore, the standard buffer LSN interlock applied to those
8033  * records will ensure no such OID reaches disk before the NEXTOID record
8034  * does.
8035  *
8036  * Note, however, that the above statement only covers state "within" the
8037  * database. When we use a generated OID as a file or directory name, we
8038  * are in a sense violating the basic WAL rule, because that filesystem
8039  * change may reach disk before the NEXTOID WAL record does. The impact
8040  * of this is that if a database crash occurs immediately afterward, we
8041  * might after restart re-generate the same OID and find that it conflicts
8042  * with the leftover file or directory. But since for safety's sake we
8043  * always loop until finding a nonconflicting filename, this poses no real
8044  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8045  */
8046 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8077 of file xlog.c.

8078 {
8079  XLogRecPtr RecPtr;
8080  xl_restore_point xlrec;
8081 
8082  xlrec.rp_time = GetCurrentTimestamp();
8083  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8084 
8085  XLogBeginInsert();
8086  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
8087 
8088  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8089 
8090  ereport(LOG,
8091  (errmsg("restore point \"%s\" created at %X/%X",
8092  rpName, LSN_FORMAT_ARGS(RecPtr))));
8093 
8094  return RecPtr;
8095 }
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2630 of file xlog.c.

2631 {
2632  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2633  bool sleeping;
2634  bool wakeup = false;
2635  XLogRecPtr prevAsyncXactLSN;
2636 
2637  SpinLockAcquire(&XLogCtl->info_lck);
2638  sleeping = XLogCtl->WalWriterSleeping;
2639  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2640  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2641  XLogCtl->asyncXactLSN = asyncXactLSN;
2642  SpinLockRelease(&XLogCtl->info_lck);
2643 
2644  /*
2645  * If somebody else already called this function with a more aggressive
2646  * LSN, they will have done what we needed (and perhaps more).
2647  */
2648  if (asyncXactLSN <= prevAsyncXactLSN)
2649  return;
2650 
2651  /*
2652  * If the WALWriter is sleeping, kick it to make it come out of low-power
2653  * mode, so that this async commit will reach disk within the expected
2654  * amount of time. Otherwise, determine whether it has enough WAL
2655  * available to flush, the same way that XLogBackgroundFlush() does.
2656  */
2657  if (sleeping)
2658  wakeup = true;
2659  else
2660  {
2661  int flushblocks;
2662 
2663  RefreshXLogWriteResult(LogwrtResult);
2664 
2665  flushblocks =
2666  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2667 
2668  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2669  wakeup = true;
2670  }
2671 
2672  if (wakeup && ProcGlobal->walwriterLatch)
2673  SetLatch(ProcGlobal->walwriterLatch);
2674 }
void SetLatch(Latch *latch)
Definition: latch.c:632
PROC_HDR * ProcGlobal
Definition: proc.c:79
Latch * walwriterLatch
Definition: proc.h:416
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, XLogCtlData::info_lck, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterLatch, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4911 of file xlog.c.

4912 {
4913  bool foundCFile,
4914  foundXLog;
4915  char *allocptr;
4916  int i;
4917  ControlFileData *localControlFile;
4918 
4919 #ifdef WAL_DEBUG
4920 
4921  /*
4922  * Create a memory context for WAL debugging that's exempt from the normal
4923  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4924  * an allocation fails, but wal_debug is not for production use anyway.
4925  */
4926  if (walDebugCxt == NULL)
4927  {
4928  walDebugCxt = AllocSetContextCreate(TopMemoryContext,
4929  "WAL Debug",
4930  ALLOCSET_DEFAULT_SIZES);
4931  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4932  }
4933 #endif
4934 
4935 
4936  XLogCtl = (XLogCtlData *)
4937  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4938 
4939  localControlFile = ControlFile;
4940  ControlFile = (ControlFileData *)
4941  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4942 
4943  if (foundCFile || foundXLog)
4944  {
4945  /* both should be present or neither */
4946  Assert(foundCFile && foundXLog);
4947 
4948  /* Initialize local copy of WALInsertLocks */
4949  WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
4950 
4951  if (localControlFile)
4952  pfree(localControlFile);
4953  return;
4954  }
4955  memset(XLogCtl, 0, sizeof(XLogCtlData));
4956 
4957  /*
4958  * Already have read control file locally, unless in bootstrap mode. Move
4959  * contents into shared memory.
4960  */
4961  if (localControlFile)
4962  {
4963  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4964  pfree(localControlFile);
4965  }
4966 
4967  /*
4968  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4969  * multiple of the alignment for same, so no extra alignment padding is
4970  * needed here.
4971  */
4972  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4973  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4974  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4975 
4976  for (i = 0; i < XLOGbuffers; i++)
4977  {
4978  pg_atomic_init_u64(&XLogCtl->xlblocks[i], InvalidXLogRecPtr);
4979  }
4980 
4981  /* WAL insertion locks. Ensure they're aligned to the full padded size */
4982  allocptr += sizeof(WALInsertLockPadded) -
4983  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4984  WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
4985  (WALInsertLockPadded *) allocptr;
4986  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4987 
4988  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
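4989  {
4990  LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
4991  pg_atomic_init_u64(&WALInsertLocks[i].l.insertingAt, InvalidXLogRecPtr);
4992  WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
4993  }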
4994 
4995  /*
4996  * Align the start of the page buffers to a full xlog block size boundary.
4997  * This simplifies some calculations in XLOG insertion. It is also
4998  * required for O_DIRECT.
4999  */
5000  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5001  XLogCtl->pages = allocptr;
5002  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5003 
5004  /*
5005  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5006  * in additional info.)
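5007  */
5008  XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
5009  XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
5010  XLogCtl->InstallXLogFileSegmentActive = false;
5011  XLogCtl->WalWriterSleeping = false;
5012 
5013  SpinLockInit(&XLogCtl->Insert.insertpos_lck);
5014  SpinLockInit(&XLogCtl->info_lck);
5015  pg_atomic_init_u64(&XLogCtl->logInsertResult, InvalidXLogRecPtr);
5016  pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
5017  pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
5018  pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
5019 }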
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:493
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:443
slock_t insertpos_lck
Definition: xlog.c:397
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4861
int XLOGbuffers
Definition: xlog.c:116
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4861 of file xlog.c.

4862 {
4863  Size size;
4864 
4865  /*
4866  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4867  * This isn't an amazingly clean place to do this, but we must wait till
4868  * NBuffers has received its final value, and must do it before using the
4869  * value of XLOGbuffers to do anything important.
4870  *
4871  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4872  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4873  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4874  * the matter with PGC_S_OVERRIDE.
4875  */
4876  if (XLOGbuffers == -1)
4877  {
4878  char buf[32];
4879 
4880  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4881  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4882  PGC_S_DYNAMIC_DEFAULT);
4883  if (XLOGbuffers == -1) /* failed to apply it? */
4884  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4885  PGC_S_OVERRIDE);
4886  }
4887  Assert(XLOGbuffers > 0);
4888 
4889  /* XLogCtl */
4890  size = sizeof(XLogCtlData);
4891 
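4892  /* WAL insertion locks, plus alignment */
4893  size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4894  /* xlblocks array */
4895  size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4896  /* extra alignment padding for XLOG I/O buffers */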
4897  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4898  /* and the buffers themselves */
4899  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4900 
4901  /*
4902  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4903  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4904  * routine again below to compute the actual allocation size.
4905  */
4906 
4907  return size;
4908 }
#define Max(x, y)
Definition: c.h:989
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4290
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4614

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9472 of file xlog.c.

9473 {
9474  ShutdownWalRcv();
9475 
9476  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9477  XLogCtl->InstallXLogFileSegmentActive = false;
9478  LWLockRelease(ControlFileLock);
9479 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

Variable Documentation

◆ CheckPointSegments

PGDLLIMPORT int CheckPointSegments
extern

◆ CheckpointStats

◆ CommitDelay

PGDLLIMPORT int CommitDelay
extern

Definition at line 131 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

PGDLLIMPORT int CommitSiblings
extern

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ EnableHotStandby

◆ fullPageWrites

PGDLLIMPORT bool fullPageWrites
extern

Definition at line 121 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ log_checkpoints

PGDLLIMPORT bool log_checkpoints
extern

◆ max_slot_wal_keep_size_mb

PGDLLIMPORT int max_slot_wal_keep_size_mb
extern

Definition at line 134 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

PGDLLIMPORT int max_wal_size_mb
extern

◆ min_wal_size_mb

PGDLLIMPORT int min_wal_size_mb
extern

Definition at line 114 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ ProcLastRecPtr

PGDLLIMPORT XLogRecPtr ProcLastRecPtr
extern

◆ track_wal_io_timing

PGDLLIMPORT bool track_wal_io_timing
extern

Definition at line 136 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ wal_compression

PGDLLIMPORT int wal_compression
extern

Definition at line 123 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

PGDLLIMPORT bool* wal_consistency_checking
extern

Definition at line 125 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

PGDLLIMPORT char* wal_consistency_checking_string
extern

Definition at line 124 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

PGDLLIMPORT int wal_decode_buffer_size
extern

Definition at line 135 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

PGDLLIMPORT bool wal_init_zero
extern

Definition at line 126 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

PGDLLIMPORT int wal_keep_size_mb
extern

Definition at line 115 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

PGDLLIMPORT bool wal_log_hints
extern

Definition at line 122 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

PGDLLIMPORT bool wal_recycle
extern

Definition at line 127 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

PGDLLIMPORT int wal_retrieve_retry_interval
extern

◆ wal_segment_size

PGDLLIMPORT int wal_segment_size
extern

Definition at line 142 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

PGDLLIMPORT int wal_sync_method
extern

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

PGDLLIMPORT char* XLogArchiveCommand
extern

◆ XLogArchiveMode

◆ XLogArchiveTimeout

PGDLLIMPORT int XLogArchiveTimeout
extern

Definition at line 117 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

PGDLLIMPORT int XLOGbuffers
extern

Definition at line 116 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().