PostgreSQL Source Code  git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
xlog.h File Reference
#include "access/xlogbackup.h"
#include "access/xlogdefs.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
Include dependency graph for xlog.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CheckpointStatsData
 

Macros

#define XLogArchivingActive()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
 
#define XLogArchivingAlways()    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
 
#define XLogIsNeeded()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogHintBitIsNeeded()   (DataChecksumsEnabled() || wal_log_hints)
 
#define XLogStandbyInfoActive()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogLogicalInfoActive()   (wal_level >= WAL_LEVEL_LOGICAL)
 
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
 
#define CHECKPOINT_END_OF_RECOVERY   0x0002 /* Like shutdown checkpoint, but issued at end of WAL recovery */
 
#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */
 
#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
 
#define CHECKPOINT_FLUSH_ALL   0x0010 /* Flush all pages, including those belonging to unlogged tables */
 
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
 
#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */
 
#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */
 
#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */
 
#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
 
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
 
#define RECOVERY_SIGNAL_FILE   "recovery.signal"
 
#define STANDBY_SIGNAL_FILE   "standby.signal"
 
#define BACKUP_LABEL_FILE   "backup_label"
 
#define BACKUP_LABEL_OLD   "backup_label.old"
 
#define TABLESPACE_MAP   "tablespace_map"
 
#define TABLESPACE_MAP_OLD   "tablespace_map.old"
 
#define PROMOTE_SIGNAL_FILE   "promote"
 

Typedefs

typedef enum ArchiveMode ArchiveMode
 
typedef enum WalLevel WalLevel
 
typedef enum WalCompression WalCompression
 
typedef enum RecoveryState RecoveryState
 
typedef struct CheckpointStatsData CheckpointStatsData
 
typedef enum WALAvailability WALAvailability
 
typedef enum SessionBackupState SessionBackupState
 

Enumerations

enum  WalSyncMethod {
  WAL_SYNC_METHOD_FSYNC = 0 , WAL_SYNC_METHOD_FDATASYNC , WAL_SYNC_METHOD_OPEN , WAL_SYNC_METHOD_FSYNC_WRITETHROUGH ,
  WAL_SYNC_METHOD_OPEN_DSYNC
}
 
enum  ArchiveMode { ARCHIVE_MODE_OFF = 0 , ARCHIVE_MODE_ON , ARCHIVE_MODE_ALWAYS }
 
enum  WalLevel { WAL_LEVEL_MINIMAL = 0 , WAL_LEVEL_REPLICA , WAL_LEVEL_LOGICAL }
 
enum  WalCompression { WAL_COMPRESSION_NONE = 0 , WAL_COMPRESSION_PGLZ , WAL_COMPRESSION_LZ4 , WAL_COMPRESSION_ZSTD }
 
enum  RecoveryState { RECOVERY_STATE_CRASH = 0 , RECOVERY_STATE_ARCHIVE , RECOVERY_STATE_DONE }
 
enum  WALAvailability {
  WALAVAIL_INVALID_LSN , WALAVAIL_RESERVED , WALAVAIL_EXTENDED , WALAVAIL_UNRESERVED ,
  WALAVAIL_REMOVED
}
 
enum  SessionBackupState { SESSION_BACKUP_NONE , SESSION_BACKUP_RUNNING }
 

Functions

XLogRecPtr XLogInsertRecord (struct XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, bool topxid_included)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void xlog_redo (struct XLogReaderState *record)
 
void xlog_desc (StringInfo buf, struct XLogReaderState *record)
 
const char * xlog_identify (uint8 info)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
void InitializeWalConsistencyChecking (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
void StartupXLOG (void)
 
void ShutdownXLOG (int code, Datum arg)
 
bool CreateCheckPoint (int flags)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetRedoRecPtr (void)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
void SetWalWriterSleeping (bool sleeping)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
void SetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void XLogShutdownWalRcv (void)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
SessionBackupState get_backup_status (void)
 

Variables

PGDLLIMPORT int wal_sync_method
 
PGDLLIMPORT XLogRecPtr ProcLastRecPtr
 
PGDLLIMPORT XLogRecPtr XactLastRecEnd
 
PGDLLIMPORT XLogRecPtr XactLastCommitEnd
 
PGDLLIMPORT int wal_segment_size
 
PGDLLIMPORT int min_wal_size_mb
 
PGDLLIMPORT int max_wal_size_mb
 
PGDLLIMPORT int wal_keep_size_mb
 
PGDLLIMPORT int max_slot_wal_keep_size_mb
 
PGDLLIMPORT int XLOGbuffers
 
PGDLLIMPORT int XLogArchiveTimeout
 
PGDLLIMPORT int wal_retrieve_retry_interval
 
PGDLLIMPORT char * XLogArchiveCommand
 
PGDLLIMPORT bool EnableHotStandby
 
PGDLLIMPORT bool fullPageWrites
 
PGDLLIMPORT bool wal_log_hints
 
PGDLLIMPORT int wal_compression
 
PGDLLIMPORT bool wal_init_zero
 
PGDLLIMPORT bool wal_recycle
 
PGDLLIMPORT bool * wal_consistency_checking
 
PGDLLIMPORT char * wal_consistency_checking_string
 
PGDLLIMPORT bool log_checkpoints
 
PGDLLIMPORT int CommitDelay
 
PGDLLIMPORT int CommitSiblings
 
PGDLLIMPORT bool track_wal_io_timing
 
PGDLLIMPORT int wal_decode_buffer_size
 
PGDLLIMPORT int CheckPointSegments
 
PGDLLIMPORT int XLogArchiveMode
 
PGDLLIMPORT int wal_level
 
PGDLLIMPORT CheckpointStatsData CheckpointStats
 

Macro Definition Documentation

◆ BACKUP_LABEL_FILE

#define BACKUP_LABEL_FILE   "backup_label"

Definition at line 302 of file xlog.h.

◆ BACKUP_LABEL_OLD

#define BACKUP_LABEL_OLD   "backup_label.old"

Definition at line 303 of file xlog.h.

◆ CHECKPOINT_CAUSE_TIME

#define CHECKPOINT_CAUSE_TIME   0x0100 /* Elapsed time */

Definition at line 149 of file xlog.h.

◆ CHECKPOINT_CAUSE_XLOG

#define CHECKPOINT_CAUSE_XLOG   0x0080 /* XLOG consumption */

Definition at line 148 of file xlog.h.

◆ CHECKPOINT_END_OF_RECOVERY

#define CHECKPOINT_END_OF_RECOVERY
Value:
0x0002 /* Like shutdown checkpoint, but
* issued at end of WAL recovery */

Definition at line 140 of file xlog.h.

◆ CHECKPOINT_FLUSH_ALL

#define CHECKPOINT_FLUSH_ALL
Value:
0x0010 /* Flush all pages, including those
* belonging to unlogged tables */

Definition at line 143 of file xlog.h.

◆ CHECKPOINT_FORCE

#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */

Definition at line 142 of file xlog.h.

◆ CHECKPOINT_IMMEDIATE

#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */

Definition at line 141 of file xlog.h.

◆ CHECKPOINT_IS_SHUTDOWN

#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */

Definition at line 139 of file xlog.h.

◆ CHECKPOINT_REQUESTED

#define CHECKPOINT_REQUESTED   0x0040 /* Checkpoint request has been made */

Definition at line 146 of file xlog.h.

◆ CHECKPOINT_WAIT

#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */

Definition at line 145 of file xlog.h.

◆ PROMOTE_SIGNAL_FILE

#define PROMOTE_SIGNAL_FILE   "promote"

Definition at line 309 of file xlog.h.

◆ RECOVERY_SIGNAL_FILE

#define RECOVERY_SIGNAL_FILE   "recovery.signal"

Definition at line 300 of file xlog.h.

◆ STANDBY_SIGNAL_FILE

#define STANDBY_SIGNAL_FILE   "standby.signal"

Definition at line 301 of file xlog.h.

◆ TABLESPACE_MAP

#define TABLESPACE_MAP   "tablespace_map"

Definition at line 305 of file xlog.h.

◆ TABLESPACE_MAP_OLD

#define TABLESPACE_MAP_OLD   "tablespace_map.old"

Definition at line 306 of file xlog.h.

◆ XLOG_INCLUDE_ORIGIN

#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */

Definition at line 154 of file xlog.h.

◆ XLOG_MARK_UNIMPORTANT

#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */

Definition at line 155 of file xlog.h.

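◆ XLogArchivingActive

#define XLogArchivingActive ( )
Value:
(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)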

Definition at line 99 of file xlog.h.

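◆ XLogArchivingAlways

#define XLogArchivingAlways ( )
Value:
(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)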

Definition at line 102 of file xlog.h.

◆ XLogHintBitIsNeeded

#define XLogHintBitIsNeeded ( )    (DataChecksumsEnabled() || wal_log_hints)

Definition at line 120 of file xlog.h.

◆ XLogIsNeeded

#define XLogIsNeeded ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 109 of file xlog.h.

◆ XLogLogicalInfoActive

#define XLogLogicalInfoActive ( )    (wal_level >= WAL_LEVEL_LOGICAL)

Definition at line 126 of file xlog.h.

◆ XLogStandbyInfoActive

#define XLogStandbyInfoActive ( )    (wal_level >= WAL_LEVEL_REPLICA)

Definition at line 123 of file xlog.h.

Typedef Documentation

◆ ArchiveMode

typedef enum ArchiveMode ArchiveMode

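◆ CheckpointStatsData

typedef struct CheckpointStatsData CheckpointStatsData

◆ RecoveryState

typedef enum RecoveryState RecoveryState

◆ SessionBackupState

typedef enum SessionBackupState SessionBackupState

◆ WALAvailability

typedef enum WALAvailability WALAvailability

◆ WalCompression

typedef enum WalCompression WalCompression

◆ WalLevel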

typedef enum WalLevel WalLevel

Enumeration Type Documentation

◆ ArchiveMode

Enumerator
ARCHIVE_MODE_OFF 
ARCHIVE_MODE_ON 
ARCHIVE_MODE_ALWAYS 

Definition at line 63 of file xlog.h.

64 {
65  ARCHIVE_MODE_OFF = 0, /* disabled */
66  ARCHIVE_MODE_ON, /* enabled while server is running normally */
67  ARCHIVE_MODE_ALWAYS, /* enabled always (even during recovery) */
68 } ArchiveMode;
ArchiveMode
Definition: xlog.h:64
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

◆ RecoveryState

Enumerator
RECOVERY_STATE_CRASH 
RECOVERY_STATE_ARCHIVE 
RECOVERY_STATE_DONE 

Definition at line 89 of file xlog.h.

90 {
91  RECOVERY_STATE_CRASH = 0, /* crash recovery */
92  RECOVERY_STATE_ARCHIVE, /* archive recovery */
93  RECOVERY_STATE_DONE, /* currently in production */
94 } RecoveryState;
RecoveryState
Definition: xlog.h:90
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_DONE
Definition: xlog.h:93
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92

◆ SessionBackupState

Enumerator
SESSION_BACKUP_NONE 
SESSION_BACKUP_RUNNING 

Definition at line 285 of file xlog.h.

288 {
@ SESSION_BACKUP_NONE
Definition: xlog.h:287

◆ WALAvailability

Enumerator
WALAVAIL_INVALID_LSN 
WALAVAIL_RESERVED 
WALAVAIL_EXTENDED 
WALAVAIL_UNRESERVED 
WALAVAIL_REMOVED 

Definition at line 187 of file xlog.h.

190 {
191  WALAVAIL_INVALID_LSN, /* parameter error */
192  WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */
193  WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot or
194  * wal_keep_size */
195  WALAVAIL_UNRESERVED, /* no longer reserved, but not removed yet */
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

◆ WalCompression

Enumerator
WAL_COMPRESSION_NONE 
WAL_COMPRESSION_PGLZ 
WAL_COMPRESSION_LZ4 
WAL_COMPRESSION_ZSTD 

Definition at line 80 of file xlog.h.

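81 {
82  WAL_COMPRESSION_NONE = 0,
83  WAL_COMPRESSION_PGLZ,
84  WAL_COMPRESSION_LZ4,
85  WAL_COMPRESSION_ZSTD,
86 } WalCompression;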
WalCompression
Definition: xlog.h:81
@ WAL_COMPRESSION_NONE
Definition: xlog.h:82
@ WAL_COMPRESSION_LZ4
Definition: xlog.h:84
@ WAL_COMPRESSION_PGLZ
Definition: xlog.h:83
@ WAL_COMPRESSION_ZSTD
Definition: xlog.h:85

◆ WalLevel

enum WalLevel
Enumerator
WAL_LEVEL_MINIMAL 
WAL_LEVEL_REPLICA 
WAL_LEVEL_LOGICAL 

Definition at line 72 of file xlog.h.

73 {
74  WAL_LEVEL_MINIMAL = 0,
75  WAL_LEVEL_REPLICA,
76  WAL_LEVEL_LOGICAL,
77 } WalLevel;
WalLevel
Definition: xlog.h:73
@ WAL_LEVEL_REPLICA
Definition: xlog.h:75
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74

◆ WalSyncMethod

Enumerator
WAL_SYNC_METHOD_FSYNC 
WAL_SYNC_METHOD_FDATASYNC 
WAL_SYNC_METHOD_OPEN 
WAL_SYNC_METHOD_FSYNC_WRITETHROUGH 
WAL_SYNC_METHOD_OPEN_DSYNC 

Definition at line 22 of file xlog.h.

23 {
24  WAL_SYNC_METHOD_FSYNC = 0,
25  WAL_SYNC_METHOD_FDATASYNC,
26  WAL_SYNC_METHOD_OPEN, /* for O_SYNC */
27  WAL_SYNC_METHOD_FSYNC_WRITETHROUGH,
28  WAL_SYNC_METHOD_OPEN_DSYNC /* for O_DSYNC */
29 };
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

Function Documentation

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5033 of file xlog.c.

5034 {
5035  CheckPoint checkPoint;
5036  char *buffer;
5037  XLogPageHeader page;
5038  XLogLongPageHeader longpage;
5039  XLogRecord *record;
5040  char *recptr;
5041  uint64 sysidentifier;
5042  struct timeval tv;
5043  pg_crc32c crc;
5044 
5045  /* allow ordinary WAL segment creation, like StartupXLOG() would */
5046  SetInstallXLogFileSegmentActive();
5047 
5048  /*
5049  * Select a hopefully-unique system identifier code for this installation.
5050  * We use the result of gettimeofday(), including the fractional seconds
5051  * field, as being about as unique as we can easily get. (Think not to
5052  * use random(), since it hasn't been seeded and there's no portable way
5053  * to seed it other than the system clock value...) The upper half of the
5054  * uint64 value is just the tv_sec part, while the lower half contains the
5055  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5056  * PID for a little extra uniqueness. A person knowing this encoding can
5057  * determine the initialization time of the installation, which could
5058  * perhaps be useful sometimes.
5059  */
5060  gettimeofday(&tv, NULL);
5061  sysidentifier = ((uint64) tv.tv_sec) << 32;
5062  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5063  sysidentifier |= getpid() & 0xFFF;
5064 
5065  /* page buffer must be aligned suitably for O_DIRECT */
5066  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5067  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5068  memset(page, 0, XLOG_BLCKSZ);
5069 
5070  /*
5071  * Set up information for the initial checkpoint record
5072  *
5073  * The initial checkpoint record is written to the beginning of the WAL
5074  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5075  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5076  */
5077  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5078  checkPoint.ThisTimeLineID = BootstrapTimeLineID;
5079  checkPoint.PrevTimeLineID = BootstrapTimeLineID;
5080  checkPoint.fullPageWrites = fullPageWrites;
5081  checkPoint.wal_level = wal_level;
5082  checkPoint.nextXid =
5083  FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
5084  checkPoint.nextOid = FirstGenbkiObjectId;
5085  checkPoint.nextMulti = FirstMultiXactId;
5086  checkPoint.nextMultiOffset = 0;
5087  checkPoint.oldestXid = FirstNormalTransactionId;
5088  checkPoint.oldestXidDB = Template1DbOid;
5089  checkPoint.oldestMulti = FirstMultiXactId;
5090  checkPoint.oldestMultiDB = Template1DbOid;
5093  checkPoint.time = (pg_time_t) time(NULL);
5095 
5096  TransamVariables->nextXid = checkPoint.nextXid;
5097  TransamVariables->nextOid = checkPoint.nextOid;
5099  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5100  AdvanceOldestClogXid(checkPoint.oldestXid);
5101  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5102  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5104 
5105  /* Set up the XLOG page header */
5106  page->xlp_magic = XLOG_PAGE_MAGIC;
5107  page->xlp_info = XLP_LONG_HEADER;
5108  page->xlp_tli = BootstrapTimeLineID;
5110  longpage = (XLogLongPageHeader) page;
5111  longpage->xlp_sysid = sysidentifier;
5112  longpage->xlp_seg_size = wal_segment_size;
5113  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5114 
5115  /* Insert the initial checkpoint record */
5116  recptr = ((char *) page + SizeOfXLogLongPHD);
5117  record = (XLogRecord *) recptr;
5118  record->xl_prev = 0;
5119  record->xl_xid = InvalidTransactionId;
5120  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5122  record->xl_rmid = RM_XLOG_ID;
5123  recptr += SizeOfXLogRecord;
5124  /* fill the XLogRecordDataHeaderShort struct */
5125  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5126  *(recptr++) = sizeof(checkPoint);
5127  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5128  recptr += sizeof(checkPoint);
5129  Assert(recptr - (char *) record == record->xl_tot_len);
5130 
5131  INIT_CRC32C(crc);
5132  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5133  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5134  FIN_CRC32C(crc);
5135  record->xl_crc = crc;
5136 
5137  /* Create first XLOG segment file */
5140 
5141  /*
5142  * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5143  * close the file again in a moment.
5144  */
5145 
5146  /* Write the first page with the initial record */
5147  errno = 0;
5148  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5149  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5150  {
5151  /* if write didn't set errno, assume problem is no disk space */
5152  if (errno == 0)
5153  errno = ENOSPC;
5154  ereport(PANIC,
5155  (errcode_for_file_access(),
5156  errmsg("could not write bootstrap write-ahead log file: %m")));
5157  }
5158  pgstat_report_wait_end();
5159 
5160  pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5161  if (pg_fsync(openLogFile) != 0)
5162  ereport(PANIC,
5163  (errcode_for_file_access(),
5164  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5165  pgstat_report_wait_end();
5166 
5167  if (close(openLogFile) != 0)
5168  ereport(PANIC,
5169  (errcode_for_file_access(),
5170  errmsg("could not close bootstrap write-ahead log file: %m")));
5171 
5172  openLogFile = -1;
5173 
5174  /* Now create pg_control */
5175  InitControlFile(sysidentifier, data_checksum_version);
5176  ControlFile->time = checkPoint.time;
5177  ControlFile->checkPoint = checkPoint.redo;
5178  ControlFile->checkPointCopy = checkPoint;
5179 
5180  /* some additional ControlFile fields are set in WriteControlFile() */
5181  WriteControlFile();
5182 
5183  /* Bootstrap the commit log, too */
5184  BootStrapCLOG();
5188 
5189  pfree(buffer);
5190 
5191  /*
5192  * Force control file to be read - in contrast to normal processing we'd
5193  * otherwise never run the checks and GUC related initializations therein.
5194  */
5195  ReadControlFile();
5196 }
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:809
#define Assert(condition)
Definition: c.h:863
void BootStrapCLOG(void)
Definition: clog.c:833
void BootStrapCommitTs(void)
Definition: commit_ts.c:596
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:909
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
int pg_fsync(int fd)
Definition: fd.c:385
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2328
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2362
void BootStrapMultiXact(void)
Definition: multixact.c:2034
#define FirstMultiXactId
Definition: multixact.h:25
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:270
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3380
bool fullPageWrites
Definition: xlog.c:122
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4204
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9484
static int openLogFile
Definition: xlog.c:635
int wal_level
Definition: xlog.c:131
static void WriteControlFile(void)
Definition: xlog.c:4239
int wal_segment_size
Definition: xlog.c:143
static TimeLineID openLogTLI
Definition: xlog.c:637
static ControlFileData * ControlFile
Definition: xlog.c:574
#define BootstrapTimeLineID
Definition: xlog.c:111
static void ReadControlFile(void)
Definition: xlog.c:4321
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3727 of file xlog.c.

3728 {
3729  int save_errno = errno;
3730  XLogSegNo lastRemovedSegNo;
3731 
3732  SpinLockAcquire(&XLogCtl->info_lck);
3733  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3734  SpinLockRelease(&XLogCtl->info_lck);
3735 
3736  if (segno <= lastRemovedSegNo)
3737  {
3738  char filename[MAXFNAMELEN];
3739 
3740  XLogFileName(filename, tli, segno, wal_segment_size);
3741  errno = save_errno;
3742  ereport(ERROR,
3743  (errcode_for_file_access(),
3744  errmsg("requested WAL segment %s has already been removed",
3745  filename)));
3746  }
3747  errno = save_errno;
3748 }
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:119
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
slock_t info_lck
Definition: xlog.c:553
XLogSegNo lastRemovedSegNo
Definition: xlog.c:461
static XLogCtlData * XLogCtl
Definition: xlog.c:566
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
uint64 XLogSegNo
Definition: xlogdefs.h:48

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 6889 of file xlog.c.

6890 {
6891  bool shutdown;
6892  CheckPoint checkPoint;
6893  XLogRecPtr recptr;
6894  XLogSegNo _logSegNo;
6895  XLogCtlInsert *Insert = &XLogCtl->Insert;
6896  uint32 freespace;
6897  XLogRecPtr PriorRedoPtr;
6898  XLogRecPtr last_important_lsn;
6899  VirtualTransactionId *vxids;
6900  int nvxids;
6901  int oldXLogAllowed = 0;
6902 
6903  /*
6904  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6905  * issued at a different time.
6906  */
6907  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
6908  shutdown = true;
6909  else
6910  shutdown = false;
6911 
6912  /* sanity check */
6913  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6914  elog(ERROR, "can't create a checkpoint during recovery");
6915 
6916  /*
6917  * Prepare to accumulate statistics.
6918  *
6919  * Note: because it is possible for log_checkpoints to change while a
6920  * checkpoint proceeds, we always accumulate stats, even if
6921  * log_checkpoints is currently off.
6922  */
6923  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
6924  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
6925 
6926  /*
6927  * Let smgr prepare for checkpoint; this has to happen outside the
6928  * critical section and before we determine the REDO pointer. Note that
6929  * smgr must not do anything that'd have to be undone if we decide no
6930  * checkpoint is needed.
6931  */
6932  SyncPreCheckpoint();
6933 
6934  /*
6935  * Use a critical section to force system panic if we have trouble.
6936  */
6937  START_CRIT_SECTION();
6938 
6939  if (shutdown)
6940  {
6941  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6942  ControlFile->state = DB_SHUTDOWNING;
6943  UpdateControlFile();
6944  LWLockRelease(ControlFileLock);
6945  }
6946 
6947  /* Begin filling in the checkpoint WAL record */
6948  MemSet(&checkPoint, 0, sizeof(checkPoint));
6949  checkPoint.time = (pg_time_t) time(NULL);
6950 
6951  /*
6952  * For Hot Standby, derive the oldestActiveXid before we fix the redo
6953  * pointer. This allows us to begin accumulating changes to assemble our
6954  * starting snapshot of locks and transactions.
6955  */
6956  if (!shutdown && XLogStandbyInfoActive())
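6957  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
6958  else
6959  checkPoint.oldestActiveXid = InvalidTransactionId;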
6960 
6961  /*
6962  * Get location of last important record before acquiring insert locks (as
6963  * GetLastImportantRecPtr() also locks WAL locks).
6964  */
6965  last_important_lsn = GetLastImportantRecPtr();
6966 
6967  /*
6968  * If this isn't a shutdown or forced checkpoint, and if there has been no
6969  * WAL activity requiring a checkpoint, skip it. The idea here is to
6970  * avoid inserting duplicate checkpoints when the system is idle.
6971  */
6972  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6973  CHECKPOINT_FORCE)) == 0)
6974  {
6975  if (last_important_lsn == ControlFile->checkPoint)
6976  {
6977  END_CRIT_SECTION();
6978  ereport(DEBUG1,
6979  (errmsg_internal("checkpoint skipped because system is idle")));
6980  return false;
6981  }
6982  }
6983 
6984  /*
6985  * An end-of-recovery checkpoint is created before anyone is allowed to
6986  * write WAL. To allow us to write the checkpoint record, temporarily
6987  * enable XLogInsertAllowed.
6988  */
6989  if (flags & CHECKPOINT_END_OF_RECOVERY)
6990  oldXLogAllowed = LocalSetXLogInsertAllowed();
6991 
6992  checkPoint.ThisTimeLineID = XLogCtl->InsertTimeLineID;
6993  if (flags & CHECKPOINT_END_OF_RECOVERY)
6994  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
6995  else
6996  checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
6997 
6998  /*
6999  * We must block concurrent insertions while examining insert state.
7000  */
7001  WALInsertLockAcquireExclusive();
7002 
7003  checkPoint.fullPageWrites = Insert->fullPageWrites;
7004  checkPoint.wal_level = wal_level;
7005 
7006  if (shutdown)
7007  {
7008  XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7009 
7010  /*
7011  * Compute new REDO record ptr = location of next XLOG record.
7012  *
7013  * Since this is a shutdown checkpoint, there can't be any concurrent
7014  * WAL insertion.
7015  */
7016  freespace = INSERT_FREESPACE(curInsert);
7017  if (freespace == 0)
7018  {
7019  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7020  curInsert += SizeOfXLogLongPHD;
7021  else
7022  curInsert += SizeOfXLogShortPHD;
7023  }
7024  checkPoint.redo = curInsert;
7025 
7026  /*
7027  * Here we update the shared RedoRecPtr for future XLogInsert calls;
7028  * this must be done while holding all the insertion locks.
7029  *
7030  * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7031  * left pointing past where it really needs to point. This is okay;
7032  * the only consequence is that XLogInsert might back up whole buffers
7033  * that it didn't really need to. We can't postpone advancing
7034  * RedoRecPtr because XLogInserts that happen while we are dumping
7035  * buffers must assume that their buffer changes are not included in
7036  * the checkpoint.
7037  */
7038  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7039  }
7040 
7041  /*
7042  * Now we can release the WAL insertion locks, allowing other xacts to
7043  * proceed while we are flushing disk buffers.
7044  */
7045  WALInsertLockRelease();
7046 
7047  /*
7048  * If this is an online checkpoint, we have not yet determined the redo
7049  * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7050  * record; the LSN at which it starts becomes the new redo pointer. We
7051  * don't do this for a shutdown checkpoint, because in that case no WAL
7052  * can be written between the redo point and the insertion of the
7053  * checkpoint record itself, so the checkpoint record itself serves to
7054  * mark the redo point.
7055  */
7056  if (!shutdown)
7057  {
7058  /* Include WAL level in record for WAL summarizer's benefit. */
7059  XLogBeginInsert();
7060  XLogRegisterData((char *) &wal_level, sizeof(wal_level));
7061  (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7062 
7063  /*
7064  * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7065  * shared memory and RedoRecPtr in backend-local memory, but we need
7066  * to copy that into the record that will be inserted when the
7067  * checkpoint is complete.
7068  */
7069  checkPoint.redo = RedoRecPtr;
7070  }
7071 
7072  /* Update the info_lck-protected copy of RedoRecPtr as well */
7073  SpinLockAcquire(&XLogCtl->info_lck);
7074  XLogCtl->RedoRecPtr = checkPoint.redo;
7075  SpinLockRelease(&XLogCtl->info_lck);
7076 
7077  /*
7078  * If enabled, log checkpoint start. We postpone this until now so as not
7079  * to log anything if we decided to skip the checkpoint.
7080  */
7081  if (log_checkpoints)
7082  LogCheckpointStart(flags, false);
7083 
7084  /* Update the process title */
7085  update_checkpoint_display(flags, false, false);
7086 
7087  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7088 
7089  /*
7090  * Get the other info we need for the checkpoint record.
7091  *
7092  * We don't need to save oldestClogXid in the checkpoint, it only matters
7093  * for the short period in which clog is being truncated, and if we crash
7094  * during that we'll redo the clog truncation and fix up oldestClogXid
7095  * there.
7096  */
7097  LWLockAcquire(XidGenLock, LW_SHARED);
7098  checkPoint.nextXid = TransamVariables->nextXid;
7099  checkPoint.oldestXid = TransamVariables->oldestXid;
7100  checkPoint.oldestXidDB = TransamVariables->oldestXidDB;
7101  LWLockRelease(XidGenLock);
7102 
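7103  LWLockAcquire(CommitTsLock, LW_SHARED);
7104  checkPoint.oldestCommitTsXid = TransamVariables->oldestCommitTsXid;
7105  checkPoint.newestCommitTsXid = TransamVariables->newestCommitTsXid;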
7106  LWLockRelease(CommitTsLock);
7107 
7108  LWLockAcquire(OidGenLock, LW_SHARED);
7109  checkPoint.nextOid = TransamVariables->nextOid;
7110  if (!shutdown)
7111  checkPoint.nextOid += TransamVariables->oidCount;
7112  LWLockRelease(OidGenLock);
7113 
7114  MultiXactGetCheckptMulti(shutdown,
7115  &checkPoint.nextMulti,
7116  &checkPoint.nextMultiOffset,
7117  &checkPoint.oldestMulti,
7118  &checkPoint.oldestMultiDB);
7119 
7120  /*
7121  * Having constructed the checkpoint record, ensure all shmem disk buffers
7122  * and commit-log buffers are flushed to disk.
7123  *
7124  * This I/O could fail for various reasons. If so, we will fail to
7125  * complete the checkpoint, but there is no reason to force a system
7126  * panic. Accordingly, exit critical section while doing it.
7127  */
7128  END_CRIT_SECTION();
7129 
7130  /*
7131  * In some cases there are groups of actions that must all occur on one
7132  * side or the other of a checkpoint record. Before flushing the
7133  * checkpoint record we must explicitly wait for any backend currently
7134  * performing those groups of actions.
7135  *
7136  * One example is end of transaction, so we must wait for any transactions
7137  * that are currently in commit critical sections. If an xact inserted
7138  * its commit record into XLOG just before the REDO point, then a crash
7139  * restart from the REDO point would not replay that record, which means
7140  * that our flushing had better include the xact's update of pg_xact. So
7141  * we wait till he's out of his commit critical section before proceeding.
7142  * See notes in RecordTransactionCommit().
7143  *
7144  * Because we've already released the insertion locks, this test is a bit
7145  * fuzzy: it is possible that we will wait for xacts we didn't really need
7146  * to wait for. But the delay should be short and it seems better to make
7147  * checkpoint take a bit longer than to hold off insertions longer than
7148  * necessary. (In fact, the whole reason we have this issue is that xact.c
7149  * does commit record XLOG insertion and clog update as two separate steps
7150  * protected by different locks, but again that seems best on grounds of
7151  * minimizing lock contention.)
7152  *
7153  * A transaction that has not yet set delayChkptFlags when we look cannot
7154  * be at risk, since it has not inserted its commit record yet; and one
7155  * that's already cleared it is not at risk either, since it's done fixing
7156  * clog and we will correctly flush the update below. So we cannot miss
7157  * any xacts we need to wait for.
7158  */
7159  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
7160  if (nvxids > 0)
7161  {
7162  do
7163  {
7164  /*
7165  * Keep absorbing fsync requests while we wait. There could even
7166  * be a deadlock if we don't, if the process that prevents the
7167  * checkpoint is trying to add a request to the queue.
7168  */
7169  AbsorbSyncRequests();
7170 
7171  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7172  pg_usleep(10000L); /* wait for 10 msec */
7173  pgstat_report_wait_end();
7174  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7175  DELAY_CHKPT_START));
7176  }
7177  pfree(vxids);
7178 
7179  CheckPointGuts(checkPoint.redo, flags);
7180 
7181  vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
7182  if (nvxids > 0)
7183  {
7184  do
7185  {
7186  AbsorbSyncRequests();
7187 
7188  pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7189  pg_usleep(10000L); /* wait for 10 msec */
7190  pgstat_report_wait_end();
7191  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7192  DELAY_CHKPT_COMPLETE));
7193  }
7194  pfree(vxids);
7195 
7196  /*
7197  * Take a snapshot of running transactions and write this to WAL. This
7198  * allows us to reconstruct the state of running transactions during
7199  * archive recovery, if required. Skip, if this info disabled.
7200  *
7201  * If we are shutting down, or Startup process is completing crash
7202  * recovery we don't need to write running xact data.
7203  */
7204  if (!shutdown && XLogStandbyInfoActive())
7205  LogStandbySnapshot();
7206 
7207  START_CRIT_SECTION();
7208 
7209  /*
7210  * Now insert the checkpoint record into XLOG.
7211  */
7212  XLogBeginInsert();
7213  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
7214  recptr = XLogInsert(RM_XLOG_ID,
7215  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7216  XLOG_CHECKPOINT_ONLINE);
7217 
7218  XLogFlush(recptr);
7219 
7220  /*
7221  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7222  * overwritten at next startup. No-one should even try, this just allows
7223  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7224  * to just temporarily disable writing until the system has exited
7225  * recovery.
7226  */
7227  if (shutdown)
7228  {
7229  if (flags & CHECKPOINT_END_OF_RECOVERY)
7230  LocalXLogInsertAllowed = oldXLogAllowed;
7231  else
7232  LocalXLogInsertAllowed = 0; /* never again write WAL */
7233  }
7234 
7235  /*
7236  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7237  * = end of actual checkpoint record.
7238  */
7239  if (shutdown && checkPoint.redo != ProcLastRecPtr)
7240  ereport(PANIC,
7241  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7242 
7243  /*
7244  * Remember the prior checkpoint's redo ptr for
7245  * UpdateCheckPointDistanceEstimate()
7246  */
7247  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7248 
7249  /*
7250  * Update the control file.
7251  */
7252  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7253  if (shutdown)
7254  ControlFile->state = DB_SHUTDOWNED;
7255  ControlFile->checkPoint = ProcLastRecPtr;
7256  ControlFile->checkPointCopy = checkPoint;
7257  /* crash recovery should always recover to the end of WAL */
7258  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
7259  ControlFile->minRecoveryPointTLI = 0;
7260 
7261  /*
7262  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7263  * unused on non-shutdown checkpoints, but seems useful to store it always
7264  * for debugging purposes.
7265  */
7266  ControlFile->unloggedLSN = pg_atomic_read_membarrier_u64(&XLogCtl->unloggedLSN);
7267 
7268  UpdateControlFile();
7269  LWLockRelease(ControlFileLock);
7270 
7271  /* Update shared-memory copy of checkpoint XID/epoch */
7272  SpinLockAcquire(&XLogCtl->info_lck);
7273  XLogCtl->ckptFullXid = checkPoint.nextXid;
7274  SpinLockRelease(&XLogCtl->info_lck);
7275 
7276  /*
7277  * We are now done with critical updates; no need for system panic if we
7278  * have trouble while fooling with old log segments.
7279  */
7280  END_CRIT_SECTION();
7281 
7282  /*
7283  * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7284  * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7285  * where (a) we're not inside of a critical section and (b) we can be
7286  * certain that the relevant record has been flushed to disk, which must
7287  * happen before it can be summarized.
7288  *
7289  * If this is a shutdown checkpoint, then this happens reasonably
7290  * promptly: we've only just inserted and flushed the
7291  * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7292  * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7293  * record was written before we began flushing data to disk, and that
7294  * could be many minutes ago at this point. However, we don't XLogFlush()
7295  * after inserting that record, so we're not guaranteed that it's on disk
7296  * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7297  * record.
7298  */
7299  WakeupWalSummarizer();
7300 
7301  /*
7302  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7303  */
7304  SyncPostCheckpoint();
7305 
7306  /*
7307  * Update the average distance between checkpoints if the prior checkpoint
7308  * exists.
7309  */
7310  if (PriorRedoPtr != InvalidXLogRecPtr)
7311  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7312 
7313  /*
7314  * Delete old log files, those no longer needed for last checkpoint to
7315  * prevent the disk holding the xlog from growing full.
7316  */
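7317  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7318  KeepLogSeg(recptr, &_logSegNo);
7319  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7320  _logSegNo, InvalidOid,
7321  InvalidTransactionId))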
7322  {
7323  /*
7324  * Some slots have been invalidated; recalculate the old-segment
7325  * horizon, starting again from RedoRecPtr.
7326  */
7327  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7328  KeepLogSeg(recptr, &_logSegNo);
7329  }
7330  _logSegNo--;
7331  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7332  checkPoint.ThisTimeLineID);
7333 
7334  /*
7335  * Make more log segments if needed. (Do this after recycling old log
7336  * segments, since that may supply some of the needed files.)
7337  */
7338  if (!shutdown)
7339  PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7340 
7341  /*
7342  * Truncate pg_subtrans if possible. We can throw away all data before
7343  * the oldest XMIN of any running transaction. No future transaction will
7344  * attempt to reference any pg_subtrans entry older than that (see Asserts
7345  * in subtrans.c). During recovery, though, we mustn't do this because
7346  * StartupSUBTRANS hasn't been called yet.
7347  */
7348  if (!RecoveryInProgress())
7349  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7350 
7351  /* Real work is done; log and update stats. */
7352  LogCheckpointEnd(false);
7353 
7354  /* Reset the process title */
7355  update_checkpoint_display(flags, false, true);
7356 
7357  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
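7358  NBuffers,
7359  CheckpointStats.ckpt_segs_added,
7360  CheckpointStats.ckpt_segs_removed,
7361  CheckpointStats.ckpt_segs_recycled);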
7362 
7363  return true;
7364 }
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:476
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
unsigned int uint32
Definition: c.h:518
#define MemSet(start, val, len)
Definition: c.h:1025
void AbsorbSyncRequests(void)
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
static void Insert(File file)
Definition: fd.c:1312
int NBuffers
Definition: globals.c:141
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2282
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:36
#define DELAY_CHKPT_START
Definition: proc.h:119
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3047
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2884
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:2034
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3093
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(ReplicationSlotInvalidationCause cause, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:1811
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:54
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1281
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
FullTransactionId ckptFullXid
Definition: xlog.c:457
TimeLineID InsertTimeLineID
Definition: xlog.c:509
XLogRecPtr RedoRecPtr
Definition: xlog.c:456
XLogCtlInsert Insert
Definition: xlog.c:452
TimeLineID PrevTimeLineID
Definition: xlog.c:510
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:464
XLogRecPtr RedoRecPtr
Definition: xlog.c:430
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:411
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:253
bool RecoveryInProgress(void)
Definition: xlog.c:6334
static void WALInsertLockRelease(void)
Definition: xlog.c:1445
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1858
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1416
static void UpdateControlFile(void)
Definition: xlog.c:4559
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3865
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6649
static XLogRecPtr RedoRecPtr
Definition: xlog.c:273
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6681
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3690
bool log_checkpoints
Definition: xlog.c:129
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:7953
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6422
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6556
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6786
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:580
static int LocalXLogInsertAllowed
Definition: xlog.c:236
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2802
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7510
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6824
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptFullXid, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7591 of file xlog.c.

7592 {
7593  XLogRecPtr lastCheckPointRecPtr;
7594  XLogRecPtr lastCheckPointEndPtr;
7595  CheckPoint lastCheckPoint;
7596  XLogRecPtr PriorRedoPtr;
7597  XLogRecPtr receivePtr;
7598  XLogRecPtr replayPtr;
7599  TimeLineID replayTLI;
7600  XLogRecPtr endptr;
7601  XLogSegNo _logSegNo;
7602  TimestampTz xtime;
7603 
7604  /* Concurrent checkpoint/restartpoint cannot happen */
7605  Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
7606 
7607  /* Get a local copy of the last safe checkpoint record. */
7608  SpinLockAcquire(&XLogCtl->info_lck);
7609  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7610  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7611  lastCheckPoint = XLogCtl->lastCheckPoint;
7612  SpinLockRelease(&XLogCtl->info_lck);
7613 
7614  /*
7615  * Check that we're still in recovery mode. It's ok if we exit recovery
7616  * mode after this check, the restart point is valid anyway.
7617  */
7618  if (!RecoveryInProgress())
7619  {
7620  ereport(DEBUG2,
7621  (errmsg_internal("skipping restartpoint, recovery has already ended")));
7622  return false;
7623  }
7624 
7625  /*
7626  * If the last checkpoint record we've replayed is already our last
7627  * restartpoint, we can't perform a new restart point. We still update
7628  * minRecoveryPoint in that case, so that if this is a shutdown restart
7629  * point, we won't start up earlier than before. That's not strictly
7630  * necessary, but when hot standby is enabled, it would be rather weird if
7631  * the database opened up for read-only connections at a point-in-time
7632  * before the last shutdown. Such time travel is still possible in case of
7633  * immediate shutdown, though.
7634  *
7635  * We don't explicitly advance minRecoveryPoint when we do create a
7636  * restartpoint. It's assumed that flushing the buffers will do that as a
7637  * side-effect.
7638  */
7639  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
7640  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7641  {
7642  ereport(DEBUG2,
7643  (errmsg_internal("skipping restartpoint, already performed at %X/%X",
7644  LSN_FORMAT_ARGS(lastCheckPoint.redo))));
7645 
7646  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
7647  if (flags & CHECKPOINT_IS_SHUTDOWN)
7648  {
7649  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7650  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7651  UpdateControlFile();
7652  LWLockRelease(ControlFileLock);
7653  }
7654  return false;
7655  }
7656 
7657  /*
7658  * Update the shared RedoRecPtr so that the startup process can calculate
7659  * the number of segments replayed since last restartpoint, and request a
7660  * restartpoint if it exceeds CheckPointSegments.
7661  *
7662  * Like in CreateCheckPoint(), hold off insertions to update it, although
7663  * during recovery this is just pro forma, because no WAL insertions are
7664  * happening.
7665  */
7666  WALInsertLockAcquireExclusive();
7667  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7668  WALInsertLockRelease();
7669 
7670  /* Also update the info_lck-protected copy */
7671  SpinLockAcquire(&XLogCtl->info_lck);
7672  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7673  SpinLockRelease(&XLogCtl->info_lck);
7674 
7675  /*
7676  * Prepare to accumulate statistics.
7677  *
7678  * Note: because it is possible for log_checkpoints to change while a
7679  * checkpoint proceeds, we always accumulate stats, even if
7680  * log_checkpoints is currently off.
7681  */
7682  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
7683  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
7684 
7685  if (log_checkpoints)
7686  LogCheckpointStart(flags, true);
7687 
7688  /* Update the process title */
7689  update_checkpoint_display(flags, true, false);
7690 
7691  CheckPointGuts(lastCheckPoint.redo, flags);
7692 
7693  /*
7694  * This location needs to be after CheckPointGuts() to ensure that some
7695  * work has already happened during this checkpoint.
7696  */
7697  INJECTION_POINT("create-restart-point");
7698 
7699  /*
7700  * Remember the prior checkpoint's redo ptr for
7701  * UpdateCheckPointDistanceEstimate()
7702  */
7703  PriorRedoPtr = ControlFile->checkPointCopy.redo;
7704 
7705  /*
7706  * Update pg_control, using current time. Check that it still shows an
7707  * older checkpoint, else do nothing; this is a quick hack to make sure
7708  * nothing really bad happens if somehow we get here after the
7709  * end-of-recovery checkpoint.
7710  */
7711  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7712  if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7713  {
7714  /*
7715  * Update the checkpoint information. We do this even if the cluster
7716  * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7717  * segments recycled below.
7718  */
7719  ControlFile->checkPoint = lastCheckPointRecPtr;
7720  ControlFile->checkPointCopy = lastCheckPoint;
7721 
7722  /*
7723  * Ensure minRecoveryPoint is past the checkpoint record and update it
7724  * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7725  * this will have happened already while writing out dirty buffers,
7726  * but not necessarily - e.g. because no buffers were dirtied. We do
7727  * this because a backup performed in recovery uses minRecoveryPoint
7728  * to determine which WAL files must be included in the backup, and
7729  * the file (or files) containing the checkpoint record must be
7730  * included, at a minimum. Note that for an ordinary restart of
7731  * recovery there's no value in having the minimum recovery point any
7732  * earlier than this anyway, because redo will begin just after the
7733  * checkpoint record.
7734  */
7735  if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
7736  {
7737  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7738  {
7739  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7740  ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
7741 
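7742  /* update local copy */
7743  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
7744  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;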
7745  }
7746  if (flags & CHECKPOINT_IS_SHUTDOWN)
7747  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
7748  }
7749  UpdateControlFile();
7750  }
7751  LWLockRelease(ControlFileLock);
7752 
7753  /*
7754  * Update the average distance between checkpoints/restartpoints if the
7755  * prior checkpoint exists.
7756  */
7757  if (PriorRedoPtr != InvalidXLogRecPtr)
7758  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
7759 
7760  /*
7761  * Delete old log files, those no longer needed for last restartpoint to
7762  * prevent the disk holding the xlog from growing full.
7763  */
7764  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7765 
7766  /*
7767  * Retreat _logSegNo using the current end of xlog replayed or received,
7768  * whichever is later.
7769  */
7770  receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7771  replayPtr = GetXLogReplayRecPtr(&replayTLI);
7772  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7773  KeepLogSeg(endptr, &_logSegNo);
7774  if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED,
7775  _logSegNo, InvalidOid,
7776  InvalidTransactionId))
7777  {
7778  /*
7779  * Some slots have been invalidated; recalculate the old-segment
7780  * horizon, starting again from RedoRecPtr.
7781  */
7782  XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
7783  KeepLogSeg(endptr, &_logSegNo);
7784  }
7785  _logSegNo--;
7786 
7787  /*
7788  * Try to recycle segments on a useful timeline. If we've been promoted
7789  * since the beginning of this restartpoint, use the new timeline chosen
7790  * at end of recovery. If we're still in recovery, use the timeline we're
7791  * currently replaying.
7792  *
7793  * There is no guarantee that the WAL segments will be useful on the
7794  * current timeline; if recovery proceeds to a new timeline right after
7795  * this, the pre-allocated WAL segments on this timeline will not be used,
7796  * and will go wasted until recycled on the next restartpoint. We'll live
7797  * with that.
7798  */
7799  if (!RecoveryInProgress())
7800  replayTLI = XLogCtl->InsertTimeLineID;
7801 
7802  RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7803 
7804  /*
7805  * Make more log segments if needed. (Do this after recycling old log
7806  * segments, since that may supply some of the needed files.)
7807  */
7808  PreallocXlogFiles(endptr, replayTLI);
7809 
7810  /*
7811  * Truncate pg_subtrans if possible. We can throw away all data before
7812  * the oldest XMIN of any running transaction. No future transaction will
7813  * attempt to reference any pg_subtrans entry older than that (see Asserts
7814  * in subtrans.c). When hot standby is disabled, though, we mustn't do
7815  * this because StartupSUBTRANS hasn't been called yet.
7816  */
7817  if (EnableHotStandby)
7818  TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
7819 
7820  /* Real work is done; log and update stats. */
7821  LogCheckpointEnd(true);
7822 
7823  /* Reset the process title */
7824  update_checkpoint_display(flags, true, true);
7825 
7826  xtime = GetLatestXTime();
7827  ereport((log_checkpoints ? LOG : DEBUG2),
7828  (errmsg("recovery restart point at %X/%X",
7829  LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7830  xtime ? errdetail("Last completed transaction was at log time %s.",
7831  timestamptz_to_str(xtime)) : 0));
7832 
7833  /*
7834  * Finally, execute archive_cleanup_command, if any.
7835  */
7836  if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7837  ExecuteRecoveryCommand(archiveCleanupCommand,
7838  "archive_cleanup_command",
7839  false,
7840  WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7841 
7842  return true;
7843 }
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1843
int64 TimestampTz
Definition: timestamp.h:39
int errdetail(const char *fmt,...)
Definition: elog.c:1203
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
bool IsUnderPostmaster
Definition: globals.c:119
#define INJECTION_POINT(name)
@ B_CHECKPOINTER
Definition: miscadmin.h:355
BackendType MyBackendType
Definition: miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:545
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:543
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:544
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
bool EnableHotStandby
Definition: xlog.c:121
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2722
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:646
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:647
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 TimeLineID
Definition: xlogdefs.h:59
char * archiveCleanupCommand
Definition: xlogrecovery.c:84
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert, B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9394 of file xlog.c.

9395 {
9396  bool during_backup_start = DatumGetBool(arg);
9397 
9398  /* If called during backup start, there shouldn't be one already running */
9399  Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9400 
9401  if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9402  {
9406 
9409 
9410  if (!during_backup_start)
9411  ereport(WARNING,
9412  errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9413  }
9414 }
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
int runningBackups
Definition: xlog.c:438
static SessionBackupState sessionBackupState
Definition: xlog.c:391

References arg, Assert, DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8792 of file xlog.c.

8794 {
8796 
8797  Assert(state != NULL);
8799 
8800  /*
8801  * During recovery, we don't need to check WAL level, because if the WAL
8802  * level is not sufficient, it's impossible to get here during recovery.
8803  */
8805  ereport(ERROR,
8806  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8807  errmsg("WAL level not sufficient for making an online backup"),
8808  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8809 
8810  if (strlen(backupidstr) > MAXPGPATH)
8811  ereport(ERROR,
8812  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8813  errmsg("backup label too long (max %d bytes)",
8814  MAXPGPATH)));
8815 
8816  strlcpy(state->name, backupidstr, sizeof(state->name));
8817 
8818  /*
8819  * Mark backup active in shared memory. We must do full-page WAL writes
8820  * during an on-line backup even if not doing so at other times, because
8821  * it's quite possible for the backup dump to obtain a "torn" (partially
8822  * written) copy of a database page if it reads the page concurrently with
8823  * our write to the same page. This can be fixed as long as the first
8824  * write to the page in the WAL sequence is a full-page write. Hence, we
8825  * increment runningBackups then force a CHECKPOINT, to ensure there are
8826  * no dirty pages in shared memory that might get dumped while the backup
8827  * is in progress without having a corresponding WAL record. (Once the
8828  * backup is complete, we need not force full-page writes anymore, since
8829  * we expect that any pages not modified during the backup interval must
8830  * have been correctly captured by the backup.)
8831  *
8832  * Note that forcing full-page writes has no effect during an online
8833  * backup from the standby.
8834  *
8835  * We must hold all the insertion locks to change the value of
8836  * runningBackups, to ensure adequate interlocking against
8837  * XLogInsertRecord().
8838  */
8842 
8843  /*
8844  * Ensure we decrement runningBackups if we fail below. NB -- for this to
8845  * work correctly, it is critical that sessionBackupState is only updated
8846  * after this block is over.
8847  */
8849  {
8850  bool gotUniqueStartpoint = false;
8851  DIR *tblspcdir;
8852  struct dirent *de;
8853  tablespaceinfo *ti;
8854  int datadirpathlen;
8855 
8856  /*
8857  * Force an XLOG file switch before the checkpoint, to ensure that the
8858  * WAL segment the checkpoint is written to doesn't contain pages with
8859  * old timeline IDs. That would otherwise happen if you called
8860  * pg_backup_start() right after restoring from a PITR archive: the
8861  * first WAL segment containing the startup checkpoint has pages in
8862  * the beginning with the old timeline ID. That can cause trouble at
8863  * recovery: we won't have a history file covering the old timeline if
8864  * pg_wal directory was not included in the base backup and the WAL
8865  * archive was cleared too before starting the backup.
8866  *
8867  * This also ensures that we have emitted a WAL page header that has
8868  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8869  * Therefore, if a WAL archiver (such as pglesslog) is trying to
8870  * compress out removable backup blocks, it won't remove any that
8871  * occur after this point.
8872  *
8873  * During recovery, we skip forcing XLOG file switch, which means that
8874  * the backup taken during recovery is not available for the special
8875  * recovery case described above.
8876  */
8878  RequestXLogSwitch(false);
8879 
8880  do
8881  {
8882  bool checkpointfpw;
8883 
8884  /*
8885  * Force a CHECKPOINT. Aside from being necessary to prevent torn
8886  * page problems, this guarantees that two successive backup runs
8887  * will have different checkpoint positions and hence different
8888  * history file names, even if nothing happened in between.
8889  *
8890  * During recovery, establish a restartpoint if possible. We use
8891  * the last restartpoint as the backup starting checkpoint. This
8892  * means that two successive backup runs can have same checkpoint
8893  * positions.
8894  *
8895  * Since the fact that we are executing do_pg_backup_start()
8896  * during recovery means that checkpointer is running, we can use
8897  * RequestCheckpoint() to establish a restartpoint.
8898  *
8899  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
8900  * passing fast = true). Otherwise this can take a while.
8901  */
8903  (fast ? CHECKPOINT_IMMEDIATE : 0));
8904 
8905  /*
8906  * Now we need to fetch the checkpoint record location, and also
8907  * its REDO pointer. The oldest point in WAL that would be needed
8908  * to restore starting from the checkpoint is precisely the REDO
8909  * pointer.
8910  */
8911  LWLockAcquire(ControlFileLock, LW_SHARED);
8912  state->checkpointloc = ControlFile->checkPoint;
8913  state->startpoint = ControlFile->checkPointCopy.redo;
8915  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8916  LWLockRelease(ControlFileLock);
8917 
8919  {
8920  XLogRecPtr recptr;
8921 
8922  /*
8923  * Check to see if all WAL replayed during online backup
8924  * (i.e., since last restartpoint used as backup starting
8925  * checkpoint) contain full-page writes.
8926  */
8928  recptr = XLogCtl->lastFpwDisableRecPtr;
8930 
8931  if (!checkpointfpw || state->startpoint <= recptr)
8932  ereport(ERROR,
8933  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8934  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8935  "since last restartpoint"),
8936  errhint("This means that the backup being taken on the standby "
8937  "is corrupt and should not be used. "
8938  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
8939  "and then try an online backup again.")));
8940 
8941  /*
8942  * During recovery, since we don't use the end-of-backup WAL
8943  * record and don't write the backup history file, the
8944  * starting WAL location doesn't need to be unique. This means
8945  * that two base backups started at the same time might use
8946  * the same checkpoint as starting locations.
8947  */
8948  gotUniqueStartpoint = true;
8949  }
8950 
8951  /*
8952  * If two base backups are started at the same time (in WAL sender
8953  * processes), we need to make sure that they use different
8954  * checkpoints as starting locations, because we use the starting
8955  * WAL location as a unique identifier for the base backup in the
8956  * end-of-backup WAL record and when we write the backup history
8957  * file. Perhaps it would be better to generate a separate unique ID
8958  * for each backup instead of forcing another checkpoint, but
8959  * taking a checkpoint right after another is not that expensive
8960  * either because only few buffers have been dirtied yet.
8961  */
8963  if (XLogCtl->Insert.lastBackupStart < state->startpoint)
8964  {
8965  XLogCtl->Insert.lastBackupStart = state->startpoint;
8966  gotUniqueStartpoint = true;
8967  }
8969  } while (!gotUniqueStartpoint);
8970 
8971  /*
8972  * Construct tablespace_map file.
8973  */
8974  datadirpathlen = strlen(DataDir);
8975 
8976  /* Collect information about all tablespaces */
8977  tblspcdir = AllocateDir(PG_TBLSPC_DIR);
8978  while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
8979  {
8980  char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
8981  char linkpath[MAXPGPATH];
8982  char *relpath = NULL;
8983  char *s;
8984  PGFileType de_type;
8985  char *badp;
8986  Oid tsoid;
8987 
8988  /*
8989  * Try to parse the directory name as an unsigned integer.
8990  *
8991  * Tablespace directories should be positive integers that can be
8992  * represented in 32 bits, with no leading zeroes or trailing
8993  * garbage. If we come across a name that doesn't meet those
8994  * criteria, skip it.
8995  */
8996  if (de->d_name[0] < '1' || de->d_name[0] > '9')
8997  continue;
8998  errno = 0;
8999  tsoid = strtoul(de->d_name, &badp, 10);
9000  if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9001  continue;
9002 
9003  snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9004 
9005  de_type = get_dirent_type(fullpath, de, false, ERROR);
9006 
9007  if (de_type == PGFILETYPE_LNK)
9008  {
9009  StringInfoData escapedpath;
9010  int rllen;
9011 
9012  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9013  if (rllen < 0)
9014  {
9015  ereport(WARNING,
9016  (errmsg("could not read symbolic link \"%s\": %m",
9017  fullpath)));
9018  continue;
9019  }
9020  else if (rllen >= sizeof(linkpath))
9021  {
9022  ereport(WARNING,
9023  (errmsg("symbolic link \"%s\" target is too long",
9024  fullpath)));
9025  continue;
9026  }
9027  linkpath[rllen] = '\0';
9028 
9029  /*
9030  * Relpath holds the relative path of the tablespace directory
9031  * when it's located within PGDATA, or NULL if it's located
9032  * elsewhere.
9033  */
9034  if (rllen > datadirpathlen &&
9035  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
9036  IS_DIR_SEP(linkpath[datadirpathlen]))
9037  relpath = pstrdup(linkpath + datadirpathlen + 1);
9038 
9039  /*
9040  * Add a backslash-escaped version of the link path to the
9041  * tablespace map file.
9042  */
9043  initStringInfo(&escapedpath);
9044  for (s = linkpath; *s; s++)
9045  {
9046  if (*s == '\n' || *s == '\r' || *s == '\\')
9047  appendStringInfoChar(&escapedpath, '\\');
9048  appendStringInfoChar(&escapedpath, *s);
9049  }
9050  appendStringInfo(tblspcmapfile, "%s %s\n",
9051  de->d_name, escapedpath.data);
9052  pfree(escapedpath.data);
9053  }
9054  else if (de_type == PGFILETYPE_DIR)
9055  {
9056  /*
9057  * It's possible to use allow_in_place_tablespaces to create
9058  * directories directly under pg_tblspc, for testing purposes
9059  * only.
9060  *
9061  * In this case, we store a relative path rather than an
9062  * absolute path into the tablespaceinfo.
9063  */
9064  snprintf(linkpath, sizeof(linkpath), "%s/%s",
9065  PG_TBLSPC_DIR, de->d_name);
9066  relpath = pstrdup(linkpath);
9067  }
9068  else
9069  {
9070  /* Skip any other file type that appears here. */
9071  continue;
9072  }
9073 
9074  ti = palloc(sizeof(tablespaceinfo));
9075  ti->oid = tsoid;
9076  ti->path = pstrdup(linkpath);
9077  ti->rpath = relpath;
9078  ti->size = -1;
9079 
9080  if (tablespaces)
9081  *tablespaces = lappend(*tablespaces, ti);
9082  }
9083  FreeDir(tblspcdir);
9084 
9085  state->starttime = (pg_time_t) time(NULL);
9086  }
9088 
9089  state->started_in_recovery = backup_started_in_recovery;
9090 
9091  /*
9092  * Mark that the start phase has correctly finished for the backup.
9093  */
9095 }
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
Definition: checkpointer.c:952
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2931
int FreeDir(DIR *dir)
Definition: fd.c:2983
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2865
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:526
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:70
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
char * pstrdup(const char *in)
Definition: mcxt.c:1696
#define MAXPGPATH
#define snprintf
Definition: port.h:238
#define IS_DIR_SEP(ch)
Definition: port.h:102
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
unsigned int Oid
Definition: postgres_ext.h:31
#define relpath(rlocator, forknum)
Definition: relpath.h:102
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:191
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
Definition: dirent.c:26
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:551
XLogRecPtr lastBackupStart
Definition: xlog.c:439
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:236
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8060
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9394
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:288
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert, backup_started_in_recovery, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, DatumGetBool(), do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9120 of file xlog.c.

9121 {
9122  bool backup_stopped_in_recovery = false;
9123  char histfilepath[MAXPGPATH];
9124  char lastxlogfilename[MAXFNAMELEN];
9125  char histfilename[MAXFNAMELEN];
9126  XLogSegNo _logSegNo;
9127  FILE *fp;
9128  int seconds_before_warning;
9129  int waits = 0;
9130  bool reported_waiting = false;
9131 
9132  Assert(state != NULL);
9133 
9134  backup_stopped_in_recovery = RecoveryInProgress();
9135 
9136  /*
9137  * During recovery, we don't need to check WAL level, because if the WAL
9138  * level is not sufficient, it's impossible to get here during recovery.
9139  */
9140  if (!backup_stopped_in_recovery && !XLogIsNeeded())
9141  ereport(ERROR,
9142  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9143  errmsg("WAL level not sufficient for making an online backup"),
9144  errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9145 
9146  /*
9147  * OK to update backup counter and session-level lock.
9148  *
9149  * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9150  * otherwise they can be updated inconsistently, which might cause
9151  * do_pg_abort_backup() to fail.
9152  */
9154 
9155  /*
9156  * It is expected that each do_pg_backup_start() call is matched by
9157  * exactly one do_pg_backup_stop() call.
9158  */
9161 
9162  /*
9163  * Clean up session-level lock.
9164  *
9165  * You might think that WALInsertLockRelease() can be called before
9166  * cleaning up session-level lock because session-level lock doesn't need
9167  * to be protected with WAL insertion lock. But since
9168  * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9169  * cleaned up before it.
9170  */
9172 
9174 
9175  /*
9176  * If we are taking an online backup from the standby, we confirm that the
9177  * standby has not been promoted during the backup.
9178  */
9179  if (state->started_in_recovery && !backup_stopped_in_recovery)
9180  ereport(ERROR,
9181  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9182  errmsg("the standby was promoted during online backup"),
9183  errhint("This means that the backup being taken is corrupt "
9184  "and should not be used. "
9185  "Try taking another online backup.")));
9186 
9187  /*
9188  * During recovery, we don't write an end-of-backup record. We assume that
9189  * pg_control was backed up last and its minimum recovery point can be
9190  * available as the backup end location. Since we don't have an
9191  * end-of-backup record, we use the pg_control value to check whether
9192  * we've reached the end of backup when starting recovery from this
9193  * backup. We have no way of checking if pg_control wasn't backed up last
9194  * however.
9195  *
9196  * We don't force a switch to new WAL file but it is still possible to
9197  * wait for all the required files to be archived if waitforarchive is
9198  * true. This is okay if we use the backup to start a standby and fetch
9199  * the missing WAL using streaming replication. But in the case of an
9200  * archive recovery, a user should set waitforarchive to true and wait for
9201  * them to be archived to ensure that all the required files are
9202  * available.
9203  *
9204  * We return the current minimum recovery point as the backup end
9205  * location. Note that it can be greater than the exact backup end
9206  * location if the minimum recovery point is updated after the backup of
9207  * pg_control. This is harmless for current uses.
9208  *
9209  * XXX currently a backup history file is for informational and debug
9210  * purposes only. It's not essential for an online backup. Furthermore,
9211  * even if it's created, it will not be archived during recovery because
9212  * an archiver is not invoked. So it doesn't seem worthwhile to write a
9213  * backup history file during recovery.
9214  */
9215  if (backup_stopped_in_recovery)
9216  {
9217  XLogRecPtr recptr;
9218 
9219  /*
9220  * Check to see if all WAL replayed during online backup contain
9221  * full-page writes.
9222  */
9224  recptr = XLogCtl->lastFpwDisableRecPtr;
9226 
9227  if (state->startpoint <= recptr)
9228  ereport(ERROR,
9229  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9230  errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9231  "during online backup"),
9232  errhint("This means that the backup being taken on the standby "
9233  "is corrupt and should not be used. "
9234  "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9235  "and then try an online backup again.")));
9236 
9237 
9238  LWLockAcquire(ControlFileLock, LW_SHARED);
9239  state->stoppoint = ControlFile->minRecoveryPoint;
9241  LWLockRelease(ControlFileLock);
9242  }
9243  else
9244  {
9245  char *history_file;
9246 
9247  /*
9248  * Write the backup-end xlog record
9249  */
9250  XLogBeginInsert();
9251  XLogRegisterData((char *) (&state->startpoint),
9252  sizeof(state->startpoint));
9253  state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9254 
9255  /*
9256  * Given that we're not in recovery, InsertTimeLineID is set and can't
9257  * change, so we can read it without a lock.
9258  */
9259  state->stoptli = XLogCtl->InsertTimeLineID;
9260 
9261  /*
9262  * Force a switch to a new xlog segment file, so that the backup is
9263  * valid as soon as archiver moves out the current segment file.
9264  */
9265  RequestXLogSwitch(false);
9266 
9267  state->stoptime = (pg_time_t) time(NULL);
9268 
9269  /*
9270  * Write the backup history file
9271  */
9272  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9273  BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9274  state->startpoint, wal_segment_size);
9275  fp = AllocateFile(histfilepath, "w");
9276  if (!fp)
9277  ereport(ERROR,
9279  errmsg("could not create file \"%s\": %m",
9280  histfilepath)));
9281 
9282  /* Build and save the contents of the backup history file */
9283  history_file = build_backup_content(state, true);
9284  fprintf(fp, "%s", history_file);
9285  pfree(history_file);
9286 
9287  if (fflush(fp) || ferror(fp) || FreeFile(fp))
9288  ereport(ERROR,
9290  errmsg("could not write file \"%s\": %m",
9291  histfilepath)));
9292 
9293  /*
9294  * Clean out any no-longer-needed history files. As a side effect,
9295  * this will post a .ready file for the newly created history file,
9296  * notifying the archiver that history file may be archived
9297  * immediately.
9298  */
9300  }
9301 
9302  /*
9303  * If archiving is enabled, wait for all the required WAL files to be
9304  * archived before returning. If archiving isn't enabled, the required WAL
9305  * needs to be transported via streaming replication (hopefully with
9306  * wal_keep_size set high enough), or some more exotic mechanism like
9307  * polling and copying files from pg_wal with a script. We have no knowledge
9308  * of those mechanisms, so it's up to the user to ensure that he gets all
9309  * the required WAL.
9310  *
9311  * We wait until both the last WAL file filled during backup and the
9312  * history file have been archived, and assume that the alphabetic sorting
9313  * property of the WAL files ensures any earlier WAL files are safely
9314  * archived as well.
9315  *
9316  * We wait forever, since archive_command is supposed to work and we
9317  * assume the admin wanted his backup to work completely. If you don't
9318  * wish to wait, then either waitforarchive should be passed in as false,
9319  * or you can set statement_timeout. Also, some notices are issued to
9320  * clue in anyone who might be doing this interactively.
9321  */
9322 
9323  if (waitforarchive &&
9324  ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9325  (backup_stopped_in_recovery && XLogArchivingAlways())))
9326  {
9327  XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9328  XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9330 
9331  XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9332  BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9333  state->startpoint, wal_segment_size);
9334 
9335  seconds_before_warning = 60;
9336  waits = 0;
9337 
9338  while (XLogArchiveIsBusy(lastxlogfilename) ||
9339  XLogArchiveIsBusy(histfilename))
9340  {
9342 
9343  if (!reported_waiting && waits > 5)
9344  {
9345  ereport(NOTICE,
9346  (errmsg("base backup done, waiting for required WAL segments to be archived")));
9347  reported_waiting = true;
9348  }
9349 
9350  (void) WaitLatch(MyLatch,
9352  1000L,
9353  WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9355 
9356  if (++waits >= seconds_before_warning)
9357  {
9358  seconds_before_warning *= 2; /* This wraps in >10 years... */
9359  ereport(WARNING,
9360  (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9361  waits),
9362  errhint("Check that your \"archive_command\" is executing properly. "
9363  "You can safely cancel this backup, "
9364  "but the database backup will not be usable without all the WAL segments.")));
9365  }
9366  }
9367 
9368  ereport(NOTICE,
9369  (errmsg("all required WAL segments have been archived")));
9370  }
9371  else if (waitforarchive)
9372  ereport(NOTICE,
9373  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9374 }
#define NOTICE
Definition: elog.h:35
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2605
int FreeFile(FILE *file)
Definition: fd.c:2803
struct Latch * MyLatch
Definition: globals.c:62
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
static void const char fflush(stdout)
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define fprintf
Definition: port.h:242
static void CleanupBackupHistory(void)
Definition: xlog.c:4161
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLogArchivingAlways()
Definition: xlog.h:102
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert, BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fflush(), fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9101 of file xlog.c.

9102 {
9103  return sessionBackupState;
9104 }

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4859 of file xlog.c.

4860 {
4861  return ControlFile->wal_level;
4862 }

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4604 of file xlog.c.

4605 {
4607 }
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:522

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

Definition at line 6499 of file xlog.c.

6500 {
6502 
6504 
6505  /*
6506  * If we're writing and flushing WAL, the time line can't be changing, so
6507  * no lock is required.
6508  */
6509  if (insertTLI)
6510  *insertTLI = XLogCtl->InsertTimeLineID;
6511 
6512  return LogwrtResult.Flush;
6513 }
RecoveryState SharedRecoveryState
Definition: xlog.c:516
XLogRecPtr Flush
Definition: xlog.c:328
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:620
static XLogwrtResult LogwrtResult
Definition: xlog.c:612

References Assert, XLogwrtResult::Flush, XLogCtlData::InsertTimeLineID, LogwrtResult, RECOVERY_STATE_DONE, RefreshXLogWriteResult, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by binary_upgrade_logical_slot_has_caught_up(), get_flush_position(), GetCurrentLSN(), GetLatestLSN(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), pg_replication_slot_advance(), read_local_xlog_page_guts(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr * RedoRecPtr_p,
bool * doPageWrites_p 
)

Definition at line 6467 of file xlog.c.

6468 {
6469  *RedoRecPtr_p = RedoRecPtr;
6470  *doPageWrites_p = doPageWrites;
6471 }
static bool doPageWrites
Definition: xlog.c:286

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 6482 of file xlog.c.

6483 {
6484  XLogRecPtr recptr;
6485 
6486  SpinLockAcquire(&XLogCtl->info_lck);
6487  recptr = XLogCtl->LogwrtRqst.Write;
6488  SpinLockRelease(&XLogCtl->info_lck);
6489 
6490  return recptr;
6491 }
XLogwrtRqst LogwrtRqst
Definition: xlog.c:455
XLogRecPtr Write
Definition: xlog.c:321

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, XLogwrtRqst::Write, and XLogCtl.

Referenced by CheckpointerMain(), gistvacuumscan(), and IsCheckpointOnSchedule().

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6556 of file xlog.c.

6557 {
6558  XLogRecPtr res = InvalidXLogRecPtr;
6559  int i;
6560 
6561  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6562  {
6563  XLogRecPtr last_important;
6564 
6565  /*
6566  * Need to take a lock to prevent torn reads of the LSN, which are
6567  * possible on some of the supported platforms. WAL insert locks only
6568  * support exclusive mode, so we have to use that.
6569  */
6570  LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
6571  last_important = WALInsertLocks[i].l.lastImportantAt;
6572  LWLockRelease(&WALInsertLocks[i].l.lock);
6573 
6574  if (res < last_important)
6575  res = last_important;
6576  }
6577 
6578  return res;
6579 }
int i
Definition: isn.c:72
XLogRecPtr lastImportantAt
Definition: xlog.c:371
WALInsertLock l
Definition: xlog.c:383
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:569
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:150

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, res, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetMockAuthenticationNonce()

char* GetMockAuthenticationNonce ( void  )

Definition at line 4578 of file xlog.c.

4579 {
4580  Assert(ControlFile != NULL);
4581  return ControlFile->mock_authentication_nonce;
4582 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229

References Assert, ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6370 of file xlog.c.

6371 {
6372  RecoveryState retval;
6373 
6374  SpinLockAcquire(&XLogCtl->info_lck);
6375  retval = XLogCtl->SharedRecoveryState;
6376  SpinLockRelease(&XLogCtl->info_lck);
6377 
6378  return retval;
6379 }

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6437 of file xlog.c.

6438 {
6439  XLogRecPtr ptr;
6440 
6441  /*
6442  * The possibly not up-to-date copy in XlogCtl is enough. Even if we
6443  * grabbed a WAL insertion lock to read the authoritative value in
6444  * Insert->RedoRecPtr, someone might update it just after we've released
6445  * the lock.
6446  */
6447  SpinLockAcquire(&XLogCtl->info_lck);
6448  ptr = XLogCtl->RedoRecPtr;
6449  SpinLockRelease(&XLogCtl->info_lck);
6450 
6451  if (RedoRecPtr < ptr)
6452  RedoRecPtr = ptr;
6453 
6454  return RedoRecPtr;
6455 }

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), pgstat_before_server_shutdown(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

uint64 GetSystemIdentifier ( void  )

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7869 of file xlog.c.

7870 {
7871  XLogRecPtr currpos; /* current write LSN */
7872  XLogSegNo currSeg; /* segid of currpos */
7873  XLogSegNo targetSeg; /* segid of targetLSN */
7874  XLogSegNo oldestSeg; /* actual oldest segid */
7875  XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7876  XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7877  uint64 keepSegs;
7878 
7879  /*
7880  * slot does not reserve WAL. Either deactivated, or has never been active
7881  */
7882  if (XLogRecPtrIsInvalid(targetLSN))
7883  return WALAVAIL_INVALID_LSN;
7884 
7885  /*
7886  * Calculate the oldest segment currently reserved by all slots,
7887  * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7888  * oldestSlotSeg to the current segment.
7889  */
7890  currpos = GetXLogWriteRecPtr();
7891  XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7892  KeepLogSeg(currpos, &oldestSlotSeg);
7893 
7894  /*
7895  * Find the oldest extant segment file. We get 1 until checkpoint removes
7896  * the first WAL segment file since startup, which causes the status being
7897  * wrong under certain abnormal conditions but that doesn't actually harm.
7898  */
7899  oldestSeg = XLogGetLastRemovedSegno() + 1;
7900 
7901  /* calculate oldest segment by max_wal_size */
7902  XLByteToSeg(currpos, currSeg, wal_segment_size);
7903  keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1;
7904 
7905  if (currSeg > keepSegs)
7906  oldestSegMaxWalSize = currSeg - keepSegs;
7907  else
7908  oldestSegMaxWalSize = 1;
7909 
7910  /* the segment we care about */
7911  XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7912 
7913  /*
7914  * No point in returning reserved or extended status values if the
7915  * targetSeg is known to be lost.
7916  */
7917  if (targetSeg >= oldestSlotSeg)
7918  {
7919  /* show "reserved" when targetSeg is within max_wal_size */
7920  if (targetSeg >= oldestSegMaxWalSize)
7921  return WALAVAIL_RESERVED;
7922 
7923  /* being retained by slots exceeding max_wal_size */
7924  return WALAVAIL_EXTENDED;
7925  }
7926 
7927  /* WAL segments are no longer retained but haven't been removed yet */
7928  if (targetSeg >= oldestSeg)
7929  return WALAVAIL_UNRESERVED;
7930 
7931  /* Definitely lost */
7932  return WALAVAIL_REMOVED;
7933 }
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3758
int max_wal_size_mb
Definition: xlog.c:114
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:603
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9451
@ WALAVAIL_REMOVED
Definition: xlog.h:194

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsInvalid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

Definition at line 6520 of file xlog.c.

6521 {
6522  Assert(XLogCtl->SharedRecoveryState == RECOVERY_STATE_DONE);
6523 
6524  /* Since the value can't be changing, no lock is required. */
6525  return XLogCtl->InsertTimeLineID;
6526 }

References Assert, XLogCtlData::InsertTimeLineID, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by logical_read_xlog_page(), pg_walfile_name(), pg_walfile_name_offset(), ReadReplicationSlot(), WALReadFromBuffers(), and XLogSendPhysical().

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6536 of file xlog.c.

6537 {
6538  TimeLineID insertTLI;
6539 
6540  SpinLockAcquire(&XLogCtl->info_lck);
6541  insertTLI = XLogCtl->InsertTimeLineID;
6542  SpinLockRelease(&XLogCtl->info_lck);
6543 
6544  return insertTLI;
6545 }

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9435 of file xlog.c.

9436 {
9437  XLogCtlInsert *Insert = &XLogCtl->Insert;
9438  uint64 current_bytepos;
9439 
9440  SpinLockAcquire(&Insert->insertpos_lck);
9441  current_bytepos = Insert->CurrBytePos;
9442  SpinLockRelease(&Insert->insertpos_lck);
9443 
9444  return XLogBytePosToRecPtr(current_bytepos);
9445 }

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

XLogRecPtr GetXLogWriteRecPtr ( void  )

Definition at line 9451 of file xlog.c.

9452 {
9453  RefreshXLogWriteResult(LogwrtResult);
9454 
9455  return LogwrtResult.Write;
9456 }
XLogRecPtr Write
Definition: xlog.c:327

References LogwrtResult, RefreshXLogWriteResult, and XLogwrtResult::Write.

Referenced by GetWALAvailability(), pg_attribute_noreturn(), pg_current_wal_lsn(), and pg_get_replication_slots().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4784 of file xlog.c.

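4785 {
4786  Assert(process_shared_preload_libraries_done);
4787 
4788  if (check_wal_consistency_checking_deferred)
4789  {
4790  struct config_generic *guc;
4791 
4792  guc = find_option("wal_consistency_checking", false, false, ERROR);
4793 
4794  check_wal_consistency_checking_deferred = false;
4795 
4796  set_config_option_ext("wal_consistency_checking",
4797  wal_consistency_checking_string,
4798  guc->scontext, guc->source, guc->srole,
4799  GUC_ACTION_SET, true, ERROR, false);
4800 
4801  /* checking should not be deferred again */
4802  Assert(!check_wal_consistency_checking_deferred);
4803  }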
4804 }
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1235
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3382
@ GUC_ACTION_SET
Definition: guc.h:199
bool process_shared_preload_libraries_done
Definition: miscinit.c:1840
GucContext scontext
Definition: guc_tables.h:167
GucSource source
Definition: guc_tables.h:165
char * wal_consistency_checking_string
Definition: xlog.c:125
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:166

References Assert, check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9492 of file xlog.c.

9493 {
9494  bool result;
9495 
9496  LWLockAcquire(ControlFileLock, LW_SHARED);
9497  result = XLogCtl->InstallXLogFileSegmentActive;
9498  LWLockRelease(ControlFileLock);
9499 
9500  return result;
9501 }
bool InstallXLogFileSegmentActive
Definition: xlog.c:526

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8683 of file xlog.c.

8684 {
8685  char *msg = NULL;
8686  instr_time start;
8687 
8688  Assert(tli != 0);
8689 
8690  /*
8691  * Quick exit if fsync is disabled or write() has already synced the WAL
8692  * file.
8693  */
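8694  if (!enableFsync ||
8695  wal_sync_method == WAL_SYNC_METHOD_OPEN ||
8696  wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
8697  return;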
8698 
8699  /* Measure I/O timing to sync the WAL file */
8700  if (track_wal_io_timing)
8702  else
8704 
8705  pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8706  switch (wal_sync_method)
8707  {
8708  case WAL_SYNC_METHOD_FSYNC:
8709  if (pg_fsync_no_writethrough(fd) != 0)
8710  msg = _("could not fsync file \"%s\": %m");
8711  break;
8712 #ifdef HAVE_FSYNC_WRITETHROUGH
8713  case WAL_SYNC_METHOD_FSYNC_WRITETHROUGH:
8714  if (pg_fsync_writethrough(fd) != 0)
8715  msg = _("could not fsync write-through file \"%s\": %m");
8716  break;
8717 #endif
8718  case WAL_SYNC_METHOD_FDATASYNC:
8719  if (pg_fdatasync(fd) != 0)
8720  msg = _("could not fdatasync file \"%s\": %m");
8721  break;
8722  case WAL_SYNC_METHOD_OPEN:
8723  case WAL_SYNC_METHOD_OPEN_DSYNC:
8724  /* not reachable */
8725  Assert(false);
8726  break;
8727  default:
8728  ereport(PANIC,
8729  errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8730  errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8731  break;
8732  }
8733 
8734  /* PANIC if failed to fsync */
8735  if (msg)
8736  {
8737  char xlogfname[MAXFNAMELEN];
8738  int save_errno = errno;
8739 
8740  XLogFileName(xlogfname, tli, segno, wal_segment_size);
8741  errno = save_errno;
8742  ereport(PANIC,
8744  errmsg(msg, xlogfname)));
8745  }
8746 
8748 
8749  /*
8750  * Increment the I/O timing and the number of times WAL files were synced.
8751  */
8752  if (track_wal_io_timing)
8753  {
8754  instr_time end;
8755 
8758  }
8759 
8761 }
#define _(x)
Definition: elog.c:90
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:440
int pg_fdatasync(int fd)
Definition: fd.c:479
int pg_fsync_writethrough(int fd)
Definition: fd.c:460
bool enableFsync
Definition: globals.c:128
return str start
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SET_ZERO(t)
Definition: instr_time.h:172
#define INSTR_TIME_ACCUM_DIFF(x, y, z)
Definition: instr_time.h:184
PgStat_PendingWalStats PendingWalStats
Definition: pgstat_wal.c:24
static int fd(const char *x, int i)
Definition: preproc-init.c:105
instr_time wal_sync_time
Definition: pgstat.h:493
PgStat_Counter wal_sync
Definition: pgstat.h:491
int wal_sync_method
Definition: xlog.c:130
bool track_wal_io_timing
Definition: xlog.c:137

References _, Assert, enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), INSTR_TIME_ACCUM_DIFF, INSTR_TIME_SET_CURRENT, INSTR_TIME_SET_ZERO, MAXFNAMELEN, PANIC, PendingWalStats, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, PgStat_PendingWalStats::wal_sync, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, PgStat_PendingWalStats::wal_sync_time, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4846 of file xlog.c.

4847 {
4848  Assert(reset || ControlFile == NULL);
4849  ControlFile = palloc(sizeof(ControlFileData));
4850  ReadControlFile();
4851 }
void reset(void)
Definition: sql-declare.c:600

References Assert, ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6247 of file xlog.c.

6248 {
6249  /*
6250  * We have reached the end of base backup, as indicated by pg_control. The
6251  * data on disk is now consistent (unless minRecoveryPoint is further
6252  * ahead, which can happen if we crashed during previous recovery). Reset
6253  * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6254  * make sure we don't allow starting up at an earlier point even if
6255  * recovery is stopped and restarted soon after this.
6256  */
6257  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6258 
6259  if (ControlFile->minRecoveryPoint < EndRecPtr)
6260  {
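6261  ControlFile->minRecoveryPoint = EndRecPtr;
6262  ControlFile->minRecoveryPointTLI = tli;
6263  }
6264 
6265  ControlFile->backupStartPoint = InvalidXLogRecPtr;
6266  ControlFile->backupEndPoint = InvalidXLogRecPtr;
6267  ControlFile->backupEndRequired = false;
6268  UpdateControlFile();
6269 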
6270  LWLockRelease(ControlFileLock);
6271 }
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6334 of file xlog.c.

6335 {
6336  /*
6337  * We check shared state each time only until we leave recovery mode. We
6338  * can't re-enter recovery, so there's no need to keep checking after the
6339  * shared variable has once been seen false.
6340  */
6341  if (!LocalRecoveryInProgress)
6342  return false;
6343  else
6344  {
6345  /*
6346  * use volatile pointer to make sure we make a fresh read of the
6347  * shared variable.
6348  */
6349  volatile XLogCtlData *xlogctl = XLogCtl;
6350 
6351  LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
6352 
6353  /*
6354  * Note: We don't need a memory barrier when we're still in recovery.
6355  * We might exit recovery immediately after return, so the caller
6356  * can't rely on 'true' meaning that we're still in recovery anyway.
6357  */
6358 
6359  return LocalRecoveryInProgress;
6360  }
6361 }
static bool LocalRecoveryInProgress
Definition: xlog.c:224

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), btree_index_mainfork_expected(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), 
ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9421 of file xlog.c.

9422 {
9423  static bool already_done = false;
9424 
9425  if (already_done)
9426  return;
9427  before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
9428  already_done = true;
9429 }
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), DatumGetBool(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3940 of file xlog.c.

3941 {
3942  DIR *xldir;
3943  struct dirent *xlde;
3944  char switchseg[MAXFNAMELEN];
3945  XLogSegNo endLogSegNo;
3946  XLogSegNo switchLogSegNo;
3947  XLogSegNo recycleSegNo;
3948 
3949  /*
3950  * Initialize info about where to begin the work. This will recycle,
3951  * somewhat arbitrarily, 10 future segments.
3952  */
3953  XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3954  XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3955  recycleSegNo = endLogSegNo + 10;
3956 
3957  /*
3958  * Construct a filename of the last segment to be kept.
3959  */
3960  XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3961 
3962  elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3963  switchseg);
3964 
3965  xldir = AllocateDir(XLOGDIR);
3966 
3967  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3968  {
3969  /* Ignore files that are not XLOG segments */
3970  if (!IsXLogFileName(xlde->d_name))
3971  continue;
3972 
3973  /*
3974  * Remove files that are on a timeline older than the new one we're
3975  * switching to, but with a segment number >= the first segment on the
3976  * new timeline.
3977  */
3978  if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3979  strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3980  {
3981  /*
3982  * If the file has already been marked as .ready, however, don't
3983  * remove it yet. It should be OK to remove it - files that are
3984  * not part of our timeline history are not required for recovery
3985  * - but seems safer to let them be archived and removed later.
3986  */
3987  if (!XLogArchiveIsReady(xlde->d_name))
3988  RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
3989  }
3990  }
3991 
3992  FreeDir(xldir);
3993 }
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4009
static bool IsXLogFileName(const char *fname)
#define XLOGDIR
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord(), and CleanupAfterArchiveRecovery().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9484 of file xlog.c.

9485 {
9486  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9487  XLogCtl->InstallXLogFileSegmentActive = true;
9488  LWLockRelease(ControlFileLock);
9489 }

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9507 of file xlog.c.

9508 {
9509  SpinLockAcquire(&XLogCtl->info_lck);
9510  XLogCtl->WalWriterSleeping = sleeping;
9511  SpinLockRelease(&XLogCtl->info_lck);
9512 }
bool WalWriterSleeping
Definition: xlog.c:533

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6602 of file xlog.c.

6603 {
6604  /*
6605  * We should have an aux process resource owner to use, and we should not
6606  * be in a transaction that's installed some other resowner.
6607  */
6609  Assert(CurrentResourceOwner == NULL ||
6612 
6613  /* Don't be chatty in standalone mode */
6615  (errmsg("shutting down")));
6616 
6617  /*
6618  * Signal walsenders to move to stopping state.
6619  */
6621 
6622  /*
6623  * Wait for WAL senders to be in stopping state. This prevents commands
6624  * from writing new WAL.
6625  */
6627 
6628  if (RecoveryInProgress())
6630  else
6631  {
6632  /*
6633  * If archiving is enabled, rotate the last XLOG file so that all the
6634  * remaining records are archived (postmaster wakes up the archiver
6635  * process one more time at the end of shutdown). The checkpoint
6636  * record will go to the next XLOG file and won't be archived (yet).
6637  */
6638  if (XLogArchivingActive())
6639  RequestXLogSwitch(false);
6640 
6642  }
6643 }
bool IsPostmasterEnvironment
Definition: globals.c:118
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:168
void WalSndInitStopping(void)
Definition: walsender.c:3717
void WalSndWaitStopping(void)
Definition: walsender.c:3743
bool CreateRestartPoint(int flags)
Definition: xlog.c:7591
bool CreateCheckPoint(int flags)
Definition: xlog.c:6889

References Assert, AuxProcessResourceOwner, CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by HandleCheckpointerInterrupts(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5429 of file xlog.c.

5430 {
5432  CheckPoint checkPoint;
5433  bool wasShutdown;
5434  bool didCrash;
5435  bool haveTblspcMap;
5436  bool haveBackupLabel;
5437  XLogRecPtr EndOfLog;
5438  TimeLineID EndOfLogTLI;
5439  TimeLineID newTLI;
5440  bool performedWalRecovery;
5441  EndOfWalRecoveryInfo *endOfRecoveryInfo;
5444  TransactionId oldestActiveXID;
5445  bool promoted = false;
5446 
5447  /*
5448  * We should have an aux process resource owner to use, and we should not
5449  * be in a transaction that's installed some other resowner.
5450  */
5452  Assert(CurrentResourceOwner == NULL ||
5455 
5456  /*
5457  * Check that contents look valid.
5458  */
5460  ereport(FATAL,
5462  errmsg("control file contains invalid checkpoint location")));
5463 
5464  switch (ControlFile->state)
5465  {
5466  case DB_SHUTDOWNED:
5467 
5468  /*
5469  * This is the expected case, so don't be chatty in standalone
5470  * mode
5471  */
5473  (errmsg("database system was shut down at %s",
5474  str_time(ControlFile->time))));
5475  break;
5476 
5478  ereport(LOG,
5479  (errmsg("database system was shut down in recovery at %s",
5480  str_time(ControlFile->time))));
5481  break;
5482 
5483  case DB_SHUTDOWNING:
5484  ereport(LOG,
5485  (errmsg("database system shutdown was interrupted; last known up at %s",
5486  str_time(ControlFile->time))));
5487  break;
5488 
5489  case DB_IN_CRASH_RECOVERY:
5490  ereport(LOG,
5491  (errmsg("database system was interrupted while in recovery at %s",
5493  errhint("This probably means that some data is corrupted and"
5494  " you will have to use the last backup for recovery.")));
5495  break;
5496 
5498  ereport(LOG,
5499  (errmsg("database system was interrupted while in recovery at log time %s",
5501  errhint("If this has occurred more than once some data might be corrupted"
5502  " and you might need to choose an earlier recovery target.")));
5503  break;
5504 
5505  case DB_IN_PRODUCTION:
5506  ereport(LOG,
5507  (errmsg("database system was interrupted; last known up at %s",
5508  str_time(ControlFile->time))));
5509  break;
5510 
5511  default:
5512  ereport(FATAL,
5514  errmsg("control file contains invalid database cluster state")));
5515  }
5516 
5517  /* This is just to allow attaching to startup process with a debugger */
5518 #ifdef XLOG_REPLAY_DELAY
5520  pg_usleep(60000000L);
5521 #endif
5522 
5523  /*
5524  * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5525  * In cases where someone has performed a copy for PITR, these directories
5526  * may have been excluded and need to be re-created.
5527  */
5529 
5530  /* Set up timeout handler needed to report startup progress. */
5534 
5535  /*----------
5536  * If we previously crashed, perform a couple of actions:
5537  *
5538  * - The pg_wal directory may still include some temporary WAL segments
5539  * used when creating a new segment, so perform some clean up to not
5540  * bloat this path. This is done first as there is no point to sync
5541  * this temporary data.
5542  *
5543  * - There might be data which we had written, intending to fsync it, but
5544  * which we had not actually fsync'd yet. Therefore, a power failure in
5545  * the near future might cause earlier unflushed writes to be lost, even
5546  * though more recent data written to disk from here on would be
5547  * persisted. To avoid that, fsync the entire data directory.
5548  */
5549  if (ControlFile->state != DB_SHUTDOWNED &&
5551  {
5554  didCrash = true;
5555  }
5556  else
5557  didCrash = false;
5558 
5559  /*
5560  * Prepare for WAL recovery if needed.
5561  *
5562  * InitWalRecovery analyzes the control file and the backup label file, if
5563  * any. It updates the in-memory ControlFile buffer according to the
5564  * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5565  * It also applies the tablespace map file, if any.
5566  */
5567  InitWalRecovery(ControlFile, &wasShutdown,
5568  &haveBackupLabel, &haveTblspcMap);
5569  checkPoint = ControlFile->checkPointCopy;
5570 
5571  /* initialize shared memory variables from the checkpoint record */
5572  TransamVariables->nextXid = checkPoint.nextXid;
5573  TransamVariables->nextOid = checkPoint.nextOid;
5575  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5576  AdvanceOldestClogXid(checkPoint.oldestXid);
5577  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5578  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5580  checkPoint.newestCommitTsXid);
5581  XLogCtl->ckptFullXid = checkPoint.nextXid;
5582 
5583  /*
5584  * Clear out any old relcache cache files. This is *necessary* if we do
5585  * any WAL replay, since that would probably result in the cache files
5586  * being out of sync with database reality. In theory we could leave them
5587  * in place if the database had been cleanly shut down, but it seems
5588  * safest to just remove them always and let them be rebuilt during the
5589  * first backend startup. These files need to be removed from all
5590  * directories including pg_tblspc; however, the symlinks are created only
5591  * after reading the tablespace_map file in case of archive recovery from
5592  * backup, so we need to clear old relcache files here, after creating
5593  * the symlinks.
5594  */
5596 
5597  /*
5598  * Initialize replication slots, before there's a chance to remove
5599  * required resources.
5600  */
5602 
5603  /*
5604  * Startup logical state, needs to be setup now so we have proper data
5605  * during crash recovery.
5606  */
5608 
5609  /*
5610  * Startup CLOG. This must be done after TransamVariables->nextXid has
5611  * been initialized and before we accept connections or begin WAL replay.
5612  */
5613  StartupCLOG();
5614 
5615  /*
5616  * Startup MultiXact. We need to do this early to be able to replay
5617  * truncations.
5618  */
5619  StartupMultiXact();
5620 
5621  /*
5622  * Ditto for commit timestamps. Activate the facility if the setting is
5623  * enabled in the control file, as there should be no tracking of commit
5624  * timestamps done when the setting was disabled. This facility can be
5625  * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5626  */
5628  StartupCommitTs();
5629 
5630  /*
5631  * Recover knowledge about replay progress of known replication partners.
5632  */
5634 
5635  /*
5636  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5637  * control file. On recovery, all unlogged relations are blown away, so
5638  * the unlogged LSN counter can be reset too.
5639  */
5643  else
5646 
5647  /*
5648  * Copy any missing timeline history files between 'now' and the recovery
5649  * target timeline from archive to pg_wal. While we don't need those files
5650  * ourselves - the history file of the recovery target timeline covers all
5651  * the previous timelines in the history too - a cascading standby server
5652  * might be interested in them. Or, if you archive the WAL from this
5653  * server to a different archive than the primary, it'd be good for all
5654  * the history files to get archived there after failover, so that you can
5655  * use one of the old timelines as a PITR target. Timeline history files
5656  * are small, so it's better to copy them unnecessarily than not copy them
5657  * and regret later.
5658  */
5660 
5661  /*
5662  * Before running in recovery, scan pg_twophase and fill in its status to
5663  * be able to work on entries generated by redo. Doing a scan before
5664  * taking any recovery action has the merit of discarding any 2PC files that
5665  * are newer than the first record to replay, avoiding any conflicts at
5666  * replay. This also avoids any subsequent scans when doing recovery
5667  * of the on-disk two-phase data.
5668  */
5670 
5671  /*
5672  * When starting with crash recovery, reset pgstat data - it might not be
5673  * valid. Otherwise restore pgstat data. It's safe to do this here,
5674  * because postmaster will not yet have started any other processes.
5675  *
5676  * NB: Restoring replication slot stats relies on slot state to have
5677  * already been restored from disk.
5678  *
5679  * TODO: With a bit of extra work we could just start with a pgstat file
5680  * associated with the checkpoint redo location we're starting from.
5681  */
5682  if (didCrash)
5684  else
5685  pgstat_restore_stats(checkPoint.redo);
5686 
5687  lastFullPageWrites = checkPoint.fullPageWrites;
5688 
5691 
5692  /* REDO */
5693  if (InRecovery)
5694  {
5695  /* Initialize state for RecoveryInProgress() */
5697  if (InArchiveRecovery)
5699  else
5702 
5703  /*
5704  * Update pg_control to show that we are recovering and to show the
5705  * selected checkpoint as the place we are starting from. We also mark
5706  * pg_control with any minimum recovery stop point obtained from a
5707  * backup history file.
5708  *
5709  * No need to hold ControlFileLock yet, we aren't up far enough.
5710  */
5712 
5713  /*
5714  * If there was a backup label file, it's done its job and the info
5715  * has now been propagated into pg_control. We must get rid of the
5716  * label file so that if we crash during recovery, we'll pick up at
5717  * the latest recovery restartpoint instead of going all the way back
5718  * to the backup start point. It seems prudent though to just rename
5719  * the file out of the way rather than delete it completely.
5720  */
5721  if (haveBackupLabel)
5722  {
5723  unlink(BACKUP_LABEL_OLD);
5725  }
5726 
5727  /*
5728  * If there was a tablespace_map file, it's done its job and the
5729  * symlinks have been created. We must get rid of the map file so
5730  * that if we crash during recovery, we don't create symlinks again.
5731  * It seems prudent though to just rename the file out of the way
5732  * rather than delete it completely.
5733  */
5734  if (haveTblspcMap)
5735  {
5736  unlink(TABLESPACE_MAP_OLD);
5738  }
5739 
5740  /*
5741  * Initialize our local copy of minRecoveryPoint. When doing crash
5742  * recovery we want to replay up to the end of WAL. Particularly, in
5743  * the case of a promoted standby minRecoveryPoint value in the
5744  * control file is only updated after the first checkpoint. However,
5745  * if the instance crashes before the first post-recovery checkpoint
5746  * is completed then recovery will use a stale location causing the
5747  * startup process to think that there are still invalid page
5748  * references when checking for data consistency.
5749  */
5750  if (InArchiveRecovery)
5751  {
5754  }
5755  else
5756  {
5759  }
5760 
5761  /* Check that the GUCs used to generate the WAL allow recovery */
5763 
5764  /*
5765  * We're in recovery, so unlogged relations may be trashed and must be
5766  * reset. This should be done BEFORE allowing Hot Standby
5767  * connections, so that read-only backends don't try to read whatever
5768  * garbage is left over from before.
5769  */
5771 
5772  /*
5773  * Likewise, delete any saved transaction snapshot files that got left
5774  * behind by crashed backends.
5775  */
5777 
5778  /*
5779  * Initialize for Hot Standby, if enabled. We won't let backends in
5780  * yet, not until we've reached the min recovery point specified in
5781  * control file and we've established a recovery snapshot from a
5782  * running-xacts WAL record.
5783  */
5785  {
5786  TransactionId *xids;
5787  int nxids;
5788 
5789  ereport(DEBUG1,
5790  (errmsg_internal("initializing for hot standby")));
5791 
5793 
5794  if (wasShutdown)
5795  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5796  else
5797  oldestActiveXID = checkPoint.oldestActiveXid;
5798  Assert(TransactionIdIsValid(oldestActiveXID));
5799 
5800  /* Tell procarray about the range of xids it has to deal with */
5802 
5803  /*
5804  * Startup subtrans only. CLOG, MultiXact and commit timestamp
5805  * have already been started up and other SLRUs are not maintained
5806  * during recovery and need not be started yet.
5807  */
5808  StartupSUBTRANS(oldestActiveXID);
5809 
5810  /*
5811  * If we're beginning at a shutdown checkpoint, we know that
5812  * nothing was running on the primary at this point. So fake-up an
5813  * empty running-xacts record and use that here and now. Recover
5814  * additional standby state for prepared transactions.
5815  */
5816  if (wasShutdown)
5817  {
5818  RunningTransactionsData running;
5819  TransactionId latestCompletedXid;
5820 
5821  /* Update pg_subtrans entries for any prepared transactions */
5823 
5824  /*
5825  * Construct a RunningTransactions snapshot representing a
5826  * shut down server, with only prepared transactions still
5827  * alive. We're never overflowed at this point because all
5828  * subxids are listed with their parent prepared transactions.
5829  */
5830  running.xcnt = nxids;
5831  running.subxcnt = 0;
5833  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5834  running.oldestRunningXid = oldestActiveXID;
5835  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5836  TransactionIdRetreat(latestCompletedXid);
5837  Assert(TransactionIdIsNormal(latestCompletedXid));
5838  running.latestCompletedXid = latestCompletedXid;
5839  running.xids = xids;
5840 
5841  ProcArrayApplyRecoveryInfo(&running);
5842  }
5843  }
5844 
5845  /*
5846  * We're all set for replaying the WAL now. Do it.
5847  */
5849  performedWalRecovery = true;
5850  }
5851  else
5852  performedWalRecovery = false;
5853 
5854  /*
5855  * Finish WAL recovery.
5856  */
5857  endOfRecoveryInfo = FinishWalRecovery();
5858  EndOfLog = endOfRecoveryInfo->endOfLog;
5859  EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5860  abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5861  missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5862 
5863  /*
5864  * Reset ps status display, so that no information related to recovery shows
5865  * up.
5866  */
5867  set_ps_display("");
5868 
5869  /*
5870  * When recovering from a backup (we are in recovery, and archive recovery
5871  * was requested), complain if we did not roll forward far enough to reach
5872  * the point where the database is consistent. For regular online
5873  * backup-from-primary, that means reaching the end-of-backup WAL record
5874  * (at which point we reset backupStartPoint to be Invalid), for
5875  * backup-from-replica (which can't inject records into the WAL stream),
5876  * that point is when we reach the minRecoveryPoint in pg_control (which
5877  * we purposefully copy last when backing up from a replica). For
5878  * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5879  * or snapshot-style backups (which don't), backupEndRequired will be set
5880  * to false.
5881  *
5882  * Note: it is indeed okay to look at the local variable
5883  * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5884  * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5885  * been advanced beyond the WAL we processed.
5886  */
5887  if (InRecovery &&
5888  (EndOfLog < LocalMinRecoveryPoint ||
5890  {
5891  /*
5892  * Ran off end of WAL before reaching end-of-backup WAL record, or
5893  * minRecoveryPoint. That's a bad sign, indicating that you tried to
5894  * recover from an online backup but never called pg_backup_stop(), or
5895  * you didn't archive all the WAL needed.
5896  */
5898  {
5900  ereport(FATAL,
5901  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5902  errmsg("WAL ends before end of online backup"),
5903  errhint("All WAL generated while online backup was taken must be available at recovery.")));
5904  else
5905  ereport(FATAL,
5906  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5907  errmsg("WAL ends before consistent recovery point")));
5908  }
5909  }
5910 
5911  /*
5912  * Reset unlogged relations to the contents of their INIT fork. This is
5913  * done AFTER recovery is complete so as to include any unlogged relations
5914  * created during recovery, but BEFORE recovery is marked as having
5915  * completed successfully. Otherwise we'd not retry if any of the post
5916  * end-of-recovery steps fail.
5917  */
5918  if (InRecovery)
5920 
5921  /*
5922  * Pre-scan prepared transactions to find out the range of XIDs present.
5923  * This information is not quite needed yet, but it is positioned here so
5924  * that potential problems are detected before any on-disk change is done.
5925  */
5926  oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5927 
5928  /*
5929  * Allow ordinary WAL segment creation before possibly switching to a new
5930  * timeline, which creates a new segment, and after the last ReadRecord().
5931  */
5933 
5934  /*
5935  * Consider whether we need to assign a new timeline ID.
5936  *
5937  * If we did archive recovery, we always assign a new ID. This handles a
5938  * couple of issues. If we stopped short of the end of WAL during
5939  * recovery, then we are clearly generating a new timeline and must assign
5940  * it a unique new ID. Even if we ran to the end, modifying the current
5941  * last segment is problematic because it may result in trying to
5942  * overwrite an already-archived copy of that segment, and we encourage
5943  * DBAs to make their archive_commands reject that. We can dodge the
5944  * problem by making the new active segment have a new timeline ID.
5945  *
5946  * In a normal crash recovery, we can just extend the timeline we were in.
5947  */
5948  newTLI = endOfRecoveryInfo->lastRecTLI;
5950  {
5951  newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
5952  ereport(LOG,
5953  (errmsg("selected new timeline ID: %u", newTLI)));
5954 
5955  /*
5956  * Make a writable copy of the last WAL segment. (Note that we also
5957  * have a copy of the last block of the old WAL in
5958  * endOfRecoveryInfo->lastPage; we will use that below.)
5959  */
5960  XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
5961 
5962  /*
5963  * Remove the signal files out of the way, so that we don't
5964  * accidentally re-enter archive recovery mode in a subsequent crash.
5965  */
5966  if (endOfRecoveryInfo->standby_signal_file_found)
5968 
5969  if (endOfRecoveryInfo->recovery_signal_file_found)
5971 
5972  /*
5973  * Write the timeline history file, and have it archived. After this
5974  * point (or rather, as soon as the file is archived), the timeline
5975  * will appear as "taken" in the WAL archive and to any standby
5976  * servers. If we crash before actually switching to the new
5977  * timeline, standby servers will nevertheless think that we switched
5978  * to the new timeline, and will try to connect to the new timeline.
5979  * To minimize the window for that, try to do as little as possible
5980  * between here and writing the end-of-recovery record.
5981  */
5983  EndOfLog, endOfRecoveryInfo->recoveryStopReason);
5984 
5985  ereport(LOG,
5986  (errmsg("archive recovery complete")));
5987  }
5988 
5989  /* Save the selected TimeLineID in shared memory, too */
5991  XLogCtl->InsertTimeLineID = newTLI;
5992  XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
5994 
5995  /*
5996  * Actually, if WAL ended in an incomplete record, skip the parts that
5997  * made it through and start writing after the portion that persisted.
5998  * (It's critical to first write an OVERWRITE_CONTRECORD message, which
5999  * we'll do as soon as we're open for writing new WAL.)
6000  */
6002  {
6003  /*
6004  * We should only have a missingContrecPtr if we're not switching to a
6005  * new timeline. When a timeline switch occurs, WAL is copied from the
6006  * old timeline to the new only up to the end of the last complete
6007  * record, so there can't be an incomplete WAL record that we need to
6008  * disregard.
6009  */
6010  Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6012  EndOfLog = missingContrecPtr;
6013  }
6014 
6015  /*
6016  * Prepare to write WAL starting at EndOfLog location, and init xlog
6017  * buffer cache using the block containing the last record from the
6018  * previous incarnation.
6019  */
6020  Insert = &XLogCtl->Insert;
6021  Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6022  Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6023 
6024  /*
6025  * Tricky point here: lastPage contains the *last* block that the LastRec
6026  * record spans, not the one it starts in. The last block is indeed the
6027  * one we want to use.
6028  */
6029  if (EndOfLog % XLOG_BLCKSZ != 0)
6030  {
6031  char *page;
6032  int len;
6033  int firstIdx;
6034 
6035  firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6036  len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6037  Assert(len < XLOG_BLCKSZ);
6038 
6039  /* Copy the valid part of the last block, and zero the rest */
6040  page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6041  memcpy(page, endOfRecoveryInfo->lastPage, len);
6042  memset(page + len, 0, XLOG_BLCKSZ - len);
6043 
6044  pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6045  XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6046  }
6047  else
6048  {
6049  /*
6050  * There is no partial block to copy. Just set InitializedUpTo, and
6051  * let the first attempt to insert a log record to initialize the next
6052  * buffer.
6053  */
6054  XLogCtl->InitializedUpTo = EndOfLog;
6055  }
6056 
6057  /*
6058  * Update local and shared status. This is OK to do without any locks
6059  * because no other process can be reading or writing WAL yet.
6060  */
6061  LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6065  XLogCtl->LogwrtRqst.Write = EndOfLog;
6066  XLogCtl->LogwrtRqst.Flush = EndOfLog;
6067 
6068  /*
6069  * Preallocate additional log files, if wanted.
6070  */
6071  PreallocXlogFiles(EndOfLog, newTLI);
6072 
6073  /*
6074  * Okay, we're officially UP.
6075  */
6076  InRecovery = false;
6077 
6078  /* start the archive_timeout timer and LSN running */
6079  XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6080  XLogCtl->lastSegSwitchLSN = EndOfLog;
6081 
6082  /* also initialize latestCompletedXid, to nextXid - 1 */
6083  LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6086  LWLockRelease(ProcArrayLock);
6087 
6088  /*
6089  * Start up subtrans, if not already done for hot standby. (commit
6090  * timestamps are started below, if necessary.)
6091  */
6093  StartupSUBTRANS(oldestActiveXID);
6094 
6095  /*
6096  * Perform end of recovery actions for any SLRUs that need it.
6097  */
6098  TrimCLOG();
6099  TrimMultiXact();
6100 
6101  /*
6102  * Reload shared-memory state for prepared transactions. This needs to
6103  * happen before renaming the last partial segment of the old timeline as
6104  * it may be possible that we have to recover some transactions from it.
6105  */
6107 
6108  /* Shut down xlogreader */
6110 
6111  /* Enable WAL writes for this backend only. */
6113 
6114  /* If necessary, write overwrite-contrecord before doing anything else */
6116  {
6119  }
6120 
6121  /*
6122  * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6123  * record before resource manager writes cleanup WAL records or checkpoint
6124  * record is written.
6125  */
6126  Insert->fullPageWrites = lastFullPageWrites;
6128 
6129  /*
6130  * Emit checkpoint or end-of-recovery record in XLOG, if required.
6131  */
6132  if (performedWalRecovery)
6133  promoted = PerformRecoveryXLogAction();
6134 
6135  /*
6136  * If any of the critical GUCs have changed, log them before we allow
6137  * backends to write WAL.
6138  */
6140 
6141  /* If this is archive recovery, perform post-recovery cleanup actions. */
6143  CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6144 
6145  /*
6146  * Local WAL inserts enabled, so it's time to finish initialization of
6147  * commit timestamp.
6148  */
6150 
6151  /*
6152  * All done with end-of-recovery actions.
6153  *
6154  * Now allow backends to write WAL and update the control file status
6155  * accordingly. SharedRecoveryState, which controls whether backends can write
6156  * WAL, is updated while holding ControlFileLock to prevent other backends
6157  * from looking at an inconsistent state of the control file in shared memory.
6158  * There is still a small window during which backends can write WAL and
6159  * the control file is still referring to a system not in DB_IN_PRODUCTION
6160  * state while looking at the on-disk control file.
6161  *
6162  * Also, we use info_lck to update SharedRecoveryState to ensure that
6163  * there are no race conditions concerning visibility of other recent
6164  * updates to shared memory.
6165  */
6166  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6168 
6172 
6174  LWLockRelease(ControlFileLock);
6175 
6176  /*
6177  * Shutdown the recovery environment. This must occur after
6178  * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6179  * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6180  * any session building a snapshot will not rely on KnownAssignedXids as
6181  * RecoveryInProgress() would return false at this stage. This is
6182  * particularly critical for prepared 2PC transactions, that would still
6183  * need to be included in snapshots once recovery has ended.
6184  */
6187 
6188  /*
6189  * If there were cascading standby servers connected to us, nudge any wal
6190  * sender processes to notice that we've been promoted.
6191  */
6192  WalSndWakeup(true, true);
6193 
6194  /*
6195  * If this was a promotion, request an (online) checkpoint now. This isn't
6196  * required for consistency, but the last restartpoint might be far back,
6197  * and in case of a crash, recovering from it might take longer than is
6198  * appropriate now that we're not in standby mode anymore.
6199  */
6200  if (promoted)
6202 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:494
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:657
void StartupCLOG(void)
Definition: clog.c:877
void TrimCLOG(void)
Definition: clog.c:892
void StartupCommitTs(void)
Definition: commit_ts.c:632
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:642
#define FATAL
Definition: elog.h:41
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:781
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:871
void SyncDataDirectory(void)
Definition: fd.c:3567
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
void TrimMultiXact(void)
Definition: multixact.c:2178
void StartupMultiXact(void)
Definition: multixact.c:2153
void StartupReplicationOrigin(void)
Definition: origin.c:703
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(XLogRecPtr redo)
Definition: pgstat.c:488
void pgstat_discard_stats(void)
Definition: pgstat.c:500
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6797
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:1924
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1567
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:94
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:160
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
bool track_commit_timestamp
Definition: pg_control.h:185
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:111
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:120
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:121
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:109
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
XLogRecPtr InitializedUpTo
Definition: xlog.c:485
char * pages
Definition: xlog.c:492
pg_time_t lastSegSwitchTime
Definition: xlog.c:467
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:468
pg_atomic_uint64 * xlblocks
Definition: xlog.c:493
pg_atomic_uint64 logWriteResult
Definition: xlog.c:472
pg_atomic_uint64 logFlushResult
Definition: xlog.c:473
pg_atomic_uint64 logInsertResult
Definition: xlog.c:471
XLogRecPtr Flush
Definition: xlog.c:322
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:309
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2090
void restoreTwoPhaseData(void)
Definition: twophase.c:1905
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1969
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2049
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3638
void UpdateFullPageWrites(void)
Definition: xlog.c:8166
static char * str_time(pg_time_t tnow)
Definition: xlog.c:5199
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4099
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7440
static void XLogReportParameters(void)
Definition: xlog.c:8103
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6284
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5289
static bool lastFullPageWrites
Definition: xlog.c:217
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1941
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5214
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5385
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:591
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3832
#define TABLESPACE_MAP_OLD
Definition: xlog.h:306
#define TABLESPACE_MAP
Definition: xlog.h:305
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:301
#define BACKUP_LABEL_OLD
Definition: xlog.h:303
#define BACKUP_LABEL_FILE
Definition: xlog.h:302
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:300
#define XRecOffIsValid(xlrp)
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
void ShutdownWalRecovery(void)
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:137
bool InArchiveRecovery
Definition: xlogrecovery.c:138
void PerformWalRecovery(void)
EndOfWalRecoveryInfo * FinishWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:373
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:372
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:512
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:122
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert, AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, 
missingContrecPtr, EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, 
str_time(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6209 of file xlog.c.

6210 {
6211  /* initialize minRecoveryPoint to this record */
6212  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6214  if (ControlFile->minRecoveryPoint < EndRecPtr)
6215  {
6216  ControlFile->minRecoveryPoint = EndRecPtr;
6217  ControlFile->minRecoveryPointTLI = replayTLI;
6218  }
6219  /* update local copy */
6222 
6223  /*
6224  * The startup process can update its local copy of minRecoveryPoint from
6225  * this point.
6226  */
6227  updateMinRecoveryPoint = true;
6228 
6230 
6231  /*
6232  * We update SharedRecoveryState while holding the lock on ControlFileLock
6233  * so both states are consistent in shared memory.
6234  */
6238 
6239  LWLockRelease(ControlFileLock);
6240 }
static bool updateMinRecoveryPoint
Definition: xlog.c:648

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8166 of file xlog.c.

8167 {
8169  bool recoveryInProgress;
8170 
8171  /*
8172  * Do nothing if full_page_writes has not been changed.
8173  *
8174  * It's safe to check the shared full_page_writes without the lock,
8175  * because we assume that there is no concurrently running process which
8176  * can update it.
8177  */
8178  if (fullPageWrites == Insert->fullPageWrites)
8179  return;
8180 
8181  /*
8182  * Perform this outside critical section so that the WAL insert
8183  * initialization done by RecoveryInProgress() doesn't trigger an
8184  * assertion failure.
8185  */
8186  recoveryInProgress = RecoveryInProgress();
8187 
8189 
8190  /*
8191  * It's always safe to take full page images, even when not strictly
8192  * required, but not the other way round. So if we're setting full_page_writes
8193  * to true, first set it true and then write the WAL record. If we're
8194  * setting it to false, first write the WAL record and then set the global
8195  * flag.
8196  */
8197  if (fullPageWrites)
8198  {
8200  Insert->fullPageWrites = true;
8202  }
8203 
8204  /*
8205  * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8206  * full_page_writes during archive recovery, if required.
8207  */
8208  if (XLogStandbyInfoActive() && !recoveryInProgress)
8209  {
8210  XLogBeginInsert();
8211  XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
8212 
8213  XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8214  }
8215 
8216  if (!fullPageWrites)
8217  {
8219  Insert->fullPageWrites = false;
8221  }
8222  END_CRIT_SECTION();
8223 }
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1748 of file xlog.c.

1750 {
1751  char *pdst = dstbuf;
1752  XLogRecPtr recptr = startptr;
1753  XLogRecPtr inserted;
1754  Size nbytes = count;
1755 
1756  if (RecoveryInProgress() || tli != GetWALInsertionTimeLine())
1757  return 0;
1758 
1759  Assert(!XLogRecPtrIsInvalid(startptr));
1760 
1761  /*
1762  * Caller should ensure that the requested data has been inserted into WAL
1763  * buffers before we try to read it.
1764  */
1766  if (startptr + count > inserted)
1767  ereport(ERROR,
1768  errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1769  LSN_FORMAT_ARGS(startptr + count),
1770  LSN_FORMAT_ARGS(inserted)));
1771 
1772  /*
1773  * Loop through the buffers without a lock. For each buffer, atomically
1774  * read and verify the end pointer, then copy the data out, and finally
1775  * re-read and re-verify the end pointer.
1776  *
1777  * Once a page is evicted, it never returns to the WAL buffers, so if the
1778  * end pointer matches the expected end pointer before and after we copy
1779  * the data, then the right page must have been present during the data
1780  * copy. Read barriers are necessary to ensure that the data copy actually
1781  * happens between the two verification steps.
1782  *
1783  * If either verification fails, we simply terminate the loop and return
1784  * with the data that had been already copied out successfully.
1785  */
1786  while (nbytes > 0)
1787  {
1788  uint32 offset = recptr % XLOG_BLCKSZ;
1789  int idx = XLogRecPtrToBufIdx(recptr);
1790  XLogRecPtr expectedEndPtr;
1791  XLogRecPtr endptr;
1792  const char *page;
1793  const char *psrc;
1794  Size npagebytes;
1795 
1796  /*
1797  * Calculate the end pointer we expect in the xlblocks array if the
1798  * correct page is present.
1799  */
1800  expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1801 
1802  /*
1803  * First verification step: check that the correct page is present in
1804  * the WAL buffers.
1805  */
1806  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1807  if (expectedEndPtr != endptr)
1808  break;
1809 
1810  /*
1811  * The correct page is present (or was at the time the endptr was
1812  * read; must re-verify later). Calculate pointer to source data and
1813  * determine how much data to read from this page.
1814  */
1815  page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1816  psrc = page + offset;
1817  npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1818 
1819  /*
1820  * Ensure that the data copy and the first verification step are not
1821  * reordered.
1822  */
1823  pg_read_barrier();
1824 
1825  /* data copy */
1826  memcpy(pdst, psrc, npagebytes);
1827 
1828  /*
1829  * Ensure that the data copy and the second verification step are not
1830  * reordered.
1831  */
1832  pg_read_barrier();
1833 
1834  /*
1835  * Second verification step: check that the page we read from wasn't
1836  * evicted while we were copying the data.
1837  */
1838  endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
1839  if (expectedEndPtr != endptr)
1840  break;
1841 
1842  pdst += npagebytes;
1843  recptr += npagebytes;
1844  nbytes -= npagebytes;
1845  }
1846 
1847  Assert(pdst - dstbuf <= count);
1848 
1849  return pdst - dstbuf;
1850 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define pg_read_barrier()
Definition: atomics.h:156
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
#define Min(x, y)
Definition: c.h:1009
size_t Size
Definition: c.h:610
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6520

References Assert, ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsInvalid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ xlog_desc()

void xlog_desc ( StringInfo  buf,
struct XLogReaderState *  record 
)

Definition at line 58 of file xlogdesc.c.

59 {
60  char *rec = XLogRecGetData(record);
61  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
62 
63  if (info == XLOG_CHECKPOINT_SHUTDOWN ||
64  info == XLOG_CHECKPOINT_ONLINE)
65  {
66  CheckPoint *checkpoint = (CheckPoint *) rec;
67 
68  appendStringInfo(buf, "redo %X/%X; "
69  "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
70  "oldest xid %u in DB %u; oldest multi %u in DB %u; "
71  "oldest/newest commit timestamp xid: %u/%u; "
72  "oldest running xid %u; %s",
73  LSN_FORMAT_ARGS(checkpoint->redo),
74  checkpoint->ThisTimeLineID,
75  checkpoint->PrevTimeLineID,
76  checkpoint->fullPageWrites ? "true" : "false",
77  get_wal_level_string(checkpoint->wal_level),
79  XidFromFullTransactionId(checkpoint->nextXid),
80  checkpoint->nextOid,
81  checkpoint->nextMulti,
82  checkpoint->nextMultiOffset,
83  checkpoint->oldestXid,
84  checkpoint->oldestXidDB,
85  checkpoint->oldestMulti,
86  checkpoint->oldestMultiDB,
87  checkpoint->oldestCommitTsXid,
88  checkpoint->newestCommitTsXid,
89  checkpoint->oldestActiveXid,
90  (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
91  }
92  else if (info == XLOG_NEXTOID)
93  {
94  Oid nextOid;
95 
96  memcpy(&nextOid, rec, sizeof(Oid));
97  appendStringInfo(buf, "%u", nextOid);
98  }
99  else if (info == XLOG_RESTORE_POINT)
100  {
101  xl_restore_point *xlrec = (xl_restore_point *) rec;
102 
104  }
105  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
106  {
107  /* no further information to print */
108  }
109  else if (info == XLOG_BACKUP_END)
110  {
111  XLogRecPtr startpoint;
112 
113  memcpy(&startpoint, rec, sizeof(XLogRecPtr));
114  appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint));
115  }
116  else if (info == XLOG_PARAMETER_CHANGE)
117  {
118  xl_parameter_change xlrec;
119  const char *wal_level_str;
120 
121  memcpy(&xlrec, rec, sizeof(xl_parameter_change));
123 
124  appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
125  "max_wal_senders=%d max_prepared_xacts=%d "
126  "max_locks_per_xact=%d wal_level=%s "
127  "wal_log_hints=%s track_commit_timestamp=%s",
128  xlrec.MaxConnections,
129  xlrec.max_worker_processes,
130  xlrec.max_wal_senders,
131  xlrec.max_prepared_xacts,
132  xlrec.max_locks_per_xact,
134  xlrec.wal_log_hints ? "on" : "off",
135  xlrec.track_commit_timestamp ? "on" : "off");
136  }
137  else if (info == XLOG_FPW_CHANGE)
138  {
139  bool fpw;
140 
141  memcpy(&fpw, rec, sizeof(bool));
142  appendStringInfoString(buf, fpw ? "true" : "false");
143  }
144  else if (info == XLOG_END_OF_RECOVERY)
145  {
146  xl_end_of_recovery xlrec;
147 
148  memcpy(&xlrec, rec, sizeof(xl_end_of_recovery));
149  appendStringInfo(buf, "tli %u; prev tli %u; time %s; wal_level %s",
150  xlrec.ThisTimeLineID, xlrec.PrevTimeLineID,
153  }
154  else if (info == XLOG_OVERWRITE_CONTRECORD)
155  {
157 
158  memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord));
159  appendStringInfo(buf, "lsn %X/%X; time %s",
162  }
163  else if (info == XLOG_CHECKPOINT_REDO)
164  {
165  int wal_level;
166 
167  memcpy(&wal_level, rec, sizeof(int));
169  }
170 }
static const char * wal_level_str(WalLevel wal_level)
unsigned char uint8
Definition: c.h:516
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
static char * buf
Definition: pg_test_fsync.c:72
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID
char rp_name[MAXFNAMELEN]
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
static const char * get_wal_level_string(int wal_level)
Definition: xlogdesc.c:40
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLR_INFO_MASK
Definition: xlogrecord.h:62

References appendStringInfo(), appendStringInfoString(), buf, xl_end_of_recovery::end_time, EpochFromFullTransactionId, CheckPoint::fullPageWrites, get_wal_level_string(), LSN_FORMAT_ARGS, xl_parameter_change::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, xl_parameter_change::max_wal_senders, xl_parameter_change::max_worker_processes, xl_parameter_change::MaxConnections, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, CheckPoint::nextXid, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, xl_end_of_recovery::PrevTimeLineID, CheckPoint::PrevTimeLineID, CheckPoint::redo, xl_restore_point::rp_name, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, timestamptz_to_str(), xl_parameter_change::track_commit_timestamp, wal_level, xl_parameter_change::wal_level, xl_end_of_recovery::wal_level, CheckPoint::wal_level, wal_level_str(), xl_parameter_change::wal_log_hints, XidFromFullTransactionId, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLogRecGetData, XLogRecGetInfo, and XLR_INFO_MASK.

◆ xlog_identify()

const char* xlog_identify ( uint8  info)

Definition at line 173 of file xlogdesc.c.

174 {
175  const char *id = NULL;
176 
177  switch (info & ~XLR_INFO_MASK)
178  {
180  id = "CHECKPOINT_SHUTDOWN";
181  break;
183  id = "CHECKPOINT_ONLINE";
184  break;
185  case XLOG_NOOP:
186  id = "NOOP";
187  break;
188  case XLOG_NEXTOID:
189  id = "NEXTOID";
190  break;
191  case XLOG_SWITCH:
192  id = "SWITCH";
193  break;
194  case XLOG_BACKUP_END:
195  id = "BACKUP_END";
196  break;
198  id = "PARAMETER_CHANGE";
199  break;
200  case XLOG_RESTORE_POINT:
201  id = "RESTORE_POINT";
202  break;
203  case XLOG_FPW_CHANGE:
204  id = "FPW_CHANGE";
205  break;
207  id = "END_OF_RECOVERY";
208  break;
210  id = "OVERWRITE_CONTRECORD";
211  break;
212  case XLOG_FPI:
213  id = "FPI";
214  break;
215  case XLOG_FPI_FOR_HINT:
216  id = "FPI_FOR_HINT";
217  break;
219  id = "CHECKPOINT_REDO";
220  break;
221  }
222 
223  return id;
224 }
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_SWITCH
Definition: pg_control.h:72

References XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, and XLR_INFO_MASK.

◆ xlog_redo()

void xlog_redo ( struct XLogReaderState *  record)

Definition at line 8235 of file xlog.c.

8236 {
8237  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8238  XLogRecPtr lsn = record->EndRecPtr;
8239 
8240  /*
8241  * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8242  * XLOG_FPI_FOR_HINT records.
8243  */
8244  Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8245  !XLogRecHasAnyBlockRefs(record));
8246 
8247  if (info == XLOG_NEXTOID)
8248  {
8249  Oid nextOid;
8250 
8251  /*
8252  * We used to try to take the maximum of TransamVariables->nextOid and
8253  * the recorded nextOid, but that fails if the OID counter wraps
8254  * around. Since no OID allocation should be happening during replay
8255  * anyway, better to just believe the record exactly. We still take
8256  * OidGenLock while setting the variable, just in case.
8257  */
8258  memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8259  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8260  TransamVariables->nextOid = nextOid;
8262  LWLockRelease(OidGenLock);
8263  }
8264  else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8265  {
8266  CheckPoint checkPoint;
8267  TimeLineID replayTLI;
8268 
8269  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8270  /* In a SHUTDOWN checkpoint, believe the counters exactly */
8271  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8272  TransamVariables->nextXid = checkPoint.nextXid;
8273  LWLockRelease(XidGenLock);
8274  LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8275  TransamVariables->nextOid = checkPoint.nextOid;
8277  LWLockRelease(OidGenLock);
8278  MultiXactSetNextMXact(checkPoint.nextMulti,
8279  checkPoint.nextMultiOffset);
8280 
8282  checkPoint.oldestMultiDB);
8283 
8284  /*
8285  * No need to set oldestClogXid here as well; it'll be set when we
8286  * redo an xl_clog_truncate if it changed since initialization.
8287  */
8288  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8289 
8290  /*
8291  * If we see a shutdown checkpoint while waiting for an end-of-backup
8292  * record, the backup was canceled and the end-of-backup record will
8293  * never arrive.
8294  */
8298  ereport(PANIC,
8299  (errmsg("online backup was canceled, recovery cannot continue")));
8300 
8301  /*
8302  * If we see a shutdown checkpoint, we know that nothing was running
8303  * on the primary at this point. So fake-up an empty running-xacts
8304  * record and use that here and now. Recover additional standby state
8305  * for prepared transactions.
8306  */
8308  {
8309  TransactionId *xids;
8310  int nxids;
8311  TransactionId oldestActiveXID;
8312  TransactionId latestCompletedXid;
8313  RunningTransactionsData running;
8314 
8315  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8316 
8317  /* Update pg_subtrans entries for any prepared transactions */
8319 
8320  /*
8321  * Construct a RunningTransactions snapshot representing a shut
8322  * down server, with only prepared transactions still alive. We're
8323  * never overflowed at this point because all subxids are listed
8324  * with their parent prepared transactions.
8325  */
8326  running.xcnt = nxids;
8327  running.subxcnt = 0;
8329  running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8330  running.oldestRunningXid = oldestActiveXID;
8331  latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8332  TransactionIdRetreat(latestCompletedXid);
8333  Assert(TransactionIdIsNormal(latestCompletedXid));
8334  running.latestCompletedXid = latestCompletedXid;
8335  running.xids = xids;
8336 
8337  ProcArrayApplyRecoveryInfo(&running);
8338  }
8339 
8340  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8341  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8342  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8343  LWLockRelease(ControlFileLock);
8344 
8345  /* Update shared-memory copy of checkpoint XID/epoch */
8347  XLogCtl->ckptFullXid = checkPoint.nextXid;
8349 
8350  /*
8351  * We should've already switched to the new TLI before replaying this
8352  * record.
8353  */
8354  (void) GetCurrentReplayRecPtr(&replayTLI);
8355  if (checkPoint.ThisTimeLineID != replayTLI)
8356  ereport(PANIC,
8357  (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8358  checkPoint.ThisTimeLineID, replayTLI)));
8359 
8360  RecoveryRestartPoint(&checkPoint, record);
8361  }
8362  else if (info == XLOG_CHECKPOINT_ONLINE)
8363  {
8364  CheckPoint checkPoint;
8365  TimeLineID replayTLI;
8366 
8367  memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8368  /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8369  LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8371  checkPoint.nextXid))
8372  TransamVariables->nextXid = checkPoint.nextXid;
8373  LWLockRelease(XidGenLock);
8374 
8375  /*
8376  * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8377  * to track OID assignment through XLOG_NEXTOID records. The nextOid
8378  * counter is from the start of the checkpoint and might well be stale
8379  * compared to later XLOG_NEXTOID records. We could try to take the
8380  * maximum of the nextOid counter and our latest value, but since
8381  * there's no particular guarantee about the speed with which the OID
8382  * counter wraps around, that's a risky thing to do. In any case,
8383  * users of the nextOid counter are required to avoid assignment of
8384  * duplicates, so that a somewhat out-of-date value should be safe.
8385  */
8386 
8387  /* Handle multixact */
8389  checkPoint.nextMultiOffset);
8390 
8391  /*
8392  * NB: This may perform multixact truncation when replaying WAL
8393  * generated by an older primary.
8394  */
8396  checkPoint.oldestMultiDB);
8398  checkPoint.oldestXid))
8399  SetTransactionIdLimit(checkPoint.oldestXid,
8400  checkPoint.oldestXidDB);
8401  /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8402  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8403  ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
8404  LWLockRelease(ControlFileLock);
8405 
8406  /* Update shared-memory copy of checkpoint XID/epoch */
8408  XLogCtl->ckptFullXid = checkPoint.nextXid;
8410 
8411  /* TLI should not change in an on-line checkpoint */
8412  (void) GetCurrentReplayRecPtr(&replayTLI);
8413  if (checkPoint.ThisTimeLineID != replayTLI)
8414  ereport(PANIC,
8415  (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8416  checkPoint.ThisTimeLineID, replayTLI)));
8417 
8418  RecoveryRestartPoint(&checkPoint, record);
8419  }
8420  else if (info == XLOG_OVERWRITE_CONTRECORD)
8421  {
8422  /* nothing to do here, handled in xlogrecovery_redo() */
8423  }
8424  else if (info == XLOG_END_OF_RECOVERY)
8425  {
8426  xl_end_of_recovery xlrec;
8427  TimeLineID replayTLI;
8428 
8429  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8430 
8431  /*
8432  * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8433  * but this case is rarer and harder to test, so the benefit doesn't
8434  * outweigh the potential extra cost of maintenance.
8435  */
8436 
8437  /*
8438  * We should've already switched to the new TLI before replaying this
8439  * record.
8440  */
8441  (void) GetCurrentReplayRecPtr(&replayTLI);
8442  if (xlrec.ThisTimeLineID != replayTLI)
8443  ereport(PANIC,
8444  (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8445  xlrec.ThisTimeLineID, replayTLI)));
8446  }
8447  else if (info == XLOG_NOOP)
8448  {
8449  /* nothing to do here */
8450  }
8451  else if (info == XLOG_SWITCH)
8452  {
8453  /* nothing to do here */
8454  }
8455  else if (info == XLOG_RESTORE_POINT)
8456  {
8457  /* nothing to do here, handled in xlogrecovery.c */
8458  }
8459  else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8460  {
8461  /*
8462  * XLOG_FPI records contain nothing else but one or more block
8463  * references. Every block reference must include a full-page image
8464  * even if full_page_writes was disabled when the record was generated
8465  * - otherwise there would be no point in this record.
8466  *
8467  * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8468  * WAL-logged because of a hint bit update. They are only generated
8469  * when checksums and/or wal_log_hints are enabled. They may include
8470  * no full-page images if full_page_writes was disabled when they were
8471  * generated. In this case there is nothing to do here.
8472  *
8473  * No recovery conflicts are generated by these generic records - if a
8474  * resource manager needs to generate conflicts, it has to define a
8475  * separate WAL record type and redo routine.
8476  */
8477  for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8478  {
8479  Buffer buffer;
8480 
8481  if (!XLogRecHasBlockImage(record, block_id))
8482  {
8483  if (info == XLOG_FPI)
8484  elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8485  continue;
8486  }
8487 
8488  if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8489  elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8490  UnlockReleaseBuffer(buffer);
8491  }
8492  }
8493  else if (info == XLOG_BACKUP_END)
8494  {
8495  /* nothing to do here, handled in xlogrecovery_redo() */
8496  }
8497  else if (info == XLOG_PARAMETER_CHANGE)
8498  {
8499  xl_parameter_change xlrec;
8500 
8501  /* Update our copy of the parameters in pg_control */
8502  memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8503 
8504  /*
8505  * Invalidate logical slots if we are in hot standby and the primary
8506  * does not have a WAL level sufficient for logical decoding. No need
8507  * to search for potentially conflicting logical slots if standby is
8508  * running with wal_level lower than logical, because in that case, we
8509  * would have either disallowed creation of logical slots or
8510  * invalidated existing ones.
8511  */
8512  if (InRecovery && InHotStandby &&
8513  xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8516  0, InvalidOid,
8518 
8519  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8525  ControlFile->wal_level = xlrec.wal_level;
8527 
8528  /*
8529  * Update minRecoveryPoint to ensure that if recovery is aborted, we
8530  * recover back up to this point before allowing hot standby again.
8531  * This is important if the max_* settings are decreased, to ensure
8532  * you don't run queries against the WAL preceding the change. The
8533  * local copies cannot be updated as long as crash recovery is
8534  * happening and we expect all the WAL to be replayed.
8535  */
8536  if (InArchiveRecovery)
8537  {
8540  }
8542  {
8543  TimeLineID replayTLI;
8544 
8545  (void) GetCurrentReplayRecPtr(&replayTLI);
8547  ControlFile->minRecoveryPointTLI = replayTLI;
8548  }
8549 
8553 
8555  LWLockRelease(ControlFileLock);
8556 
8557  /* Check to see if any parameter change gives a problem on recovery */
8559  }
8560  else if (info == XLOG_FPW_CHANGE)
8561  {
8562  bool fpw;
8563 
8564  memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8565 
8566  /*
8567  * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8568  * do_pg_backup_start() and do_pg_backup_stop() can check whether
8569  * full_page_writes has been disabled during online backup.
8570  */
8571  if (!fpw)
8572  {
8574  if (XLogCtl->lastFpwDisableRecPtr < record->ReadRecPtr)
8577  }
8578 
8579  /* Keep track of full_page_writes */
8580  lastFullPageWrites = fpw;
8581  }
8582  else if (info == XLOG_CHECKPOINT_REDO)
8583  {
8584  /* nothing to do here, just for informational purposes */
8585  }
8586 }
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:664
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2536
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2511
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:58
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7550
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:418
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:423
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:314
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert, ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), XLogCtlData::ckptFullXid, CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, 
RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, XLogRecPtrIsInvalid, and XLR_INFO_MASK.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2990 of file xlog.c.

2991 {
2992  XLogwrtRqst WriteRqst;
2993  bool flexible = true;
2994  static TimestampTz lastflush;
2995  TimestampTz now;
2996  int flushblocks;
2997  TimeLineID insertTLI;
2998 
2999  /* XLOG doesn't need flushing during recovery */
3000  if (RecoveryInProgress())
3001  return false;
3002 
3003  /*
3004  * Since we're not in recovery, InsertTimeLineID is set and can't change,
3005  * so we can read it without a lock.
3006  */
3007  insertTLI = XLogCtl->InsertTimeLineID;
3008 
3009  /* read updated LogwrtRqst */
3010  SpinLockAcquire(&XLogCtl->info_lck);
3011  WriteRqst = XLogCtl->LogwrtRqst;
3012  SpinLockRelease(&XLogCtl->info_lck);
3013 
3014  /* back off to last completed page boundary */
3015  WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3016 
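3017  /* if we have already flushed that far, consider async commit records */
3018  RefreshXLogWriteResult(LogwrtResult);
3019  if (WriteRqst.Write <= LogwrtResult.Flush)
3020  {
3021  SpinLockAcquire(&XLogCtl->info_lck);
3022  WriteRqst.Write = XLogCtl->asyncXactLSN;
3023  SpinLockRelease(&XLogCtl->info_lck);
3024  flexible = false; /* ensure it all gets written */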
3025  }
3026 
3027  /*
3028  * If already known flushed, we're done. Just need to check if we are
3029  * holding an open file handle to a logfile that's no longer in use,
3030  * preventing the file from being deleted.
3031  */
3032  if (WriteRqst.Write <= LogwrtResult.Flush)
3033  {
3034  if (openLogFile >= 0)
3035  {
3036  if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
3037  wal_segment_size))
3038  {
3039  XLogFileClose();
3040  }
3041  }
3042  return false;
3043  }
3044 
3045  /*
3046  * Determine how far to flush WAL, based on the wal_writer_delay and
3047  * wal_writer_flush_after GUCs.
3048  *
3049  * Note that XLogSetAsyncXactLSN() performs similar calculation based on
3050  * wal_writer_flush_after, to decide when to wake us up. Make sure the
3051  * logic is the same in both places if you change this.
3052  */
3053  now = GetCurrentTimestamp();
3054  flushblocks =
3055  WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3056 
3057  if (WalWriterFlushAfter == 0 || lastflush == 0)
3058  {
3059  /* first call, or block based limits disabled */
3060  WriteRqst.Flush = WriteRqst.Write;
3061  lastflush = now;
3062  }
3063  else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3064  {
3065  /*
3066  * Flush the writes at least every WalWriterDelay ms. This is
3067  * important to bound the amount of time it takes for an asynchronous
3068  * commit to hit disk.
3069  */
3070  WriteRqst.Flush = WriteRqst.Write;
3071  lastflush = now;
3072  }
3073  else if (flushblocks >= WalWriterFlushAfter)
3074  {
3075  /* exceeded wal_writer_flush_after blocks, flush */
3076  WriteRqst.Flush = WriteRqst.Write;
3077  lastflush = now;
3078  }
3079  else
3080  {
3081  /* no flushing, this time round */
3082  WriteRqst.Flush = 0;
3083  }
3084 
3085 #ifdef WAL_DEBUG
3086  if (XLOG_DEBUG)
3087  elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3088  LSN_FORMAT_ARGS(WriteRqst.Write),
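3089  LSN_FORMAT_ARGS(WriteRqst.Flush),
3090  LSN_FORMAT_ARGS(LogwrtResult.Write),
3091  LSN_FORMAT_ARGS(LogwrtResult.Flush));
3092 #endif
3093 
3094  START_CRIT_SECTION();
3095 
3096  /* now wait for any in-progress insertions to finish and get write lock */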
3097  WaitXLogInsertionsToFinish(WriteRqst.Write);
3098  LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3099  RefreshXLogWriteResult(LogwrtResult);
3100  if (WriteRqst.Write > LogwrtResult.Write ||
3101  WriteRqst.Flush > LogwrtResult.Flush)
3102  {
3103  XLogWrite(WriteRqst, insertTLI, flexible);
3104  }
3105  LWLockRelease(WALWriteLock);
3106 
3107  END_CRIT_SECTION();
3108 
3109  /* wake up walsenders now that we've released heavily contended locks */
3110  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
3111 
3112  /*
3113  * Great, done. To take some work off the critical path, try to initialize
3114  * as many of the no-longer-needed WAL buffers for future use as we can.
3115  */
3116  AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3117 
3118  /*
3119  * If we determined that we need to write data, but somebody else
3120  * wrote/flushed already, it should be considered as being active, to
3121  * avoid hibernating too early.
3122  */
3123  return true;
3124 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
XLogRecPtr asyncXactLSN
Definition: xlog.c:458
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:70
int WalWriterDelay
Definition: walwriter.c:69
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1504
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1985
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2314
static void XLogFileClose(void)
Definition: xlog.c:3639
static XLogSegNo openLogSegNo
Definition: xlog.c:636
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal(), and WalWriterMain().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2290 of file xlog.c.

2291 {
2292  XLogSegNo old_segno;
2293 
2294  XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2295 
2296  if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2297  return true;
2298  return false;
2299 }
int CheckPointSegments
Definition: xlog.c:156

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3380 of file xlog.c.

3381 {
3382  bool ignore_added;
3383  char path[MAXPGPATH];
3384  int fd;
3385 
3386  Assert(logtli != 0);
3387 
3388  fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3389  if (fd >= 0)
3390  return fd;
3391 
3392  /* Now open original target segment (might not be file I just made) */
3393  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3394  get_sync_bit(wal_sync_method));
3395  if (fd < 0)
3396  ereport(ERROR,
3397  (errcode_for_file_access(),
3398  errmsg("could not open file \"%s\": %m", path)));
3399  return fd;
3400 }
#define PG_BINARY
Definition: c.h:1278
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1086
#define O_CLOEXEC
Definition: win32_port.h:359
static int get_sync_bit(int method)
Definition: xlog.c:8593
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3210

References Assert, BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3618 of file xlog.c.

3619 {
3620  char path[MAXPGPATH];
3621  int fd;
3622 
3623  XLogFilePath(path, tli, segno, wal_segment_size);
3624 
3625  fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3626  get_sync_bit(wal_sync_method));
3627  if (fd < 0)
3628  ereport(PANIC,
3629  (errcode_for_file_access(),
3630  errmsg("could not open file \"%s\": %m", path)));
3631 
3632  return fd;
3633 }
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2802 of file xlog.c.

2803 {
2804  XLogRecPtr WriteRqstPtr;
2805  XLogwrtRqst WriteRqst;
2806  TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2807 
2808  /*
2809  * During REDO, we are reading not writing WAL. Therefore, instead of
2810  * trying to flush the WAL, we should update minRecoveryPoint instead. We
2811  * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2812  * to act this way too, and because when it tries to write the
2813  * end-of-recovery checkpoint, it should indeed flush.
2814  */
2815  if (!XLogInsertAllowed())
2816  {
2817  UpdateMinRecoveryPoint(record, false);
2818  return;
2819  }
2820 
2821  /* Quick exit if already known flushed */
2822  if (record <= LogwrtResult.Flush)
2823  return;
2824 
2825 #ifdef WAL_DEBUG
2826  if (XLOG_DEBUG)
2827  elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
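2828  LSN_FORMAT_ARGS(record),
2829  LSN_FORMAT_ARGS(LogwrtResult.Write),
2830  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2831 #endif
2832 
2833  START_CRIT_SECTION();
2834 
2835  /*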
2836  * Since fsync is usually a horribly expensive operation, we try to
2837  * piggyback as much data as we can on each fsync: if we see any more data
2838  * entered into the xlog buffer, we'll write and fsync that too, so that
2839  * the final value of LogwrtResult.Flush is as large as possible. This
2840  * gives us some chance of avoiding another fsync immediately after.
2841  */
2842 
2843  /* initialize to given target; may increase below */
2844  WriteRqstPtr = record;
2845 
2846  /*
2847  * Now wait until we get the write lock, or someone else does the flush
2848  * for us.
2849  */
2850  for (;;)
2851  {
2852  XLogRecPtr insertpos;
2853 
2854  /* done already? */
2855  RefreshXLogWriteResult(LogwrtResult);
2856  if (record <= LogwrtResult.Flush)
2857  break;
2858 
2859  /*
2860  * Before actually performing the write, wait for all in-flight
2861  * insertions to the pages we're about to write to finish.
2862  */
2863  SpinLockAcquire(&XLogCtl->info_lck);
2864  if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2865  WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2866  SpinLockRelease(&XLogCtl->info_lck);
2867  insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2868 
2869  /*
2870  * Try to get the write lock. If we can't get it immediately, wait
2871  * until it's released, and recheck if we still need to do the flush
2872  * or if the backend that held the lock did it for us already. This
2873  * helps to maintain a good rate of group committing when the system
2874  * is bottlenecked by the speed of fsyncing.
2875  */
2876  if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2877  {
2878  /*
2879  * The lock is now free, but we didn't acquire it yet. Before we
2880  * do, loop back to check if someone else flushed the record for
2881  * us already.
2882  */
2883  continue;
2884  }
2885 
2886  /* Got the lock; recheck whether request is satisfied */
2887  RefreshXLogWriteResult(LogwrtResult);
2888  if (record <= LogwrtResult.Flush)
2889  {
2890  LWLockRelease(WALWriteLock);
2891  break;
2892  }
2893 
2894  /*
2895  * Sleep before flush! By adding a delay here, we may give further
2896  * backends the opportunity to join the backlog of group commit
2897  * followers; this can significantly improve transaction throughput,
2898  * at the risk of increasing transaction latency.
2899  *
2900  * We do not sleep if enableFsync is not turned on, nor if there are
2901  * fewer than CommitSiblings other backends with active transactions.
2902  */
2903  if (CommitDelay > 0 && enableFsync &&
2904  MinimumActiveBackends(CommitSiblings))
2905  {
2906  pg_usleep(CommitDelay);
2907 
2908  /*
2909  * Re-check how far we can now flush the WAL. It's generally not
2910  * safe to call WaitXLogInsertionsToFinish while holding
2911  * WALWriteLock, because an in-progress insertion might need to
2912  * also grab WALWriteLock to make progress. But we know that all
2913  * the insertions up to insertpos have already finished, because
2914  * that's what the earlier WaitXLogInsertionsToFinish() returned.
2915  * We're only calling it again to allow insertpos to be moved
2916  * further forward, not to actually wait for anyone.
2917  */
2918  insertpos = WaitXLogInsertionsToFinish(insertpos);
2919  }
2920 
2921  /* try to write/flush later additions to XLOG as well */
2922  WriteRqst.Write = insertpos;
2923  WriteRqst.Flush = insertpos;
2924 
2925  XLogWrite(WriteRqst, insertTLI, false);
2926 
2927  LWLockRelease(WALWriteLock);
2928  /* done */
2929  break;
2930  }
2931 
2932  END_CRIT_SECTION();
2933 
2934  /* wake up walsenders now that we've released heavily contended locks */
2935  WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2936 
2937  /*
2938  * If we still haven't flushed to the request point then we have a
2939  * problem; most likely, the requested flush point is past end of XLOG.
2940  * This has been seen to occur when a disk page has a corrupted LSN.
2941  *
2942  * Formerly we treated this as a PANIC condition, but that hurts the
2943  * system's robustness rather than helping it: we do not want to take down
2944  * the whole system due to corruption on one data page. In particular, if
2945  * the bad page is encountered again during recovery then we would be
2946  * unable to restart the database at all! (This scenario actually
2947  * happened in the field several times with 7.1 releases.) As of 8.4, bad
2948  * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2949  * the only time we can reach here during recovery is while flushing the
2950  * end-of-recovery checkpoint record, and we don't expect that to have a
2951  * bad LSN.
2952  *
2953  * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2954  * since xact.c calls this routine inside a critical section. However,
2955  * calls from bufmgr.c are not within critical sections and so we will not
2956  * force a restart for a bad LSN on a data page.
2957  */
2958  if (LogwrtResult.Flush < record)
2959  elog(ERROR,
2960  "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2961  LSN_FORMAT_ARGS(record),
2962  LSN_FORMAT_ARGS(LogwrtResult.Flush));
2963 }
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1396
bool MinimumActiveBackends(int min)
Definition: procarray.c:3550
int CommitDelay
Definition: xlog.c:132
int CommitSiblings
Definition: xlog.c:133
bool XLogInsertAllowed(void)
Definition: xlog.c:6389

References CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FlushBuffer(), LogLogicalMessage(), pg_attribute_noreturn(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3758 of file xlog.c.

3759 {
3760  XLogSegNo lastRemovedSegNo;
3761 
3762  SpinLockAcquire(&XLogCtl->info_lck);
3763  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3764  SpinLockRelease(&XLogCtl->info_lck);
3765 
3766  return lastRemovedSegNo;
3767 }

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3774 of file xlog.c.

3775 {
3776  DIR *xldir;
3777  struct dirent *xlde;
3778  XLogSegNo oldest_segno = 0;
3779 
3780  xldir = AllocateDir(XLOGDIR);
3781  while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3782  {
3783  TimeLineID file_tli;
3784  XLogSegNo file_segno;
3785 
3786  /* Ignore files that are not XLOG segments. */
3787  if (!IsXLogFileName(xlde->d_name))
3788  continue;
3789 
3790  /* Parse filename to get TLI and segno. */
3791  XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3792  wal_segment_size);
3793 
3794  /* Ignore anything that's not from the TLI of interest. */
3795  if (tli != file_tli)
3796  continue;
3797 
3798  /* If it's the oldest so far, update oldest_segno. */
3799  if (oldest_segno == 0 || file_segno < oldest_segno)
3800  oldest_segno = file_segno;
3801  }
3802 
3803  FreeDir(xldir);
3804  return oldest_segno;
3805 }
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6389 of file xlog.c.

6390 {
6391  /*
6392  * If value is "unconditionally true" or "unconditionally false", just
6393  * return it. This provides the normal fast path once recovery is known
6394  * done.
6395  */
6396  if (LocalXLogInsertAllowed >= 0)
6397  return (bool) LocalXLogInsertAllowed;
6398 
6399  /*
6400  * Else, must check to see if we're still in recovery.
6401  */
6402  if (RecoveryInProgress())
6403  return false;
6404 
6405  /*
6406  * On exit from recovery, reset to "unconditionally true", since there is
6407  * no need to keep checking.
6408  */
6409  LocalXLogInsertAllowed = 1;
6410  return true;
6411 }

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), and XLogInsertRecord().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( struct XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
bool  topxid_included 
)

Definition at line 748 of file xlog.c.

753 {
754  XLogCtlInsert *Insert = &XLogCtl->Insert;
755  pg_crc32c rdata_crc;
756  bool inserted;
757  XLogRecord *rechdr = (XLogRecord *) rdata->data;
758  uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
759  WalInsertClass class = WALINSERT_NORMAL;
760  XLogRecPtr StartPos;
761  XLogRecPtr EndPos;
762  bool prevDoPageWrites = doPageWrites;
763  TimeLineID insertTLI;
764 
765  /* Does this record type require special handling? */
766  if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
767  {
768  if (info == XLOG_SWITCH)
769  class = WALINSERT_SPECIAL_SWITCH;
770  else if (info == XLOG_CHECKPOINT_REDO)
771  class = WALINSERT_SPECIAL_CHECKPOINT;
772  }
773 
774  /* we assume that all of the record header is in the first chunk */
775  Assert(rdata->len >= SizeOfXLogRecord);
776 
777  /* cross-check on whether we should be here or not */
778  if (!XLogInsertAllowed())
779  elog(ERROR, "cannot make new WAL entries during recovery");
780 
781  /*
782  * Given that we're not in recovery, InsertTimeLineID is set and can't
783  * change, so we can read it without a lock.
784  */
785  insertTLI = XLogCtl->InsertTimeLineID;
786 
787  /*----------
788  *
789  * We have now done all the preparatory work we can without holding a
790  * lock or modifying shared state. From here on, inserting the new WAL
791  * record to the shared WAL buffer cache is a two-step process:
792  *
793  * 1. Reserve the right amount of space from the WAL. The current head of
794  * reserved space is kept in Insert->CurrBytePos, and is protected by
795  * insertpos_lck.
796  *
797  * 2. Copy the record to the reserved WAL space. This involves finding the
798  * correct WAL buffer containing the reserved space, and copying the
799  * record in place. This can be done concurrently in multiple processes.
800  *
801  * To keep track of which insertions are still in-progress, each concurrent
802  * inserter acquires an insertion lock. In addition to just indicating that
803  * an insertion is in progress, the lock tells others how far the inserter
804  * has progressed. There is a small fixed number of insertion locks,
805  * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
806  * boundary, it updates the value stored in the lock to how far it has
807  * inserted, to allow the previous buffer to be flushed.
808  *
809  * Holding onto an insertion lock also protects RedoRecPtr and
810  * fullPageWrites from changing until the insertion is finished.
811  *
812  * Step 2 can usually be done completely in parallel. If the required WAL
813  * page is not initialized yet, you have to grab WALBufMappingLock to
814  * initialize it, but the WAL writer tries to do that ahead of insertions
815  * to avoid that from happening in the critical path.
816  *
817  *----------
818  */
819  START_CRIT_SECTION();
820 
821  if (likely(class == WALINSERT_NORMAL))
822  {
823  WALInsertLockAcquire();
824 
825  /*
826  * Check to see if my copy of RedoRecPtr is out of date. If so, may
827  * have to go back and have the caller recompute everything. This can
828  * only happen just after a checkpoint, so it's better to be slow in
829  * this case and fast otherwise.
830  *
831  * Also check to see if fullPageWrites was just turned on or there's a
832  * running backup (which forces full-page writes); if we weren't
833  * already doing full-page writes then go back and recompute.
834  *
835  * If we aren't doing full-page writes then RedoRecPtr doesn't
836  * actually affect the contents of the XLOG record, so we'll update
837  * our local copy but not force a recomputation. (If doPageWrites was
838  * just turned off, we could recompute the record without full pages,
839  * but we choose not to bother.)
840  */
841  if (RedoRecPtr != Insert->RedoRecPtr)
842  {
843  Assert(RedoRecPtr < Insert->RedoRecPtr);
844  RedoRecPtr = Insert->RedoRecPtr;
845  }
846  doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
847 
848  if (doPageWrites &&
849  (!prevDoPageWrites ||
850  (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr)))
851  {
852  /*
853  * Oops, some buffer now needs to be backed up that the caller
854  * didn't back up. Start over.
855  */
856  WALInsertLockRelease();
857  END_CRIT_SECTION();
858  return InvalidXLogRecPtr;
859  }
860 
861  /*
862  * Reserve space for the record in the WAL. This also sets the xl_prev
863  * pointer.
864  */
865  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
866  &rechdr->xl_prev);
867 
868  /* Normal records are always inserted. */
869  inserted = true;
870  }
871  else if (class == WALINSERT_SPECIAL_SWITCH)
872  {
873  /*
874  * In order to insert an XLOG_SWITCH record, we need to hold all of
875  * the WAL insertion locks, not just one, so that no one else can
876  * begin inserting a record until we've figured out how much space
877  * remains in the current WAL segment and claimed all of it.
878  *
879  * Nonetheless, this case is simpler than the normal cases handled
880  * below, which must check for changes in doPageWrites and RedoRecPtr.
881  * Those checks are only needed for records that can contain buffer
882  * references, and an XLOG_SWITCH record never does.
883  */
884  Assert(fpw_lsn == InvalidXLogRecPtr);
885  WALInsertLockAcquireExclusive();
886  inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
887  }
888  else
889  {
890  Assert(class == WALINSERT_SPECIAL_CHECKPOINT);
891 
892  /*
893  * We need to update both the local and shared copies of RedoRecPtr,
894  * which means that we need to hold all the WAL insertion locks.
895  * However, there can't be any buffer references, so as above, we need
896  * not check RedoRecPtr before inserting the record; we just need to
897  * update it afterwards.
898  */
899  Assert(fpw_lsn == InvalidXLogRecPtr);
900  WALInsertLockAcquireExclusive();
901  ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
902  &rechdr->xl_prev);
903  RedoRecPtr = Insert->RedoRecPtr = StartPos;
904  inserted = true;
905  }
906 
907  if (inserted)
908  {
909  /*
910  * Now that xl_prev has been filled in, calculate CRC of the record
911  * header.
912  */
913  rdata_crc = rechdr->xl_crc;
914  COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
915  FIN_CRC32C(rdata_crc);
916  rechdr->xl_crc = rdata_crc;
917 
918  /*
919  * All the record data, including the header, is now ready to be
920  * inserted. Copy the record in the space reserved.
921  */
922  CopyXLogRecordToWAL(rechdr->xl_tot_len,
923  class == WALINSERT_SPECIAL_SWITCH, rdata,
924  StartPos, EndPos, insertTLI);
925 
926  /*
927  * Unless record is flagged as not important, update LSN of last
928  * important record in the current slot. When holding all locks, just
929  * update the first one.
930  */
931  if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
932  {
933  int lockno = holdingAllLocks ? 0 : MyLockNo;
934 
935  WALInsertLocks[lockno].l.lastImportantAt = StartPos;
936  }
937  }
938  else
939  {
940  /*
941  * This was an xlog-switch record, but the current insert location was
942  * already exactly at the beginning of a segment, so there was no need
943  * to do anything.
944  */
945  }
946 
947  /*
948  * Done! Let others know that we're finished.
949  */
950  WALInsertLockRelease();
951 
952  END_CRIT_SECTION();
953 
954  MarkCurrentTransactionIdLoggedIfAny();
955 
956  /*
957  * Mark the top transaction id as logged (if needed) so that we do not try
958  * to log it again with the next WAL record in the current subtransaction.
959  */
960  if (topxid_included)
961  MarkSubxactTopXidLogged();
962 
963  /*
964  * Update shared LogwrtRqst.Write, if we crossed page boundary.
965  */
966  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
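967  {
968  SpinLockAcquire(&XLogCtl->info_lck);
969  /* advance global request to include new block(s) */
970  if (XLogCtl->LogwrtRqst.Write < EndPos)
971  XLogCtl->LogwrtRqst.Write = EndPos;
972  SpinLockRelease(&XLogCtl->info_lck);
973  RefreshXLogWriteResult(LogwrtResult);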
974  }
975 
976  /*
977  * If this was an XLOG_SWITCH record, flush the record and the empty
978  * padding space that fills the rest of the segment, and perform
979  * end-of-segment actions (eg, notifying archiver).
980  */
981  if (class == WALINSERT_SPECIAL_SWITCH)
982  {
983  TRACE_POSTGRESQL_WAL_SWITCH();
984  XLogFlush(EndPos);
985 
986  /*
987  * Even though we reserved the rest of the segment for us, which is
988  * reflected in EndPos, we return a pointer to just the end of the
989  * xlog-switch record.
990  */
991  if (inserted)
992  {
993  EndPos = StartPos + SizeOfXLogRecord;
994  if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
995  {
996  uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
997 
998  if (offset == EndPos % XLOG_BLCKSZ)
999  EndPos += SizeOfXLogLongPHD;
1000  else
1001  EndPos += SizeOfXLogShortPHD;
1002  }
1003  }
1004  }
1005 
1006 #ifdef WAL_DEBUG
1007  if (XLOG_DEBUG)
1008  {
1009  static XLogReaderState *debug_reader = NULL;
1010  XLogRecord *record;
1011  DecodedXLogRecord *decoded;
1012  StringInfoData buf;
1013  StringInfoData recordBuf;
1014  char *errormsg = NULL;
1015  MemoryContext oldCxt;
1016 
1017  oldCxt = MemoryContextSwitchTo(walDebugCxt);
1018 
1019  initStringInfo(&buf);
1020  appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
1021 
1022  /*
1023  * We have to piece together the WAL record data from the XLogRecData
1024  * entries, so that we can pass it to the rm_desc function as one
1025  * contiguous chunk.
1026  */
1027  initStringInfo(&recordBuf);
1028  for (; rdata != NULL; rdata = rdata->next)
1029  appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1030 
1031  /* We also need temporary space to decode the record. */
1032  record = (XLogRecord *) recordBuf.data;
1033  decoded = (DecodedXLogRecord *)
1034  palloc(DecodeXLogRecordRequiredSpace(record->xl_tot_len));
1035 
1036  if (!debug_reader)
1037  debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1038  XL_ROUTINE(.page_read = NULL,
1039  .segment_open = NULL,
1040  .segment_close = NULL),
1041  NULL);
1042  if (!debug_reader)
1043  {
1044  appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1045  }
1046  else if (!DecodeXLogRecord(debug_reader,
1047  decoded,
1048  record,
1049  EndPos,
1050  &errormsg))
1051  {
1052  appendStringInfo(&buf, "error decoding record: %s",
1053  errormsg ? errormsg : "no error message");
1054  }
1055  else
1056  {
1057  appendStringInfoString(&buf, " - ");
1058 
1059  debug_reader->record = decoded;
1060  xlog_outdesc(&buf, debug_reader);
1061  debug_reader->record = NULL;
1062  }
1063  elog(LOG, "%s", buf.data);
1064 
1065  pfree(decoded);
1066  pfree(buf.data);
1067  pfree(recordBuf.data);
1068  MemoryContextSwitchTo(oldCxt);
1069  }
1070 #endif
1071 
1072  /*
1073  * Update our global variables
1074  */
1075  ProcLastRecPtr = StartPos;
1076  XactLastRecEnd = EndPos;
1077 
1078  /* Report WAL traffic to the instrumentation. */
1079  if (inserted)
1080  {
1081  pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1082  pgWalUsage.wal_records++;
1083  pgWalUsage.wal_fpi += num_fpi;
1084  }
1085 
1086  return EndPos;
1087 }
#define likely(x)
Definition: c.h:325
#define unlikely(x)
Definition: c.h:326
WalUsage pgWalUsage
Definition: instrument.c:22
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
MemoryContextSwitchTo(old_ctx)
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:236
const char * data
struct XLogRecData * next
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:590
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:540
XLogRecPtr XactLastRecEnd
Definition: xlog.c:254
static void WALInsertLockAcquire(void)
Definition: xlog.c:1371
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1225
static bool holdingAllLocks
Definition: xlog.c:652
static int MyLockNo
Definition: xlog.c:651
WalInsertClass
Definition: xlog.c:560
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:562
@ WALINSERT_NORMAL
Definition: xlog.c:561
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:563
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1108
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1164
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert, buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3133 of file xlog.c.

3134 {
3135  /*
3136  * During recovery, we don't flush WAL but update minRecoveryPoint
3137  * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3138  * would need to be updated.
3139  */
3140  if (RecoveryInProgress())
3141  {
3142  /*
3143  * An invalid minRecoveryPoint means that we need to recover all the
3144  * WAL, i.e., we're doing crash recovery. We never modify the control
3145  * file's value in that case, so we can short-circuit future checks
3146  * here too. This triggers a quick exit path for the startup process,
3147  * which cannot update its local copy of minRecoveryPoint as long as
3148  * it has not replayed all WAL available when doing crash recovery.
3149  */
3150  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
3151  updateMinRecoveryPoint = false;
3152 
3153  /* Quick exit if already known to be updated or cannot be updated */
3154  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3155  return false;
3156 
3157  /*
3158  * Update local copy of minRecoveryPoint. But if the lock is busy,
3159  * just return a conservative guess.
3160  */
3161  if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3162  return true;
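3163  LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
3164  LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
3165  LWLockRelease(ControlFileLock);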
3166 
3167  /*
3168  * Check minRecoveryPoint for any other process than the startup
3169  * process doing crash recovery, which should not update the control
3170  * file value if crash recovery is still running.
3171  */
3172  if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
3173  updateMinRecoveryPoint = false;
3174 
3175  /* check again */
3176  if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
3177  return false;
3178  else
3179  return true;
3180  }
3181 
3182  /* Quick exit if already known flushed */
3183  if (record <= LogwrtResult.Flush)
3184  return false;
3185 
3186  /* read LogwrtResult and update local state */
3187  RefreshXLogWriteResult(LogwrtResult);
3188 
3189  /* check again */
3190  if (record <= LogwrtResult.Flush)
3191  return false;
3192 
3193  return true;
3194 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RecoveryInProgress(), RefreshXLogWriteResult, updateMinRecoveryPoint, and XLogRecPtrIsInvalid.

Referenced by GetVictimBuffer(), and SetHintBits().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8023 of file xlog.c.

8024 {
8025  XLogBeginInsert();
8026  XLogRegisterData((char *) (&nextOid), sizeof(Oid));
8027  (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8028 
8029  /*
8030  * We need not flush the NEXTOID record immediately, because any of the
8031  * just-allocated OIDs could only reach disk as part of a tuple insert or
8032  * update that would have its own XLOG record that must follow the NEXTOID
8033  * record. Therefore, the standard buffer LSN interlock applied to those
8034  * records will ensure no such OID reaches disk before the NEXTOID record
8035  * does.
8036  *
8037  * Note, however, that the above statement only covers state "within" the
8038  * database. When we use a generated OID as a file or directory name, we
8039  * are in a sense violating the basic WAL rule, because that filesystem
8040  * change may reach disk before the NEXTOID WAL record does. The impact
8041  * of this is that if a database crash occurs immediately afterward, we
8042  * might after restart re-generate the same OID and find that it conflicts
8043  * with the leftover file or directory. But since for safety's sake we
8044  * always loop until finding a nonconflicting filename, this poses no real
8045  * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8046  */
8047 }

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8078 of file xlog.c.

8079 {
8080  XLogRecPtr RecPtr;
8081  xl_restore_point xlrec;
8082 
8083  xlrec.rp_time = GetCurrentTimestamp();
8084  strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8085 
8086  XLogBeginInsert();
8087  XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
8088 
8089  RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8090 
8091  ereport(LOG,
8092  (errmsg("restore point \"%s\" created at %X/%X",
8093  rpName, LSN_FORMAT_ARGS(RecPtr))));
8094 
8095  return RecPtr;
8096 }
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2631 of file xlog.c.

2632 {
2633  XLogRecPtr WriteRqstPtr = asyncXactLSN;
2634  bool sleeping;
2635  bool wakeup = false;
2636  XLogRecPtr prevAsyncXactLSN;
2637 
2638  SpinLockAcquire(&XLogCtl->info_lck);
2639  sleeping = XLogCtl->WalWriterSleeping;
2640  prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2641  if (XLogCtl->asyncXactLSN < asyncXactLSN)
2642  XLogCtl->asyncXactLSN = asyncXactLSN;
2643  SpinLockRelease(&XLogCtl->info_lck);
2644 
2645  /*
2646  * If somebody else already called this function with a more aggressive
2647  * LSN, they will have done what we needed (and perhaps more).
2648  */
2649  if (asyncXactLSN <= prevAsyncXactLSN)
2650  return;
2651 
2652  /*
2653  * If the WALWriter is sleeping, kick it to make it come out of low-power
2654  * mode, so that this async commit will reach disk within the expected
2655  * amount of time. Otherwise, determine whether it has enough WAL
2656  * available to flush, the same way that XLogBackgroundFlush() does.
2657  */
2658  if (sleeping)
2659  wakeup = true;
2660  else
2661  {
2662  int flushblocks;
2663 
2664  RefreshXLogWriteResult(LogwrtResult);
2665 
2666  flushblocks =
2667  WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2668 
2669  if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2670  wakeup = true;
2671  }
2672 
2673  if (wakeup)
2674  {
2675  volatile PROC_HDR *procglobal = ProcGlobal;
2676  ProcNumber walwriterProc = procglobal->walwriterProc;
2677 
2678  if (walwriterProc != INVALID_PROC_NUMBER)
2679  SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2680  }
2681 }
void SetLatch(Latch *latch)
Definition: latch.c:632
#define GetPGProcByNumber(n)
Definition: proc.h:436
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
PROC_HDR * ProcGlobal
Definition: proc.c:78
Definition: proc.h:382
ProcNumber walwriterProc
Definition: proc.h:420
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:129

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4918 of file xlog.c.

4919 {
4920  bool foundCFile,
4921  foundXLog;
4922  char *allocptr;
4923  int i;
4924  ControlFileData *localControlFile;
4925 
4926 #ifdef WAL_DEBUG
4927 
4928  /*
4929  * Create a memory context for WAL debugging that's exempt from the normal
4930  * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4931  * an allocation fails, but wal_debug is not for production use anyway.
4932  */
4933  if (walDebugCxt == NULL)
4934  {
4935  walDebugCxt = AllocSetContextCreate(TopMemoryContext,
4936  "WAL Debug",
4937  ALLOCSET_DEFAULT_SIZES);
4938  MemoryContextAllowInCriticalSection(walDebugCxt, true);
4939  }
4940 #endif
4941 
4942 
4943  XLogCtl = (XLogCtlData *)
4944  ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
4945 
4946  localControlFile = ControlFile;
4947  ControlFile = (ControlFileData *)
4948  ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
4949 
4950  if (foundCFile || foundXLog)
4951  {
4952  /* both should be present or neither */
4953  Assert(foundCFile && foundXLog);
4954 
4955  /* Initialize local copy of WALInsertLocks */
4956  WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
4957 
4958  if (localControlFile)
4959  pfree(localControlFile);
4960  return;
4961  }
4962  memset(XLogCtl, 0, sizeof(XLogCtlData));
4963 
4964  /*
4965  * Already have read control file locally, unless in bootstrap mode. Move
4966  * contents into shared memory.
4967  */
4968  if (localControlFile)
4969  {
4970  memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
4971  pfree(localControlFile);
4972  }
4973 
4974  /*
4975  * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
4976  * multiple of the alignment for same, so no extra alignment padding is
4977  * needed here.
4978  */
4979  allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
4980  XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
4981  allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
4982 
4983  for (i = 0; i < XLOGbuffers; i++)
4984  {
4985  pg_atomic_init_u64(&XLogCtl->xlblocks[i], InvalidXLogRecPtr);
4986  }
4987 
4988  /* WAL insertion locks. Ensure they're aligned to the full padded size */
4989  allocptr += sizeof(WALInsertLockPadded) -
4990  ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
4991  WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
4992  (WALInsertLockPadded *) allocptr;
4993  allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
4994 
4995  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
4996  {
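4997  LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
4998  pg_atomic_init_u64(&WALInsertLocks[i].l.insertingAt, InvalidXLogRecPtr);
4999  WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
5000  }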
5001 
5002  /*
5003  * Align the start of the page buffers to a full xlog block size boundary.
5004  * This simplifies some calculations in XLOG insertion. It is also
5005  * required for O_DIRECT.
5006  */
5007  allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5008  XLogCtl->pages = allocptr;
5009  memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5010 
5011  /*
5012  * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5013  * in additional info.)
5014  */
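5015  XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
5016  XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
5017  XLogCtl->InstallXLogFileSegmentActive = false;
5018  XLogCtl->WalWriterSleeping = false;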
5019 
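5020  SpinLockInit(&XLogCtl->Insert.insertpos_lck);
5021  SpinLockInit(&XLogCtl->info_lck);
5022  pg_atomic_init_u64(&XLogCtl->logInsertResult, InvalidXLogRecPtr);
5023  pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
5024  pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
5025  pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
5026 }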
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LWTRANCHE_WAL_INSERT
Definition: lwlock.h:186
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:694
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:494
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:444
slock_t insertpos_lck
Definition: xlog.c:398
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4868
int XLOGbuffers
Definition: xlog.c:117
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), LWTRANCHE_WAL_INSERT, MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4868 of file xlog.c.

4869 {
4870  Size size;
4871 
4872  /*
4873  * If the value of wal_buffers is -1, use the preferred auto-tune value.
4874  * This isn't an amazingly clean place to do this, but we must wait till
4875  * NBuffers has received its final value, and must do it before using the
4876  * value of XLOGbuffers to do anything important.
4877  *
4878  * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4879  * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4880  * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4881  * the matter with PGC_S_OVERRIDE.
4882  */
4883  if (XLOGbuffers == -1)
4884  {
4885  char buf[32];
4886 
4887  snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4888  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4889  PGC_S_DYNAMIC_DEFAULT);
4890  if (XLOGbuffers == -1) /* failed to apply it? */
4891  SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4892  PGC_S_OVERRIDE);
4893  }
4894  Assert(XLOGbuffers > 0);
4895 
4896  /* XLogCtl */
4897  size = sizeof(XLogCtlData);
4898 
4899  /* WAL insertion locks, plus alignment */
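4900  size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4901  /* xlblocks array */
4902  size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4903  /* extra alignment padding for XLOG I/O buffers */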
4904  size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4905  /* and the buffers themselves */
4906  size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4907 
4908  /*
4909  * Note: we don't count ControlFileData, it comes out of the "slop factor"
4910  * added by CreateSharedMemoryAndSemaphores. This lets us use this
4911  * routine again below to compute the actual allocation size.
4912  */
4913 
4914  return size;
4915 }
#define Max(x, y)
Definition: c.h:1003
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:110
@ PGC_S_OVERRIDE
Definition: guc.h:119
@ PGC_POSTMASTER
Definition: guc.h:70
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4621

References add_size(), Assert, buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), size, snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9473 of file xlog.c.

9474 {
9475  ShutdownWalRcv();
9476 
9477  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9478  XLogCtl->InstallXLogFileSegmentActive = false;
9479  LWLockRelease(ControlFileLock);
9480 }
void ShutdownWalRcv(void)

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ShutdownWalRcv(), and XLogCtl.

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

Variable Documentation

◆ CheckPointSegments

PGDLLIMPORT int CheckPointSegments
extern

◆ CheckpointStats

◆ CommitDelay

PGDLLIMPORT int CommitDelay
extern

Definition at line 132 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

PGDLLIMPORT int CommitSiblings
extern

Definition at line 133 of file xlog.c.

Referenced by XLogFlush().

◆ EnableHotStandby

◆ fullPageWrites

PGDLLIMPORT bool fullPageWrites
extern

Definition at line 122 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ log_checkpoints

PGDLLIMPORT bool log_checkpoints
extern

◆ max_slot_wal_keep_size_mb

PGDLLIMPORT int max_slot_wal_keep_size_mb
extern

Definition at line 135 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

PGDLLIMPORT int max_wal_size_mb
extern

◆ min_wal_size_mb

PGDLLIMPORT int min_wal_size_mb
extern

Definition at line 115 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ ProcLastRecPtr

PGDLLIMPORT XLogRecPtr ProcLastRecPtr
extern

◆ track_wal_io_timing

PGDLLIMPORT bool track_wal_io_timing
extern

Definition at line 137 of file xlog.c.

Referenced by issue_xlog_fsync(), and XLogWrite().

◆ wal_compression

PGDLLIMPORT int wal_compression
extern

Definition at line 124 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

PGDLLIMPORT bool* wal_consistency_checking
extern

Definition at line 126 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

PGDLLIMPORT char* wal_consistency_checking_string
extern

Definition at line 125 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

PGDLLIMPORT int wal_decode_buffer_size
extern

Definition at line 136 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

PGDLLIMPORT bool wal_init_zero
extern

Definition at line 127 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

PGDLLIMPORT int wal_keep_size_mb
extern

Definition at line 116 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

PGDLLIMPORT bool wal_log_hints
extern

Definition at line 123 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

PGDLLIMPORT bool wal_recycle
extern

Definition at line 128 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

PGDLLIMPORT int wal_retrieve_retry_interval
extern

◆ wal_segment_size

PGDLLIMPORT int wal_segment_size
extern

Definition at line 143 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

PGDLLIMPORT int wal_sync_method
extern

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

PGDLLIMPORT char* XLogArchiveCommand
extern

◆ XLogArchiveMode

◆ XLogArchiveTimeout

PGDLLIMPORT int XLogArchiveTimeout
extern

Definition at line 118 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

PGDLLIMPORT int XLOGbuffers
extern

Definition at line 117 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().