PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
xlog.h File Reference
#include "access/rmgr.h"
#include "access/xlogdefs.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "storage/fd.h"
Include dependency graph for xlog.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CheckpointStatsData
 

Macros

#define SYNC_METHOD_FSYNC   0
 
#define SYNC_METHOD_FDATASYNC   1
 
#define SYNC_METHOD_OPEN   2 /* for O_SYNC */
 
#define SYNC_METHOD_FSYNC_WRITETHROUGH   3
 
#define SYNC_METHOD_OPEN_DSYNC   4 /* for O_DSYNC */
 
#define InHotStandby   (standbyState >= STANDBY_SNAPSHOT_PENDING)
 
#define XLogArchivingActive()   (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
 
#define XLogArchivingAlways()   (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
 
#define XLogArchiveCommandSet()   (XLogArchiveCommand[0] != '\0')
 
#define XLogIsNeeded()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogHintBitIsNeeded()   (DataChecksumsEnabled() || wal_log_hints)
 
#define XLogStandbyInfoActive()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogLogicalInfoActive()   (wal_level >= WAL_LEVEL_LOGICAL)
 
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
 
#define CHECKPOINT_END_OF_RECOVERY
 
#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */
 
#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
 
#define CHECKPOINT_FLUSH_ALL
 
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
 
#define CHECKPOINT_CAUSE_XLOG   0x0040 /* XLOG consumption */
 
#define CHECKPOINT_CAUSE_TIME   0x0080 /* Elapsed time */
 
#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
 
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
 
#define BACKUP_LABEL_FILE   "backup_label"
 
#define BACKUP_LABEL_OLD   "backup_label.old"
 
#define TABLESPACE_MAP   "tablespace_map"
 
#define TABLESPACE_MAP_OLD   "tablespace_map.old"
 

Typedefs

typedef enum ArchiveMode ArchiveMode
 
typedef enum WalLevel WalLevel
 
typedef struct CheckpointStatsData CheckpointStatsData
 
typedef enum SessionBackupState SessionBackupState
 

Enumerations

enum  HotStandbyState { STANDBY_DISABLED, STANDBY_INITIALIZED, STANDBY_SNAPSHOT_PENDING, STANDBY_SNAPSHOT_READY }
 
enum  RecoveryTargetType {
  RECOVERY_TARGET_UNSET, RECOVERY_TARGET_XID, RECOVERY_TARGET_TIME, RECOVERY_TARGET_NAME,
  RECOVERY_TARGET_LSN, RECOVERY_TARGET_IMMEDIATE
}
 
enum  ArchiveMode { ARCHIVE_MODE_OFF = 0, ARCHIVE_MODE_ON, ARCHIVE_MODE_ALWAYS }
 
enum  WalLevel { WAL_LEVEL_MINIMAL = 0, WAL_LEVEL_REPLICA, WAL_LEVEL_LOGICAL }
 
enum  SessionBackupState { SESSION_BACKUP_NONE, SESSION_BACKUP_EXCLUSIVE, SESSION_BACKUP_NON_EXCLUSIVE }
 

Functions

XLogRecPtr XLogInsertRecord (struct XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags)
 
void XLogFlush (XLogRecPtr RecPtr)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr RecPtr)
 
int XLogFileInit (XLogSegNo segno, bool *use_existent, bool use_lock)
 
int XLogFileOpen (XLogSegNo segno)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
void XLogSetAsyncXactLSN (XLogRecPtr record)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void xlog_redo (XLogReaderState *record)
 
void xlog_desc (StringInfo buf, XLogReaderState *record)
 
const char * xlog_identify (uint8 info)
 
void issue_xlog_fsync (int fd, XLogSegNo segno)
 
bool RecoveryInProgress (void)
 
bool HotStandbyActive (void)
 
bool HotStandbyActiveInReplay (void)
 
bool XLogInsertAllowed (void)
 
void GetXLogReceiptTime (TimestampTz *rtime, bool *fromStream)
 
XLogRecPtr GetXLogReplayRecPtr (TimeLineID *replayTLI)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
bool RecoveryIsPaused (void)
 
void SetRecoveryPause (bool recoveryPause)
 
TimestampTz GetLatestXTime (void)
 
TimestampTz GetCurrentChunkReplayStartTime (void)
 
char * XLogFileNameP (TimeLineID tli, XLogSegNo segno)
 
void UpdateControlFile (void)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
void LocalProcessControlFile (bool reset)
 
void StartupXLOG (void)
 
void ShutdownXLOG (int code, Datum arg)
 
void InitXLOGAccess (void)
 
void CreateCheckPoint (int flags)
 
bool CreateRestartPoint (int flags)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetRedoRecPtr (void)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
void GetNextXidAndEpoch (TransactionId *xid, uint32 *epoch)
 
void RemovePromoteSignalFiles (void)
 
bool CheckPromoteSignal (void)
 
void WakeupRecovery (void)
 
void SetWalWriterSleeping (bool sleeping)
 
void XLogRequestWalReceiverReply (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
XLogRecPtr do_pg_start_backup (const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, DIR *tblspcdir, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
 
XLogRecPtr do_pg_stop_backup (char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 
void do_pg_abort_backup (void)
 
SessionBackupState get_backup_status (void)
 

Variables

int sync_method
 
PGDLLIMPORT TimeLineID ThisTimeLineID
 
bool InRecovery
 
HotStandbyState standbyState
 
XLogRecPtr ProcLastRecPtr
 
XLogRecPtr XactLastRecEnd
 
PGDLLIMPORT XLogRecPtr XactLastCommitEnd
 
bool reachedConsistency
 
int wal_segment_size
 
int min_wal_size_mb
 
int max_wal_size_mb
 
int wal_keep_segments
 
int XLOGbuffers
 
int XLogArchiveTimeout
 
int wal_retrieve_retry_interval
 
char * XLogArchiveCommand
 
bool EnableHotStandby
 
bool fullPageWrites
 
bool wal_log_hints
 
bool wal_compression
 
bool * wal_consistency_checking
 
char * wal_consistency_checking_string
 
bool log_checkpoints
 
int CheckPointSegments
 
int XLogArchiveMode
 
PGDLLIMPORT int wal_level
 
CheckpointStatsData CheckpointStats
 

Macro Definition Documentation

#define BACKUP_LABEL_FILE   "backup_label"
#define BACKUP_LABEL_OLD   "backup_label.old"

Definition at line 323 of file xlog.h.

Referenced by CancelBackup(), and StartupXLOG().

#define CHECKPOINT_CAUSE_TIME   0x0080 /* Elapsed time */

Definition at line 187 of file xlog.h.

Referenced by CheckpointerMain(), and LogCheckpointStart().

#define CHECKPOINT_CAUSE_XLOG   0x0040 /* XLOG consumption */

Definition at line 186 of file xlog.h.

Referenced by CheckpointerMain(), LogCheckpointStart(), XLogPageRead(), and XLogWrite().

#define CHECKPOINT_END_OF_RECOVERY
Value:
0x0002 /* Like shutdown checkpoint, but
* issued at end of WAL recovery */

Definition at line 177 of file xlog.h.

Referenced by BufferSync(), CheckpointerMain(), CreateCheckPoint(), LogCheckpointStart(), and StartupXLOG().

#define CHECKPOINT_FLUSH_ALL
Value:
0x0010 /* Flush all pages, including those
* belonging to unlogged tables */

Definition at line 181 of file xlog.h.

Referenced by BufferSync(), createdb(), LogCheckpointStart(), and movedb().

#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
#define SYNC_METHOD_FDATASYNC   1

Definition at line 26 of file xlog.h.

Referenced by get_sync_bit(), and issue_xlog_fsync().

#define SYNC_METHOD_FSYNC   0

Definition at line 25 of file xlog.h.

Referenced by get_sync_bit(), and issue_xlog_fsync().

#define SYNC_METHOD_FSYNC_WRITETHROUGH   3

Definition at line 28 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and pg_fsync().

#define SYNC_METHOD_OPEN   2 /* for O_SYNC */

Definition at line 27 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and XLogWrite().

#define SYNC_METHOD_OPEN_DSYNC   4 /* for O_DSYNC */

Definition at line 29 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and XLogWrite().

#define TABLESPACE_MAP   "tablespace_map"
#define TABLESPACE_MAP_OLD   "tablespace_map.old"

Definition at line 326 of file xlog.h.

Referenced by CancelBackup(), and StartupXLOG().

#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
#define XLogArchiveCommandSet ( )    (XLogArchiveCommand[0] != '\0')

Definition at line 140 of file xlog.h.

Referenced by pgarch_ArchiverCopyLoop(), and ShutdownXLOG().

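#define XLogArchivingAlways ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)

Definition at line 138 of file xlog.h.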

Referenced by do_pg_stop_backup(), and sigusr1_handler().

#define XLogLogicalInfoActive ( )    (wal_level >= WAL_LEVEL_LOGICAL)

Definition at line 163 of file xlog.h.

Referenced by AssignTransactionId(), and XactLogCommitRecord().

Typedef Documentation

Enumeration Type Documentation

Enumerator
ARCHIVE_MODE_OFF 
ARCHIVE_MODE_ON 
ARCHIVE_MODE_ALWAYS 

Definition at line 116 of file xlog.h.

117 {
118  ARCHIVE_MODE_OFF = 0, /* disabled */
119  ARCHIVE_MODE_ON, /* enabled while server is running normally */
120  ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */
121 } ArchiveMode;
ArchiveMode
Definition: xlog.h:116
Enumerator
STANDBY_DISABLED 
STANDBY_INITIALIZED 
STANDBY_SNAPSHOT_PENDING 
STANDBY_SNAPSHOT_READY 

Definition at line 64 of file xlog.h.

Enumerator
RECOVERY_TARGET_UNSET 
RECOVERY_TARGET_XID 
RECOVERY_TARGET_TIME 
RECOVERY_TARGET_NAME 
RECOVERY_TARGET_LSN 
RECOVERY_TARGET_IMMEDIATE 

Definition at line 80 of file xlog.h.

Enumerator
SESSION_BACKUP_NONE 
SESSION_BACKUP_EXCLUSIVE 
SESSION_BACKUP_NON_EXCLUSIVE 

Definition at line 305 of file xlog.h.

enum WalLevel
Enumerator
WAL_LEVEL_MINIMAL 
WAL_LEVEL_REPLICA 
WAL_LEVEL_LOGICAL 

Definition at line 125 of file xlog.h.

Function Documentation

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2247 of file xlog.c.

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

2248 {
2251 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2216
#define newval
double CheckPointCompletionTarget
Definition: checkpointer.c:147
void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2240 of file xlog.c.

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

2241 {
2244 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2216
int max_wal_size_mb
Definition: xlog.c:89
#define newval
void BootStrapXLOG ( void  )

Definition at line 5013 of file xlog.c.

References AdvanceOldestClogXid(), Assert, bootstrap_data_checksum_version, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), buffer, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstBootstrapObjectId, FirstMultiXactId, FirstNormalTransactionId, CheckPoint::fullPageWrites, fullPageWrites, gettimeofday(), INIT_CRC32C, InvalidTransactionId, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, offsetof, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, palloc(), PANIC, pfree(), pg_backend_random(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetMultiXactIdLimit(), SetTransactionIdLimit(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, ControlFileData::state, ControlFileData::system_identifier, TemplateDbOid, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, track_commit_timestamp, ControlFileData::track_commit_timestamp, TYPEALIGN, ControlFileData::unloggedLSN, WAIT_EVENT_WAL_BOOTSTRAP_SYNC, WAIT_EVENT_WAL_BOOTSTRAP_WRITE, wal_level, ControlFileData::wal_level, wal_log_hints, 
ControlFileData::wal_log_hints, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by AuxiliaryProcessMain().

5014 {
5015  CheckPoint checkPoint;
5016  char *buffer;
5017  XLogPageHeader page;
5018  XLogLongPageHeader longpage;
5019  XLogRecord *record;
5020  char *recptr;
5021  bool use_existent;
5022  uint64 sysidentifier;
5023  char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
5024  struct timeval tv;
5025  pg_crc32c crc;
5026 
5027  /*
5028  * Select a hopefully-unique system identifier code for this installation.
5029  * We use the result of gettimeofday(), including the fractional seconds
5030  * field, as being about as unique as we can easily get. (Think not to
5031  * use random(), since it hasn't been seeded and there's no portable way
5032  * to seed it other than the system clock value...) The upper half of the
5033  * uint64 value is just the tv_sec part, while the lower half contains the
5034  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5035  * PID for a little extra uniqueness. A person knowing this encoding can
5036  * determine the initialization time of the installation, which could
5037  * perhaps be useful sometimes.
5038  */
5039  gettimeofday(&tv, NULL);
5040  sysidentifier = ((uint64) tv.tv_sec) << 32;
5041  sysidentifier |= ((uint64) tv.tv_usec) << 12;
5042  sysidentifier |= getpid() & 0xFFF;
5043 
5044  /*
5045  * Generate a random nonce. This is used for authentication requests that
5046  * will fail because the user does not exist. The nonce is used to create
5047  * a genuine-looking password challenge for the non-existent user, in lieu
5048  * of an actual stored password.
5049  */
5050  if (!pg_backend_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
5051  ereport(PANIC,
5052  (errcode(ERRCODE_INTERNAL_ERROR),
5053  errmsg("could not generate secret authorization token")));
5054 
5055  /* First timeline ID is always 1 */
5056  ThisTimeLineID = 1;
5057 
5058  /* page buffer must be aligned suitably for O_DIRECT */
5059  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5060  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5061  memset(page, 0, XLOG_BLCKSZ);
5062 
5063  /*
5064  * Set up information for the initial checkpoint record
5065  *
5066  * The initial checkpoint record is written to the beginning of the WAL
5067  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5068  * used, so that we can use 0/0 to mean "before any valid WAL segment".
5069  */
5070  checkPoint.redo = wal_segment_size + SizeOfXLogLongPHD;
5071  checkPoint.ThisTimeLineID = ThisTimeLineID;
5072  checkPoint.PrevTimeLineID = ThisTimeLineID;
5073  checkPoint.fullPageWrites = fullPageWrites;
5074  checkPoint.nextXidEpoch = 0;
5075  checkPoint.nextXid = FirstNormalTransactionId;
5076  checkPoint.nextOid = FirstBootstrapObjectId;
5077  checkPoint.nextMulti = FirstMultiXactId;
5078  checkPoint.nextMultiOffset = 0;
5079  checkPoint.oldestXid = FirstNormalTransactionId;
5080  checkPoint.oldestXidDB = TemplateDbOid;
5081  checkPoint.oldestMulti = FirstMultiXactId;
5082  checkPoint.oldestMultiDB = TemplateDbOid;
5085  checkPoint.time = (pg_time_t) time(NULL);
5087 
5088  ShmemVariableCache->nextXid = checkPoint.nextXid;
5089  ShmemVariableCache->nextOid = checkPoint.nextOid;
5091  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5092  AdvanceOldestClogXid(checkPoint.oldestXid);
5093  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5094  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5096 
5097  /* Set up the XLOG page header */
5098  page->xlp_magic = XLOG_PAGE_MAGIC;
5099  page->xlp_info = XLP_LONG_HEADER;
5100  page->xlp_tli = ThisTimeLineID;
5102  longpage = (XLogLongPageHeader) page;
5103  longpage->xlp_sysid = sysidentifier;
5104  longpage->xlp_seg_size = wal_segment_size;
5105  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5106 
5107  /* Insert the initial checkpoint record */
5108  recptr = ((char *) page + SizeOfXLogLongPHD);
5109  record = (XLogRecord *) recptr;
5110  record->xl_prev = 0;
5111  record->xl_xid = InvalidTransactionId;
5112  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5114  record->xl_rmid = RM_XLOG_ID;
5115  recptr += SizeOfXLogRecord;
5116  /* fill the XLogRecordDataHeaderShort struct */
5117  *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5118  *(recptr++) = sizeof(checkPoint);
5119  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5120  recptr += sizeof(checkPoint);
5121  Assert(recptr - (char *) record == record->xl_tot_len);
5122 
5123  INIT_CRC32C(crc);
5124  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5125  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5126  FIN_CRC32C(crc);
5127  record->xl_crc = crc;
5128 
5129  /* Create first XLOG segment file */
5130  use_existent = false;
5131  openLogFile = XLogFileInit(1, &use_existent, false);
5132 
5133  /* Write the first page with the initial record */
5134  errno = 0;
5136  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5137  {
5138  /* if write didn't set errno, assume problem is no disk space */
5139  if (errno == 0)
5140  errno = ENOSPC;
5141  ereport(PANIC,
5143  errmsg("could not write bootstrap write-ahead log file: %m")));
5144  }
5146 
5148  if (pg_fsync(openLogFile) != 0)
5149  ereport(PANIC,
5151  errmsg("could not fsync bootstrap write-ahead log file: %m")));
5153 
5154  if (close(openLogFile))
5155  ereport(PANIC,
5157  errmsg("could not close bootstrap write-ahead log file: %m")));
5158 
5159  openLogFile = -1;
5160 
5161  /* Now create pg_control */
5162 
5163  memset(ControlFile, 0, sizeof(ControlFileData));
5164  /* Initialize pg_control status fields */
5165  ControlFile->system_identifier = sysidentifier;
5166  memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
5168  ControlFile->time = checkPoint.time;
5169  ControlFile->checkPoint = checkPoint.redo;
5170  ControlFile->checkPointCopy = checkPoint;
5171  ControlFile->unloggedLSN = 1;
5172 
5173  /* Set important parameter values for use when replaying WAL */
5182 
5183  /* some additional ControlFile fields are set in WriteControlFile() */
5184 
5185  WriteControlFile();
5186 
5187  /* Bootstrap the commit log, too */
5188  BootStrapCLOG();
5192 
5193  pfree(buffer);
5194 
5195  /*
5196  * Force control file to be read - in contrast to normal processing we'd
5197  * otherwise never run the checks and GUC related initializations therein.
5198  */
5199  ReadControlFile();
5200 }
static void WriteControlFile(void)
Definition: xlog.c:4392
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
int max_locks_per_xact
Definition: pg_control.h:182
int gettimeofday(struct timeval *tp, struct timezone *tzp)
Definition: gettimeofday.c:105
int max_prepared_xacts
Definition: pg_control.h:181
int64 pg_time_t
Definition: pgtime.h:23
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:849
uint32 oidCount
Definition: transam.h:112
#define write(a, b, c)
Definition: win32.h:14
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:201
int max_worker_processes
Definition: pg_control.h:180
uint32 pg_crc32c
Definition: pg_crc32c.h:38
TransactionId oldestActiveXid
Definition: pg_control.h:63
int wal_level
Definition: xlog.c:104
int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
Definition: xlog.c:3170
void BootStrapMultiXact(void)
Definition: multixact.c:1866
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: pg_control.h:39
int errcode(int sqlerrcode)
Definition: elog.c:575
RmgrId xl_rmid
Definition: xlogrecord.h:47
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
CheckPoint checkPointCopy
Definition: pg_control.h:132
TransactionId oldestXid
Definition: pg_control.h:47
TransactionId nextXid
Definition: pg_control.h:43
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:53
uint32 bootstrap_data_checksum_version
Definition: bootstrap.c:50
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:74
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:27
bool fullPageWrites
Definition: xlog.c:97
void BootStrapSUBTRANS(void)
Definition: subtrans.c:212
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:271
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:949
#define FirstNormalTransactionId
Definition: transam.h:34
int max_prepared_xacts
Definition: twophase.c:117
uint64 system_identifier
Definition: pg_control.h:106
uint32 xl_tot_len
Definition: xlogrecord.h:43
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
TransactionId nextXid
Definition: transam.h:117
static void ReadControlFile(void)
Definition: xlog.c:4481
uint32 nextXidEpoch
Definition: pg_control.h:42
bool track_commit_timestamp
Definition: commit_ts.c:103
#define TemplateDbOid
Definition: pg_database.h:80
uint32 data_checksum_version
Definition: pg_control.h:222
bool pg_backend_random(char *dst, int len)
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
XLogRecPtr unloggedLSN
Definition: pg_control.h:134
int errcode_for_file_access(void)
Definition: elog.c:598
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
#define FirstBootstrapObjectId
Definition: transam.h:93
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1244
#define FirstMultiXactId
Definition: multixact.h:24
#define ereport(elevel, rest)
Definition: elog.h:122
int max_locks_per_xact
Definition: lock.c:54
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
TransactionId newestCommitTsXid
Definition: pg_control.h:54
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229
int MaxConnections
Definition: globals.c:123
Oid oldestMultiDB
Definition: pg_control.h:50
static int openLogFile
Definition: xlog.c:774
static ControlFileData * ControlFile
Definition: xlog.c:715
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2194
TimeLineID ThisTimeLineID
Definition: xlog.c:181
Oid nextOid
Definition: pg_control.h:44
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:569
bool fullPageWrites
Definition: pg_control.h:41
bool wal_log_hints
Definition: xlog.c:98
void BootStrapCLOG(void)
Definition: clog.c:712
bool track_commit_timestamp
Definition: pg_control.h:183
#define Assert(condition)
Definition: c.h:681
#define XLP_LONG_HEADER
Definition: xlog_internal.h:79
Oid oldestXidDB
Definition: pg_control.h:48
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1220
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:288
uint8 xl_info
Definition: xlogrecord.h:46
MultiXactId nextMulti
Definition: pg_control.h:45
pg_crc32c xl_crc
Definition: xlogrecord.h:49
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:224
TransactionId xl_xid
Definition: xlogrecord.h:44
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
int max_worker_processes
Definition: globals.c:124
int pg_fsync(int fd)
Definition: fd.c:338
#define close(a)
Definition: win32.h:12
void BootStrapCommitTs(void)
Definition: commit_ts.c:523
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:73
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:78
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
#define offsetof(type, field)
Definition: c.h:549
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2160
bool CheckPromoteSignal ( void  )

Definition at line 12171 of file xlog.c.

References FALLBACK_PROMOTE_SIGNAL_FILE, and PROMOTE_SIGNAL_FILE.

Referenced by sigusr1_handler().

12172 {
12173  struct stat stat_buf;
12174 
12175  if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0 ||
12177  return true;
12178 
12179  return false;
12180 }
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:85
struct stat stat_buf
Definition: pg_standby.c:103
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3779 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, and XLogFileName.

Referenced by perform_base_backup(), and XLogRead().

3780 {
3781  XLogSegNo lastRemovedSegNo;
3782 
3784  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3786 
3787  if (segno <= lastRemovedSegNo)
3788  {
3789  char filename[MAXFNAMELEN];
3790 
3791  XLogFileName(filename, tli, segno, wal_segment_size);
3792  ereport(ERROR,
3794  errmsg("requested WAL segment %s has already been removed",
3795  filename)));
3796  }
3797 }
int wal_segment_size
Definition: xlog.c:113
slock_t info_lck
Definition: xlog.c:704
XLogSegNo lastRemovedSegNo
Definition: xlog.c:587
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define ERROR
Definition: elog.h:43
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static XLogCtlData * XLogCtl
Definition: xlog.c:707
static char * filename
Definition: pg_dumpall.c:90
int errmsg(const char *fmt,...)
Definition: elog.c:797
void CreateCheckPoint ( int  flags)

Definition at line 8612 of file xlog.c.

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, XLogCtlInsert::CurrBytePos, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, elog, END_CRIT_SECTION, ereport, errmsg(), ERROR, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestXmin(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, InitXLogInsert(), Insert(), XLogCtlData::Insert, INSERT_FREESPACE, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, CheckPoint::newestCommitTsXid, VariableCacheData::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, VariableCacheData::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, VariableCacheData::oldestXid, CheckPoint::oldestXidDB, VariableCacheData::oldestXidDB, PANIC, pfree(), pg_usleep(), PreallocXlogFiles(), ControlFileData::prevCheckPoint, CheckPoint::PrevTimeLineID, XLogCtlData::PrevTimeLineID, PROCARRAY_FLAGS_DEFAULT, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, 
XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogShortPHD, smgrpostckpt(), smgrpreckpt(), SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, TruncateSUBTRANS(), XLogCtlData::ulsn_lck, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, UpdateCheckPointDistanceEstimate(), UpdateControlFile(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), ShutdownXLOG(), and StartupXLOG().

8613 {
8614  bool shutdown;
8615  CheckPoint checkPoint;
8616  XLogRecPtr recptr;
8617  XLogCtlInsert *Insert = &XLogCtl->Insert;
8618  uint32 freespace;
8619  XLogRecPtr PriorRedoPtr;
8620  XLogRecPtr curInsert;
8621  XLogRecPtr last_important_lsn;
8622  VirtualTransactionId *vxids;
8623  int nvxids;
8624 
8625  /*
8626  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
8627  * issued at a different time.
8628  */
8629  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
8630  shutdown = true;
8631  else
8632  shutdown = false;
8633 
8634  /* sanity check */
8635  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
8636  elog(ERROR, "can't create a checkpoint during recovery");
8637 
8638  /*
8639  * Initialize InitXLogInsert working areas before entering the critical
8640  * section. Normally, this is done by the first call to
8641  * RecoveryInProgress() or LocalSetXLogInsertAllowed(), but when creating
8642  * an end-of-recovery checkpoint, the LocalSetXLogInsertAllowed call is
8643  * done below in a critical section, and InitXLogInsert cannot be called
8644  * in a critical section.
8645  */
8646  InitXLogInsert();
8647 
8648  /*
8649  * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
8650  * (This is just pro forma, since in the present system structure there is
8651  * only one process that is allowed to issue checkpoints at any given
8652  * time.)
8653  */
8654  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
8655 
8656  /*
8657  * Prepare to accumulate statistics.
8658  *
8659  * Note: because it is possible for log_checkpoints to change while a
8660  * checkpoint proceeds, we always accumulate stats, even if
8661  * log_checkpoints is currently off.
8662  */
8663  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
8664  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
8665 
8666  /*
8667  * Use a critical section to force system panic if we have trouble.
8668  */
8669  START_CRIT_SECTION();
8670 
8671  if (shutdown)
8672  {
8673  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8674  ControlFile->state = DB_SHUTDOWNING;
8675  ControlFile->time = (pg_time_t) time(NULL);
8676  UpdateControlFile();
8677  LWLockRelease(ControlFileLock);
8678  }
8679 
8680  /*
8681  * Let smgr prepare for checkpoint; this has to happen before we determine
8682  * the REDO pointer. Note that smgr must not do anything that'd have to
8683  * be undone if we decide no checkpoint is needed.
8684  */
8685  smgrpreckpt();
8686 
8687  /* Begin filling in the checkpoint WAL record */
8688  MemSet(&checkPoint, 0, sizeof(checkPoint));
8689  checkPoint.time = (pg_time_t) time(NULL);
8690 
8691  /*
8692  * For Hot Standby, derive the oldestActiveXid before we fix the redo
8693  * pointer. This allows us to begin accumulating changes to assemble our
8694  * starting snapshot of locks and transactions.
8695  */
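8696  if (!shutdown && XLogStandbyInfoActive())
8697  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
8698  else
8699  checkPoint.oldestActiveXid = InvalidTransactionId;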
8700 
8701  /*
8702  * Get location of last important record before acquiring insert locks (as
8703  * GetLastImportantRecPtr() also locks WAL locks).
8704  */
8705  last_important_lsn = GetLastImportantRecPtr();
8706 
8707  /*
8708  * We must block concurrent insertions while examining insert state to
8709  * determine the checkpoint REDO pointer.
8710  */
8711  WALInsertLockAcquireExclusive();
8712  curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
8713 
8714  /*
8715  * If this isn't a shutdown or forced checkpoint, and if there has been no
8716  * WAL activity requiring a checkpoint, skip it. The idea here is to
8717  * avoid inserting duplicate checkpoints when the system is idle.
8718  */
8719  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
8720  CHECKPOINT_FORCE)) == 0)
8721  {
8722  if (last_important_lsn == ControlFile->checkPoint)
8723  {
8724  WALInsertLockRelease();
8725  LWLockRelease(CheckpointLock);
8726  END_CRIT_SECTION();
8727  ereport(DEBUG1,
8728  (errmsg("checkpoint skipped because system is idle")));
8729  return;
8730  }
8731  }
8732 
8733  /*
8734  * An end-of-recovery checkpoint is created before anyone is allowed to
8735  * write WAL. To allow us to write the checkpoint record, temporarily
8736  * enable XLogInsertAllowed. (This also ensures ThisTimeLineID is
8737  * initialized, which we need here and in AdvanceXLInsertBuffer.)
8738  */
8739  if (flags & CHECKPOINT_END_OF_RECOVERY)
8740  LocalSetXLogInsertAllowed();
8741 
8742  checkPoint.ThisTimeLineID = ThisTimeLineID;
8743  if (flags & CHECKPOINT_END_OF_RECOVERY)
8744  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
8745  else
8746  checkPoint.PrevTimeLineID = ThisTimeLineID;
8747 
8748  checkPoint.fullPageWrites = Insert->fullPageWrites;
8749 
8750  /*
8751  * Compute new REDO record ptr = location of next XLOG record.
8752  *
8753  * NB: this is NOT necessarily where the checkpoint record itself will be,
8754  * since other backends may insert more XLOG records while we're off doing
8755  * the buffer flush work. Those XLOG records are logically after the
8756  * checkpoint, even though physically before it. Got that?
8757  */
8758  freespace = INSERT_FREESPACE(curInsert);
8759  if (freespace == 0)
8760  {
8761  if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
8762  curInsert += SizeOfXLogLongPHD;
8763  else
8764  curInsert += SizeOfXLogShortPHD;
8765  }
8766  checkPoint.redo = curInsert;
8767 
8768  /*
8769  * Here we update the shared RedoRecPtr for future XLogInsert calls; this
8770  * must be done while holding all the insertion locks.
8771  *
8772  * Note: if we fail to complete the checkpoint, RedoRecPtr will be left
8773  * pointing past where it really needs to point. This is okay; the only
8774  * consequence is that XLogInsert might back up whole buffers that it
8775  * didn't really need to. We can't postpone advancing RedoRecPtr because
8776  * XLogInserts that happen while we are dumping buffers must assume that
8777  * their buffer changes are not included in the checkpoint.
8778  */
8779  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
8780 
8781  /*
8782  * Now we can release the WAL insertion locks, allowing other xacts to
8783  * proceed while we are flushing disk buffers.
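8784  */
8785  WALInsertLockRelease();
8786 
8787  /* Update the info_lck-protected copy of RedoRecPtr as well */
8788  SpinLockAcquire(&XLogCtl->info_lck);
8789  XLogCtl->RedoRecPtr = checkPoint.redo;
8790  SpinLockRelease(&XLogCtl->info_lck);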
8791 
8792  /*
8793  * If enabled, log checkpoint start. We postpone this until now so as not
8794  * to log anything if we decided to skip the checkpoint.
8795  */
8796  if (log_checkpoints)
8797  LogCheckpointStart(flags, false);
8798 
8799  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
8800 
8801  /*
8802  * Get the other info we need for the checkpoint record.
8803  *
8804  * We don't need to save oldestClogXid in the checkpoint, it only matters
8805  * for the short period in which clog is being truncated, and if we crash
8806  * during that we'll redo the clog truncation and fix up oldestClogXid
8807  * there.
8808  */
8809  LWLockAcquire(XidGenLock, LW_SHARED);
8810  checkPoint.nextXid = ShmemVariableCache->nextXid;
8811  checkPoint.oldestXid = ShmemVariableCache->oldestXid;
8812  checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
8813  LWLockRelease(XidGenLock);
8814 
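8815  LWLockAcquire(CommitTsLock, LW_SHARED);
8816  checkPoint.oldestCommitTsXid = ShmemVariableCache->oldestCommitTsXid;
8817  checkPoint.newestCommitTsXid = ShmemVariableCache->newestCommitTsXid;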
8818  LWLockRelease(CommitTsLock);
8819 
8820  /* Increase XID epoch if we've wrapped around since last checkpoint */
8821  checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
8822  if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
8823  checkPoint.nextXidEpoch++;
8824 
8825  LWLockAcquire(OidGenLock, LW_SHARED);
8826  checkPoint.nextOid = ShmemVariableCache->nextOid;
8827  if (!shutdown)
8828  checkPoint.nextOid += ShmemVariableCache->oidCount;
8829  LWLockRelease(OidGenLock);
8830 
8831  MultiXactGetCheckptMulti(shutdown,
8832  &checkPoint.nextMulti,
8833  &checkPoint.nextMultiOffset,
8834  &checkPoint.oldestMulti,
8835  &checkPoint.oldestMultiDB);
8836 
8837  /*
8838  * Having constructed the checkpoint record, ensure all shmem disk buffers
8839  * and commit-log buffers are flushed to disk.
8840  *
8841  * This I/O could fail for various reasons. If so, we will fail to
8842  * complete the checkpoint, but there is no reason to force a system
8843  * panic. Accordingly, exit critical section while doing it.
8844  */
8845  END_CRIT_SECTION();
8846 
8847  /*
8848  * In some cases there are groups of actions that must all occur on one
8849  * side or the other of a checkpoint record. Before flushing the
8850  * checkpoint record we must explicitly wait for any backend currently
8851  * performing those groups of actions.
8852  *
8853  * One example is end of transaction, so we must wait for any transactions
8854  * that are currently in commit critical sections. If an xact inserted
8855  * its commit record into XLOG just before the REDO point, then a crash
8856  * restart from the REDO point would not replay that record, which means
8857  * that our flushing had better include the xact's update of pg_xact. So
8858  * we wait till he's out of his commit critical section before proceeding.
8859  * See notes in RecordTransactionCommit().
8860  *
8861  * Because we've already released the insertion locks, this test is a bit
8862  * fuzzy: it is possible that we will wait for xacts we didn't really need
8863  * to wait for. But the delay should be short and it seems better to make
8864  * checkpoint take a bit longer than to hold off insertions longer than
8865  * necessary. (In fact, the whole reason we have this issue is that xact.c
8866  * does commit record XLOG insertion and clog update as two separate steps
8867  * protected by different locks, but again that seems best on grounds of
8868  * minimizing lock contention.)
8869  *
8870  * A transaction that has not yet set delayChkpt when we look cannot be at
8871  * risk, since he's not inserted his commit record yet; and one that's
8872  * already cleared it is not at risk either, since he's done fixing clog
8873  * and we will correctly flush the update below. So we cannot miss any
8874  * xacts we need to wait for.
8875  */
8876  vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
8877  if (nvxids > 0)
8878  {
8879  do
8880  {
8881  pg_usleep(10000L); /* wait for 10 msec */
8882  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
8883  }
8884  pfree(vxids);
8885 
8886  CheckPointGuts(checkPoint.redo, flags);
8887 
8888  /*
8889  * Take a snapshot of running transactions and write this to WAL. This
8890  * allows us to reconstruct the state of running transactions during
8891  * archive recovery, if required. Skip, if this info disabled.
8892  *
8893  * If we are shutting down, or Startup process is completing crash
8894  * recovery we don't need to write running xact data.
8895  */
8896  if (!shutdown && XLogStandbyInfoActive())
8897  LogStandbySnapshot();
8898 
8899  START_CRIT_SECTION();
8900 
8901  /*
8902  * Now insert the checkpoint record into XLOG.
8903  */
8904  XLogBeginInsert();
8905  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
8906  recptr = XLogInsert(RM_XLOG_ID,
8907  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
8908  XLOG_CHECKPOINT_ONLINE);
8909 
8910  XLogFlush(recptr);
8911 
8912  /*
8913  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
8914  * overwritten at next startup. No-one should even try, this just allows
8915  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
8916  * to just temporarily disable writing until the system has exited
8917  * recovery.
8918  */
8919  if (shutdown)
8920  {
8921  if (flags & CHECKPOINT_END_OF_RECOVERY)
8922  LocalXLogInsertAllowed = -1; /* return to "check" state */
8923  else
8924  LocalXLogInsertAllowed = 0; /* never again write WAL */
8925  }
8926 
8927  /*
8928  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
8929  * = end of actual checkpoint record.
8930  */
8931  if (shutdown && checkPoint.redo != ProcLastRecPtr)
8932  ereport(PANIC,
8933  (errmsg("concurrent write-ahead log activity while database system is shutting down")));
8934 
8935  /*
8936  * Remember the prior checkpoint's redo pointer, used later to determine
8937  * the point where the log can be truncated.
8938  */
8939  PriorRedoPtr = ControlFile->checkPointCopy.redo;
8940 
8941  /*
8942  * Update the control file.
8943  */
8944  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
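8945  if (shutdown)
8946  ControlFile->state = DB_SHUTDOWNED;
8947  ControlFile->prevCheckPoint = ControlFile->checkPoint;
8948  ControlFile->checkPoint = ProcLastRecPtr;
8949  ControlFile->checkPointCopy = checkPoint;
8950  ControlFile->time = (pg_time_t) time(NULL);
8951  /* crash recovery should always recover to the end of WAL */
8952  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
8953  ControlFile->minRecoveryPointTLI = 0;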
8954 
8955  /*
8956  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
8957  * unused on non-shutdown checkpoints, but seems useful to store it always
8958  * for debugging purposes.
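8959  */
8960  SpinLockAcquire(&XLogCtl->ulsn_lck);
8961  ControlFile->unloggedLSN = XLogCtl->unloggedLSN;
8962  SpinLockRelease(&XLogCtl->ulsn_lck);
8963 
8964  UpdateControlFile();
8965  LWLockRelease(ControlFileLock);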
8966 
8967  /* Update shared-memory copy of checkpoint XID/epoch */
8968  SpinLockAcquire(&XLogCtl->info_lck);
8969  XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
8970  XLogCtl->ckptXid = checkPoint.nextXid;
8971  SpinLockRelease(&XLogCtl->info_lck);
8972 
8973  /*
8974  * We are now done with critical updates; no need for system panic if we
8975  * have trouble while fooling with old log segments.
8976  */
8977  END_CRIT_SECTION();
8978 
8979  /*
8980  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
8981  */
8982  smgrpostckpt();
8983 
8984  /*
8985  * Delete old log files (those no longer needed even for previous
8986  * checkpoint or the standbys in XLOG streaming).
8987  */
8988  if (PriorRedoPtr != InvalidXLogRecPtr)
8989  {
8990  XLogSegNo _logSegNo;
8991 
8992  /* Update the average distance between checkpoints. */
8993  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
8994 
8995  XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size);
8996  KeepLogSeg(recptr, &_logSegNo);
8997  _logSegNo--;
8998  RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr);
8999  }
9000 
9001  /*
9002  * Make more log segments if needed. (Do this after recycling old log
9003  * segments, since that may supply some of the needed files.)
9004  */
9005  if (!shutdown)
9006  PreallocXlogFiles(recptr);
9007 
9008  /*
9009  * Truncate pg_subtrans if possible. We can throw away all data before
9010  * the oldest XMIN of any running transaction. No future transaction will
9011  * attempt to reference any pg_subtrans entry older than that (see Asserts
9012  * in subtrans.c). During recovery, though, we mustn't do this because
9013  * StartupSUBTRANS hasn't been called yet.
9014  */
9015  if (!RecoveryInProgress())
9016  TruncateSUBTRANS(GetOldestXmin(NULL, PROCARRAY_FLAGS_DEFAULT));
9017 
9018  /* Real work is done, but log and update stats before releasing lock. */
9019  LogCheckpointEnd(false);
9020 
9021  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
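9022  NBuffers,
9023  CheckpointStats.ckpt_segs_added,
9024  CheckpointStats.ckpt_segs_removed,
9025  CheckpointStats.ckpt_segs_recycled);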
9026 
9027  LWLockRelease(CheckpointLock);
9028 }
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:8324
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8550
static int LocalXLogInsertAllowed
Definition: xlog.c:235
bool log_checkpoints
Definition: xlog.c:102
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int64 pg_time_t
Definition: pgtime.h:23
TransactionId ckptXid
Definition: xlog.c:583
static void WALInsertLockRelease(void)
Definition: xlog.c:1651
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:68
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:168
uint32 oidCount
Definition: transam.h:112
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1920
XLogRecPtr unloggedLSN
Definition: xlog.c:590
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:338
TransactionId oldestActiveXid
Definition: pg_control.h:63
void InitXLogInsert(void)
Definition: xloginsert.c:1028
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:704
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2254
MultiXactId oldestMulti
Definition: pg_control.h:49
TimeLineID PrevTimeLineID
Definition: xlog.c:631
TimeLineID PrevTimeLineID
Definition: pg_control.h:39
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
int ckpt_segs_recycled
Definition: xlog.h:209
TransactionId oldestXid
Definition: transam.h:119
#define MemSet(start, val, len)
Definition: c.h:863
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
Definition: xlog.c:3844
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2118
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9089
CheckPoint checkPointCopy
Definition: pg_control.h:132
XLogCtlInsert Insert
Definition: xlog.c:577
TransactionId oldestXid
Definition: pg_control.h:47
bool RecoveryInProgress(void)
Definition: xlog.c:7954
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:354
uint32 ckptXidEpoch
Definition: xlog.c:582
TransactionId nextXid
Definition: pg_control.h:43
pg_time_t time
Definition: pg_control.h:51
#define PANIC
Definition: elog.h:53
bool fullPageWrites
Definition: xlog.c:552
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2773
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
MultiXactOffset nextMultiOffset
Definition: pg_control.h:46
void UpdateControlFile(void)
Definition: xlog.c:4680
TransactionId oldestCommitTsXid
Definition: pg_control.h:52
void pfree(void *pointer)
Definition: mcxt.c:949
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:909
#define ERROR
Definition: elog.h:43
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8465
TransactionId nextXid
Definition: transam.h:117
uint32 nextXidEpoch
Definition: pg_control.h:42
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:67
XLogRecPtr unloggedLSN
Definition: pg_control.h:134
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3751
uint64 XLogSegNo
Definition: xlogdefs.h:34
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:177
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
uint64 CurrBytePos
Definition: xlog.c:528
unsigned int uint32
Definition: c.h:258
XLogRecPtr RedoRecPtr
Definition: xlog.c:581
int ckpt_segs_removed
Definition: xlog.h:208
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:721
#define ereport(elevel, rest)
Definition: elog.h:122
TransactionId oldestCommitTsXid
Definition: transam.h:129
static void Insert(File file)
Definition: fd.c:1059
int ckpt_bufs_written
Definition: xlog.h:205
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:8080
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
TransactionId newestCommitTsXid
Definition: pg_control.h:54
#define PROCARRAY_FLAGS_DEFAULT
Definition: procarray.h:50
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9413
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
Oid oldestMultiDB
Definition: pg_control.h:50
#define XLogStandbyInfoActive()
Definition: xlog.h:160
XLogRecPtr prevCheckPoint
Definition: pg_control.h:130
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
Oid nextOid
Definition: pg_control.h:44
bool fullPageWrites
Definition: pg_control.h:41
TransactionId GetOldestXmin(Relation rel, int flags)
Definition: procarray.c:1315
void smgrpreckpt(void)
Definition: smgr.c:744
uint64 XLogRecPtr
Definition: xlogdefs.h:21
Oid oldestXidDB
Definition: pg_control.h:48
TransactionId newestCommitTsXid
Definition: transam.h:130
CheckpointStatsData CheckpointStats
Definition: xlog.c:175
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:55
MultiXactId nextMulti
Definition: pg_control.h:45
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1622
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int ckpt_segs_added
Definition: xlog.h:207
slock_t ulsn_lck
Definition: xlog.c:591
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2092
int NBuffers
Definition: globals.c:122
#define elog
Definition: elog.h:219
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2299
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr RedoRecPtr
Definition: xlog.c:550
void smgrpostckpt(void)
Definition: smgr.c:774
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8447
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:176
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:167
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
bool CreateRestartPoint ( int  flags)

Definition at line 9159 of file xlog.c.

References XLogCtlData::archiveCleanupCommand, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_start_t, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestXmin(), GetWalRcvWriteRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, XLogCtlData::Insert, InvalidXLogRecPtr, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, PreallocXlogFiles(), ControlFileData::prevCheckPoint, PROCARRAY_FLAGS_DEFAULT, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, ControlFileData::time, timestamptz_to_str(), TruncateSUBTRANS(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

9160 {
9161  XLogRecPtr lastCheckPointRecPtr;
9162  XLogRecPtr lastCheckPointEndPtr;
9163  CheckPoint lastCheckPoint;
9164  XLogRecPtr PriorRedoPtr;
9165  TimestampTz xtime;
9166 
9167  /*
9168  * Acquire CheckpointLock to ensure only one restartpoint or checkpoint
9169  * happens at a time.
9170  */
9171  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
9172 
9173  /* Get a local copy of the last safe checkpoint record. */
9174  SpinLockAcquire(&XLogCtl->info_lck);
9175  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
9176  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
9177  lastCheckPoint = XLogCtl->lastCheckPoint;
9178  SpinLockRelease(&XLogCtl->info_lck);
9179 
9180  /*
9181  * Check that we're still in recovery mode. It's ok if we exit recovery
9182  * mode after this check, the restart point is valid anyway.
9183  */
9184  if (!RecoveryInProgress())
9185  {
9186  ereport(DEBUG2,
9187  (errmsg("skipping restartpoint, recovery has already ended")));
9188  LWLockRelease(CheckpointLock);
9189  return false;
9190  }
9191 
9192  /*
9193  * If the last checkpoint record we've replayed is already our last
9194  * restartpoint, we can't perform a new restart point. We still update
9195  * minRecoveryPoint in that case, so that if this is a shutdown restart
9196  * point, we won't start up earlier than before. That's not strictly
9197  * necessary, but when hot standby is enabled, it would be rather weird if
9198  * the database opened up for read-only connections at a point-in-time
9199  * before the last shutdown. Such time travel is still possible in case of
9200  * immediate shutdown, though.
9201  *
9202  * We don't explicitly advance minRecoveryPoint when we do create a
9203  * restartpoint. It's assumed that flushing the buffers will do that as a
9204  * side-effect.
9205  */
9206  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
9207  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
9208  {
9209  ereport(DEBUG2,
9210  (errmsg("skipping restartpoint, already performed at %X/%X",
9211  (uint32) (lastCheckPoint.redo >> 32),
9212  (uint32) lastCheckPoint.redo)));
9213 
9214  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
9215  if (flags & CHECKPOINT_IS_SHUTDOWN)
9216  {
9217  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9218  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
9219  ControlFile->time = (pg_time_t) time(NULL);
9220  UpdateControlFile();
9221  LWLockRelease(ControlFileLock);
9222  }
9223  LWLockRelease(CheckpointLock);
9224  return false;
9225  }
9226 
9227  /*
9228  * Update the shared RedoRecPtr so that the startup process can calculate
9229  * the number of segments replayed since last restartpoint, and request a
9230  * restartpoint if it exceeds CheckPointSegments.
9231  *
9232  * Like in CreateCheckPoint(), hold off insertions to update it, although
9233  * during recovery this is just pro forma, because no WAL insertions are
9234  * happening.
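9235  */
9236  WALInsertLockAcquireExclusive();
9237  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
9238  WALInsertLockRelease();
9239 
9240  /* Also update the info_lck-protected copy */
9241  SpinLockAcquire(&XLogCtl->info_lck);
9242  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
9243  SpinLockRelease(&XLogCtl->info_lck);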
9244 
9245  /*
9246  * Prepare to accumulate statistics.
9247  *
9248  * Note: because it is possible for log_checkpoints to change while a
9249  * checkpoint proceeds, we always accumulate stats, even if
9250  * log_checkpoints is currently off.
9251  */
9252  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
9253  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
9254 
9255  if (log_checkpoints)
9256  LogCheckpointStart(flags, true);
9257 
9258  CheckPointGuts(lastCheckPoint.redo, flags);
9259 
9260  /*
9261  * Remember the prior checkpoint's redo pointer, used later to determine
9262  * the point at which we can truncate the log.
9263  */
9264  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9265 
9266  /*
9267  * Update pg_control, using current time. Check that it still shows
9268  * IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
9269  * this is a quick hack to make sure nothing really bad happens if somehow
9270  * we get here after the end-of-recovery checkpoint.
9271  */
9272  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9273  if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
9274  ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
9275  {
9276  ControlFile->prevCheckPoint = ControlFile->checkPoint;
9277  ControlFile->checkPoint = lastCheckPointRecPtr;
9278  ControlFile->checkPointCopy = lastCheckPoint;
9279  ControlFile->time = (pg_time_t) time(NULL);
9280 
9281  /*
9282  * Ensure minRecoveryPoint is past the checkpoint record. Normally,
9283  * this will have happened already while writing out dirty buffers,
9284  * but not necessarily - e.g. because no buffers were dirtied. We do
9285  * this because a non-exclusive base backup uses minRecoveryPoint to
9286  * determine which WAL files must be included in the backup, and the
9287  * file (or files) containing the checkpoint record must be included,
9288  * at a minimum. Note that for an ordinary restart of recovery there's
9289  * no value in having the minimum recovery point any earlier than this
9290  * anyway, because redo will begin just after the checkpoint record.
9291  */
9292  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
9293  {
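9294  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
9295  ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
9296 
9297  /* update local copy */
9298  minRecoveryPoint = ControlFile->minRecoveryPoint;
9299  minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
9300  }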
9301  if (flags & CHECKPOINT_IS_SHUTDOWN)
9302  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
9303  UpdateControlFile();
9304  }
9305  LWLockRelease(ControlFileLock);
9306 
9307  /*
9308  * Delete old log files (those no longer needed even for previous
9309  * checkpoint/restartpoint) to prevent the disk holding the xlog from
9310  * growing full.
9311  */
9312  if (PriorRedoPtr != InvalidXLogRecPtr)
9313  {
9314  XLogRecPtr receivePtr;
9315  XLogRecPtr replayPtr;
9316  TimeLineID replayTLI;
9317  XLogRecPtr endptr;
9318  XLogSegNo _logSegNo;
9319 
9320  /* Update the average distance between checkpoints/restartpoints. */
9321  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
9322 
9323  XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size);
9324 
9325  /*
9326  * Get the current end of xlog replayed or received, whichever is
9327  * later.
9328  */
9329  receivePtr = GetWalRcvWriteRecPtr(NULL, NULL);
9330  replayPtr = GetXLogReplayRecPtr(&replayTLI);
9331  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
9332 
9333  KeepLogSeg(endptr, &_logSegNo);
9334  _logSegNo--;
9335 
9336  /*
9337  * Try to recycle segments on a useful timeline. If we've been
9338  * promoted since the beginning of this restartpoint, use the new
9339  * timeline chosen at end of recovery (RecoveryInProgress() sets
9340  * ThisTimeLineID in that case). If we're still in recovery, use the
9341  * timeline we're currently replaying.
9342  *
9343  * There is no guarantee that the WAL segments will be useful on the
9344  * current timeline; if recovery proceeds to a new timeline right
9345  * after this, the pre-allocated WAL segments on this timeline will
9346  * not be used, and will go wasted until recycled on the next
9347  * restartpoint. We'll live with that.
9348  */
9349  if (RecoveryInProgress())
9350  ThisTimeLineID = replayTLI;
9351 
9352  RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, endptr);
9353 
9354  /*
9355  * Make more log segments if needed. (Do this after recycling old log
9356  * segments, since that may supply some of the needed files.)
9357  */
9358  PreallocXlogFiles(endptr);
9359 
9360  /*
9361  * ThisTimeLineID is normally not set when we're still in recovery.
9362  * However, recycling/preallocating segments above needed
9363  * ThisTimeLineID to determine which timeline to install the segments
9364  * on. Reset it now, to restore the normal state of affairs for
9365  * debugging purposes.
9366  */
9367  if (RecoveryInProgress())
9368  ThisTimeLineID = 0;
9369  }
9370 
9371  /*
9372  * Truncate pg_subtrans if possible. We can throw away all data before
9373  * the oldest XMIN of any running transaction. No future transaction will
9374  * attempt to reference any pg_subtrans entry older than that (see Asserts
9375  * in subtrans.c). When hot standby is disabled, though, we mustn't do
9376  * this because StartupSUBTRANS hasn't been called yet.
9377  */
9378  if (EnableHotStandby)
9379  TruncateSUBTRANS(GetOldestXmin(NULL, PROCARRAY_FLAGS_DEFAULT));
9380 
9381  /* Real work is done, but log and update before releasing lock. */
9382  LogCheckpointEnd(true);
9383 
9384  xtime = GetLatestXTime();
9385  ereport((log_checkpoints ? LOG : DEBUG2),
9386  (errmsg("recovery restart point at %X/%X",
9387  (uint32) (lastCheckPoint.redo >> 32), (uint32) lastCheckPoint.redo),
9388  xtime ? errdetail("last completed transaction was at log time %s",
9389  timestamptz_to_str(xtime)) : 0));
9390 
9391  LWLockRelease(CheckpointLock);
9392 
9393  /*
9394  * Finally, execute archive_cleanup_command, if any.
9395  */
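9396  if (XLogCtl->archiveCleanupCommand[0])
9397  ExecuteRecoveryCommand(XLogCtl->archiveCleanupCommand,
9398  "archive_cleanup_command",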
9399  false);
9400 
9401  return true;
9402 }
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8550
bool log_checkpoints
Definition: xlog.c:102
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void WALInsertLockRelease(void)
Definition: xlog.c:1651
int wal_segment_size
Definition: xlog.c:113
pg_time_t time
Definition: pg_control.h:128
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:168
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1570
int64 TimestampTz
Definition: timestamp.h:39
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2698
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:704
#define MemSet(start, val, len)
Definition: c.h:863
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
Definition: xlog.c:3844
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:9089
TimestampTz GetLatestXTime(void)
Definition: xlog.c:6146
CheckPoint checkPointCopy
Definition: pg_control.h:132
XLogCtlInsert Insert
Definition: xlog.c:577
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:7954
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:354
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:673
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
#define SpinLockAcquire(lock)
Definition: spin.h:62
void UpdateControlFile(void)
Definition: xlog.c:4680
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8465
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlog.c:11177
#define DEBUG2
Definition: elog.h:24
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3751
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errdetail(const char *fmt,...)
Definition: elog.c:873
unsigned int uint32
Definition: c.h:258
XLogRecPtr RedoRecPtr
Definition: xlog.c:581
#define ereport(elevel, rest)
Definition: elog.h:122
CheckPoint lastCheckPoint
Definition: xlog.c:675
void ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
Definition: xlogarchive.c:330
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:826
#define PROCARRAY_FLAGS_DEFAULT
Definition: procarray.h:50
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9413
XLogRecPtr prevCheckPoint
Definition: pg_control.h:130
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
TransactionId GetOldestXmin(Relation rel, int flags)
Definition: procarray.c:1315
uint64 XLogRecPtr
Definition: xlogdefs.h:21
CheckpointStatsData CheckpointStats
Definition: xlog.c:175
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1622
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
char archiveCleanupCommand[MAXPGPATH]
Definition: xlog.c:637
bool EnableHotStandby
Definition: xlog.c:96
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
XLogRecPtr RedoRecPtr
Definition: xlog.c:550
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:674
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8447
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:176
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:167
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:824
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1710
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
bool DataChecksumsEnabled ( void  )

Definition at line 4748 of file xlog.c.

References Assert, and ControlFileData::data_checksum_version.

Referenced by PageIsVerified(), PageSetChecksumCopy(), PageSetChecksumInplace(), and ReadControlFile().

/*
 * DataChecksumsEnabled: report whether data checksums are in use.
 *
 * Returns true when ControlFile->data_checksum_version is greater than
 * zero, false otherwise.  Takes no arguments and modifies nothing.
 */
4749 {
       /* ControlFile is expected to be non-NULL by the time this is called. */
4750  Assert(ControlFile != NULL);
       /* Any version number above zero counts as "enabled". */
4751  return (ControlFile->data_checksum_version > 0);
4752 }
uint32 data_checksum_version
Definition: pg_control.h:222
static ControlFileData * ControlFile
Definition: xlog.c:715
#define Assert(condition)
Definition: c.h:681
void do_pg_abort_backup ( void  )

Definition at line 11157 of file xlog.c.

References Assert, EXCLUSIVE_BACKUP_NONE, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, XLogCtlData::Insert, XLogCtlInsert::nonExclusiveBackups, WALInsertLockAcquireExclusive(), and WALInsertLockRelease().

Referenced by base_backup_cleanup(), and nonexclusive_base_backup_cleanup().

/*
 * do_pg_abort_backup (partial listing).
 *
 * The only statement visible in this extract clears
 * XLogCtl->Insert.forcePageWrites inside a nested brace block.
 *
 * NOTE(review): listing lines 11159-11161, 11163-11164 and 11168 are
 * missing from this extract, including whatever condition guards the
 * brace block below.  The "References" list for this function names
 * WALInsertLockAcquireExclusive(), WALInsertLockRelease(), Assert,
 * XLogCtlInsert::nonExclusiveBackups and
 * XLogCtlInsert::exclusiveBackupState, so those presumably appear on the
 * missing lines -- confirm against xlog.c before relying on this listing.
 */
11158 {
11162 
       /* NOTE(review): the guard for this block (lines 11163-11164) is not shown. */
11165  {
11166  XLogCtl->Insert.forcePageWrites = false;
11167  }
11169 }
static void WALInsertLockRelease(void)
Definition: xlog.c:1651
XLogCtlInsert Insert
Definition: xlog.c:577
bool forcePageWrites
Definition: xlog.c:551
int nonExclusiveBackups
Definition: xlog.c:563
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
#define Assert(condition)
Definition: c.h:681
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1622
static XLogCtlData * XLogCtl
Definition: xlog.c:707
XLogRecPtr do_pg_start_backup ( const char *  backupidstr,
bool  fast,
TimeLineID starttli_p,
StringInfo  labelfile,
DIR tblspcdir,
List **  tablespaces,
StringInfo  tblspcmapfile,
bool  infotbssize,
bool  needtblspcmapfile 
)

Definition at line 10262 of file xlog.c.

References AllocateFile(), appendStringInfo(), appendStringInfoChar(), BACKUP_LABEL_FILE, backup_started_in_recovery, BoolGetDatum, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, dirent::d_name, StringInfoData::data, DataDir, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STARTING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeFile(), CheckPoint::fullPageWrites, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, StringInfoData::len, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), makeStringInfo(), MAXFNAMELEN, MAXPGPATH, XLogCtlInsert::nonExclusiveBackups, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_fsync(), pg_localtime(), pg_start_backup_callback(), pg_strftime(), pstrdup(), ReadDir(), RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, sendTablespace(), SESSION_BACKUP_EXCLUSIVE, SESSION_BACKUP_NON_EXCLUSIVE, sessionBackupState, tablespaceinfo::size, snprintf(), SpinLockAcquire, SpinLockRelease, TABLESPACE_MAP, CheckPoint::ThisTimeLineID, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToSeg, XLogFileName, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_start_backup().

/*
 * do_pg_start_backup (partial listing): begin an online base backup and
 * return the WAL location it starts from.
 *
 * Parameters, as used by the visible code:
 *   backupidstr       - label text; rejected if longer than MAXPGPATH and
 *                       written on the "LABEL:" line of the label file.
 *   fast              - when true, CHECKPOINT_IMMEDIATE is added to the
 *                       checkpoint request flags.
 *   starttli_p        - if non-NULL, receives the starting timeline ID.
 *   labelfile         - NULL selects an "exclusive" backup; otherwise the
 *                       backup label contents are appended to it.
 *   tblspcdir         - open directory scanned with ReadDir() as "pg_tblspc".
 *   tablespaces       - if non-NULL, one tablespaceinfo per entry found is
 *                       appended to this list.
 *   tblspcmapfile     - receives one "oid path" line per tablespace; for an
 *                       exclusive backup it is allocated locally instead.
 *   infotbssize       - when true, each entry's size comes from
 *                       sendTablespace(fullpath, true); otherwise it is -1.
 *   needtblspcmapfile - when true, newline and carriage-return characters
 *                       in a link target are preceded by a backslash.
 *
 * Returns the REDO pointer read from the control file's checkpoint copy.
 *
 * Raises ERROR when: an exclusive backup is requested during recovery; WAL
 * level is insufficient outside recovery; the label is too long; during
 * recovery, full-page writes were off since the last restartpoint; or, for
 * exclusive backups, the backup label / tablespace map file already exists
 * or cannot be stat'ed, created or written.
 *
 * NOTE(review): this extract omits listing lines 10327, 10335, 10337,
 * 10343, 10346, 10348, 10351, 10404, 10416, 10429, 10431, 10464, 10470,
 * 10606, 10621, 10630, 10644, 10659, 10668, 10678, 10686-10689 and 10692.
 * Inline notes below mark the most important gaps; what they contain is
 * inferred from the "References" list and must be confirmed against xlog.c.
 */
10266 {
10267  bool exclusive = (labelfile == NULL);
10268  bool backup_started_in_recovery = false;
10269  XLogRecPtr checkpointloc;
10270  XLogRecPtr startpoint;
10271  TimeLineID starttli;
10272  pg_time_t stamp_time;
10273  char strfbuf[128];
10274  char xlogfilename[MAXFNAMELEN];
10275  XLogSegNo _logSegNo;
10276  struct stat stat_buf;
10277  FILE *fp;
10278 
10279  backup_started_in_recovery = RecoveryInProgress();
10280 
10281  /*
10282  * Currently only non-exclusive backup can be taken during recovery.
10283  */
10284  if (backup_started_in_recovery && exclusive)
10285  ereport(ERROR,
10286  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10287  errmsg("recovery is in progress"),
10288  errhint("WAL control functions cannot be executed during recovery.")));
10289 
10290  /*
10291  * During recovery, we don't need to check WAL level. Because, if WAL
10292  * level is not sufficient, it's impossible to get here during recovery.
10293  */
10294  if (!backup_started_in_recovery && !XLogIsNeeded())
10295  ereport(ERROR,
10296  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10297  errmsg("WAL level not sufficient for making an online backup"),
10298  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10299 
10300  if (strlen(backupidstr) > MAXPGPATH)
10301  ereport(ERROR,
10302  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
10303  errmsg("backup label too long (max %d bytes)",
10304  MAXPGPATH)));
10305 
10306  /*
10307  * Mark backup active in shared memory. We must do full-page WAL writes
10308  * during an on-line backup even if not doing so at other times, because
10309  * it's quite possible for the backup dump to obtain a "torn" (partially
10310  * written) copy of a database page if it reads the page concurrently with
10311  * our write to the same page. This can be fixed as long as the first
10312  * write to the page in the WAL sequence is a full-page write. Hence, we
10313  * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
10314  * are no dirty pages in shared memory that might get dumped while the
10315  * backup is in progress without having a corresponding WAL record. (Once
10316  * the backup is complete, we need not force full-page writes anymore,
10317  * since we expect that any pages not modified during the backup interval
10318  * must have been correctly captured by the backup.)
10319  *
10320  * Note that forcePageWrites has no effect during an online backup from
10321  * the standby.
10322  *
10323  * We must hold all the insertion locks to change the value of
10324  * forcePageWrites, to ensure adequate interlocking against
10325  * XLogInsertRecord().
10326  */
       /* NOTE(review): listing line 10327 is absent; given the comment above, presumably the insertion locks are acquired here -- confirm. */
10328  if (exclusive)
10329  {
10330  /*
10331  * At first, mark that we're now starting an exclusive backup, to
10332  * ensure that there are no other sessions currently running
10333  * pg_start_backup() or pg_stop_backup().
10334  */
       /* NOTE(review): lines 10335, 10337 and 10343 are absent; the condition guarding this error and the state update are not shown. */
10336  {
10338  ereport(ERROR,
10339  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10340  errmsg("a backup is already in progress"),
10341  errhint("Run pg_stop_backup() and try again.")));
10342  }
10344  }
10345  else
       /* NOTE(review): line 10346 (the statement governed by this "else") and line 10348 are absent from the listing. */
10347  XLogCtl->Insert.forcePageWrites = true;
10349 
10350  /* Ensure we release forcePageWrites if fail below */
       /* NOTE(review): line 10351 is absent; presumably PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, ...) opens this block -- confirm. */
10352  {
10353  bool gotUniqueStartpoint = false;
10354  struct dirent *de;
10355  tablespaceinfo *ti;
10356  int datadirpathlen;
10357 
10358  /*
10359  * Force an XLOG file switch before the checkpoint, to ensure that the
10360  * WAL segment the checkpoint is written to doesn't contain pages with
10361  * old timeline IDs. That would otherwise happen if you called
10362  * pg_start_backup() right after restoring from a PITR archive: the
10363  * first WAL segment containing the startup checkpoint has pages in
10364  * the beginning with the old timeline ID. That can cause trouble at
10365  * recovery: we won't have a history file covering the old timeline if
10366  * pg_wal directory was not included in the base backup and the WAL
10367  * archive was cleared too before starting the backup.
10368  *
10369  * This also ensures that we have emitted a WAL page header that has
10370  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
10371  * Therefore, if a WAL archiver (such as pglesslog) is trying to
10372  * compress out removable backup blocks, it won't remove any that
10373  * occur after this point.
10374  *
10375  * During recovery, we skip forcing XLOG file switch, which means that
10376  * the backup taken during recovery is not available for the special
10377  * recovery case described above.
10378  */
10379  if (!backup_started_in_recovery)
10380  RequestXLogSwitch(false);
10381 
       /* Repeat the checkpoint until this backup has a usable starting location. */
10382  do
10383  {
10384  bool checkpointfpw;
10385 
10386  /*
10387  * Force a CHECKPOINT. Aside from being necessary to prevent torn
10388  * page problems, this guarantees that two successive backup runs
10389  * will have different checkpoint positions and hence different
10390  * history file names, even if nothing happened in between.
10391  *
10392  * During recovery, establish a restartpoint if possible. We use
10393  * the last restartpoint as the backup starting checkpoint. This
10394  * means that two successive backup runs can have same checkpoint
10395  * positions.
10396  *
10397  * Since the fact that we are executing do_pg_start_backup()
10398  * during recovery means that checkpointer is running, we can use
10399  * RequestCheckpoint() to establish a restartpoint.
10400  *
10401  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
10402  * passing fast = true). Otherwise this can take awhile.
10403  */
       /* NOTE(review): line 10404 is absent; the next line is the tail of a call's flag argument, presumably RequestCheckpoint(...) -- confirm. */
10405  (fast ? CHECKPOINT_IMMEDIATE : 0));
10406 
10407  /*
10408  * Now we need to fetch the checkpoint record location, and also
10409  * its REDO pointer. The oldest point in WAL that would be needed
10410  * to restore starting from the checkpoint is precisely the REDO
10411  * pointer.
10412  */
10413  LWLockAcquire(ControlFileLock, LW_SHARED);
10414  checkpointloc = ControlFile->checkPoint;
10415  startpoint = ControlFile->checkPointCopy.redo;
       /* NOTE(review): line 10416 is absent; starttli is never assigned in the visible code, so it is presumably set here from the checkpoint copy -- confirm. */
10417  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
10418  LWLockRelease(ControlFileLock);
10419 
10420  if (backup_started_in_recovery)
10421  {
10422  XLogRecPtr recptr;
10423 
10424  /*
10425  * Check to see if all WAL replayed during online backup
10426  * (i.e., since last restartpoint used as backup starting
10427  * checkpoint) contain full-page writes.
10428  */
       /* NOTE(review): lines 10429 and 10431 are absent; presumably a spinlock acquire/release pair around this read -- confirm. */
10430  recptr = XLogCtl->lastFpwDisableRecPtr;
10432 
10433  if (!checkpointfpw || startpoint <= recptr)
10434  ereport(ERROR,
10435  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10436  errmsg("WAL generated with full_page_writes=off was replayed "
10437  "since last restartpoint"),
10438  errhint("This means that the backup being taken on the standby "
10439  "is corrupt and should not be used. "
10440  "Enable full_page_writes and run CHECKPOINT on the master, "
10441  "and then try an online backup again.")));
10442 
10443  /*
10444  * During recovery, since we don't use the end-of-backup WAL
10445  * record and don't write the backup history file, the
10446  * starting WAL location doesn't need to be unique. This means
10447  * that two base backups started at the same time might use
10448  * the same checkpoint as starting locations.
10449  */
10450  gotUniqueStartpoint = true;
10451  }
10452 
10453  /*
10454  * If two base backups are started at the same time (in WAL sender
10455  * processes), we need to make sure that they use different
10456  * checkpoints as starting locations, because we use the starting
10457  * WAL location as a unique identifier for the base backup in the
10458  * end-of-backup WAL record and when we write the backup history
10459  * file. Perhaps it would be better to generate a separate unique ID
10460  * for each backup instead of forcing another checkpoint, but
10461  * taking a checkpoint right after another is not that expensive
10462  * either because only few buffers have been dirtied yet.
10463  */
       /* NOTE(review): lines 10464 and 10470 are absent; presumably the locking that protects lastBackupStart -- confirm. */
10465  if (XLogCtl->Insert.lastBackupStart < startpoint)
10466  {
10467  XLogCtl->Insert.lastBackupStart = startpoint;
10468  gotUniqueStartpoint = true;
10469  }
10471  } while (!gotUniqueStartpoint);
10472 
       /* Derive the name of the WAL segment file that contains the start point. */
10473  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
10474  XLogFileName(xlogfilename, starttli, _logSegNo, wal_segment_size);
10475 
10476  /*
10477  * Construct tablespace_map file
10478  */
10479  if (exclusive)
10480  tblspcmapfile = makeStringInfo();
10481 
10482  datadirpathlen = strlen(DataDir);
10483 
10484  /* Collect information about all tablespaces */
10485  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
10486  {
10487  char fullpath[MAXPGPATH + 10];
10488  char linkpath[MAXPGPATH];
10489  char *relpath = NULL;
10490  int rllen;
10491  StringInfoData buflinkpath;
10492  char *s = linkpath;
10493 
10494  /* Skip special stuff */
10495  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
10496  continue;
10497 
10498  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
10499 
10500 #if defined(HAVE_READLINK) || defined(WIN32)
10501  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
10502  if (rllen < 0)
10503  {
10504  ereport(WARNING,
10505  (errmsg("could not read symbolic link \"%s\": %m",
10506  fullpath)));
10507  continue;
10508  }
10509  else if (rllen >= sizeof(linkpath))
10510  {
10511  ereport(WARNING,
10512  (errmsg("symbolic link \"%s\" target is too long",
10513  fullpath)));
10514  continue;
10515  }
       /* readlink() does not NUL-terminate its output, so terminate it here. */
10516  linkpath[rllen] = '\0';
10517 
10518  /*
10519  * Add the escape character '\\' before newline in a string to
10520  * ensure that we can distinguish between the newline in the
10521  * tablespace path and end of line while reading tablespace_map
10522  * file during archive recovery.
10523  */
10524  initStringInfo(&buflinkpath);
10525 
10526  while (*s)
10527  {
10528  if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
10529  appendStringInfoChar(&buflinkpath, '\\');
10530  appendStringInfoChar(&buflinkpath, *s++);
10531  }
10532 
10533 
10534  /*
10535  * Relpath holds the relative path of the tablespace directory
10536  * when it's located within PGDATA, or NULL if it's located
10537  * elsewhere.
10538  */
10539  if (rllen > datadirpathlen &&
10540  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
10541  IS_DIR_SEP(linkpath[datadirpathlen]))
10542  relpath = linkpath + datadirpathlen + 1;
10543 
       /* Record this tablespace: directory entry name, escaped link target, optional relative path, optional size. */
10544  ti = palloc(sizeof(tablespaceinfo));
10545  ti->oid = pstrdup(de->d_name);
10546  ti->path = pstrdup(buflinkpath.data);
10547  ti->rpath = relpath ? pstrdup(relpath) : NULL;
10548  ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
10549 
10550  if (tablespaces)
10551  *tablespaces = lappend(*tablespaces, ti);
10552 
10553  appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
10554 
10555  pfree(buflinkpath.data);
10556 #else
10557 
10558  /*
10559  * If the platform does not have symbolic links, it should not be
10560  * possible to have tablespaces - clearly somebody else created
10561  * them. Warn about it and ignore.
10562  */
10563  ereport(WARNING,
10564  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
10565  errmsg("tablespaces are not supported on this platform")));
10566 #endif
10567  }
10568 
10569  /*
10570  * Construct backup label file
10571  */
10572  if (exclusive)
10573  labelfile = makeStringInfo();
10574 
10575  /* Use the log timezone here, not the session timezone */
10576  stamp_time = (pg_time_t) time(NULL);
10577  pg_strftime(strfbuf, sizeof(strfbuf),
10578  "%Y-%m-%d %H:%M:%S %Z",
10579  pg_localtime(&stamp_time, log_timezone));
10580  appendStringInfo(labelfile, "START WAL LOCATION: %X/%X (file %s)\n",
10581  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename);
10582  appendStringInfo(labelfile, "CHECKPOINT LOCATION: %X/%X\n",
10583  (uint32) (checkpointloc >> 32), (uint32) checkpointloc);
10584  appendStringInfo(labelfile, "BACKUP METHOD: %s\n",
10585  exclusive ? "pg_start_backup" : "streamed");
10586  appendStringInfo(labelfile, "BACKUP FROM: %s\n",
10587  backup_started_in_recovery ? "standby" : "master");
10588  appendStringInfo(labelfile, "START TIME: %s\n", strfbuf);
10589  appendStringInfo(labelfile, "LABEL: %s\n", backupidstr);
10590 
10591  /*
10592  * Okay, write the file, or return its contents to caller.
10593  */
10594  if (exclusive)
10595  {
10596  /*
10597  * Check for existing backup label --- implies a backup is already
10598  * running. (XXX given that we checked exclusiveBackupState
10599  * above, maybe it would be OK to just unlink any such label
10600  * file?)
10601  */
       /* NOTE(review): in this and the later file-error ereport calls, the first argument line (10606, 10621, 10630, 10644, 10659, 10668) is absent; presumably an error-code call -- confirm. */
10602  if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
10603  {
10604  if (errno != ENOENT)
10605  ereport(ERROR,
10607  errmsg("could not stat file \"%s\": %m",
10608  BACKUP_LABEL_FILE)));
10609  }
10610  else
10611  ereport(ERROR,
10612  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10613  errmsg("a backup is already in progress"),
10614  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10615  BACKUP_LABEL_FILE)));
10616 
10617  fp = AllocateFile(BACKUP_LABEL_FILE, "w");
10618 
10619  if (!fp)
10620  ereport(ERROR,
10622  errmsg("could not create file \"%s\": %m",
10623  BACKUP_LABEL_FILE)));
       /* Any failure to write, flush, fsync or close the file is reported as a write error. */
10624  if (fwrite(labelfile->data, labelfile->len, 1, fp) != 1 ||
10625  fflush(fp) != 0 ||
10626  pg_fsync(fileno(fp)) != 0 ||
10627  ferror(fp) ||
10628  FreeFile(fp))
10629  ereport(ERROR,
10631  errmsg("could not write file \"%s\": %m",
10632  BACKUP_LABEL_FILE)));
10633  /* Allocated locally for exclusive backups, so free separately */
10634  pfree(labelfile->data);
10635  pfree(labelfile);
10636 
10637  /* Write backup tablespace_map file. */
10638  if (tblspcmapfile->len > 0)
10639  {
10640  if (stat(TABLESPACE_MAP, &stat_buf) != 0)
10641  {
10642  if (errno != ENOENT)
10643  ereport(ERROR,
10645  errmsg("could not stat file \"%s\": %m",
10646  TABLESPACE_MAP)));
10647  }
10648  else
10649  ereport(ERROR,
10650  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10651  errmsg("a backup is already in progress"),
10652  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10653  TABLESPACE_MAP)));
10654 
10655  fp = AllocateFile(TABLESPACE_MAP, "w");
10656 
10657  if (!fp)
10658  ereport(ERROR,
10660  errmsg("could not create file \"%s\": %m",
10661  TABLESPACE_MAP)));
10662  if (fwrite(tblspcmapfile->data, tblspcmapfile->len, 1, fp) != 1 ||
10663  fflush(fp) != 0 ||
10664  pg_fsync(fileno(fp)) != 0 ||
10665  ferror(fp) ||
10666  FreeFile(fp))
10667  ereport(ERROR,
10669  errmsg("could not write file \"%s\": %m",
10670  TABLESPACE_MAP)));
10671  }
10672 
10673  /* Allocated locally for exclusive backups, so free separately */
10674  pfree(tblspcmapfile->data);
10675  pfree(tblspcmapfile);
10676  }
10677  }
       /* NOTE(review): line 10678 is absent; presumably the matching PG_END_ENSURE_ERROR_CLEANUP(...) -- confirm. */
10679 
10680  /*
10681  * Mark that start phase has correctly finished for an exclusive backup.
10682  * Session-level locks are updated as well to reflect that state.
10683  */
       /* NOTE(review): lines 10686-10689 and 10692 are absent, so neither branch body is visible in this listing. */
10684  if (exclusive)
10685  {
10690  }
10691  else
10693 
10694  /*
10695  * We're done. As a convenience, return the starting WAL location.
10696  */
10697  if (starttli_p)
10698  *starttli_p = starttli;
10699  return startpoint;
10700 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:122
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9490
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1651
int wal_segment_size
Definition: xlog.c:113
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:702
static SessionBackupState sessionBackupState
Definition: xlog.c:512
XLogRecPtr lastBackupStart
Definition: xlog.c:564
char * pstrdup(const char *in)
Definition: mcxt.c:1076
#define XLogIsNeeded()
Definition: xlog.h:146
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
slock_t info_lck
Definition: xlog.c:704
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
CheckPoint checkPointCopy
Definition: pg_control.h:132
XLogCtlInsert Insert
Definition: xlog.c:577
bool RecoveryInProgress(void)
Definition: xlog.c:7954
static bool backup_started_in_recovery
Definition: basebackup.c:73
Definition: dirent.h:9
#define IS_DIR_SEP(ch)
Definition: port.h:75
pg_tz * log_timezone
Definition: pgtz.c:31
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
#define TABLESPACE_MAP
Definition: xlog.h:325
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
void pfree(void *pointer)
Definition: mcxt.c:949
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
bool forcePageWrites
Definition: xlog.c:551
#define ERROR
Definition: elog.h:43
struct stat stat_buf
Definition: pg_standby.c:103
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2117
unsigned int uint32
Definition: c.h:258
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:910
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define ereport(elevel, rest)
Definition: elog.h:122
List * lappend(List *list, void *datum)
Definition: list.c:128
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:563
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
static void pg_start_backup_callback(int code, Datum arg)
Definition: xlog.c:10704
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
uintptr_t Datum
Definition: postgres.h:372
static ControlFileData * ControlFile
Definition: xlog.c:715
#define BoolGetDatum(X)
Definition: postgres.h:408
bool fullPageWrites
Definition: pg_control.h:41
#define CHECKPOINT_WAIT
Definition: xlog.h:184
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2433
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1622
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1314
int FreeFile(FILE *file)
Definition: fd.c:2309
void * palloc(Size size)
Definition: mcxt.c:848
TimeLineID ThisTimeLineID
Definition: pg_control.h:38
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:179
#define relpath(rnode, forknum)
Definition: relpath.h:71
char * DataDir
Definition: globals.c:60
#define BACKUP_LABEL_FILE
Definition: xlog.h:322
int pg_fsync(int fd)
Definition: fd.c:338
char d_name[MAX_PATH]
Definition: dirent.h:14
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:36
void RequestCheckpoint(int flags)
Definition: checkpointer.c:967
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
XLogRecPtr do_pg_stop_backup ( char *  labelfile,
bool  waitforarchive,
TimeLineID * stoptli_p 
)

Definition at line 10770 of file xlog.c.

References AllocateFile(), Assert, BACKUP_LABEL_FILE, backup_started_in_recovery, BackupHistoryFileName, BackupHistoryFilePath, BoolGetDatum, CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), DEBUG1, durable_unlink(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STOPPING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::lastFpwDisableRecPtr, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlInsert::nonExclusiveBackups, NOTICE, palloc(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_localtime(), pg_stop_backup_callback(), pg_strftime(), pg_usleep(), RecoveryInProgress(), remaining, RequestXLogSwitch(), SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, TABLESPACE_MAP, ThisTimeLineID, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogFileName, XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), pg_stop_backup(), and pg_stop_backup_v2().

10771 {
10772  bool exclusive = (labelfile == NULL);
10773  bool backup_started_in_recovery = false;
10774  XLogRecPtr startpoint;
10775  XLogRecPtr stoppoint;
10776  TimeLineID stoptli;
10777  pg_time_t stamp_time;
10778  char strfbuf[128];
10779  char histfilepath[MAXPGPATH];
10780  char startxlogfilename[MAXFNAMELEN];
10781  char stopxlogfilename[MAXFNAMELEN];
10782  char lastxlogfilename[MAXFNAMELEN];
10783  char histfilename[MAXFNAMELEN];
10784  char backupfrom[20];
10785  XLogSegNo _logSegNo;
10786  FILE *lfp;
10787  FILE *fp;
10788  char ch;
10789  int seconds_before_warning;
10790  int waits = 0;
10791  bool reported_waiting = false;
10792  char *remaining;
10793  char *ptr;
10794  uint32 hi,
10795  lo;
10796 
10797  backup_started_in_recovery = RecoveryInProgress();
10798 
10799  /*
10800  * Currently only non-exclusive backup can be taken during recovery.
10801  */
10802  if (backup_started_in_recovery && exclusive)
10803  ereport(ERROR,
10804  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10805  errmsg("recovery is in progress"),
10806  errhint("WAL control functions cannot be executed during recovery.")));
10807 
10808  /*
10809  * During recovery, we don't need to check WAL level, because if WAL
10810  * level is not sufficient, it's impossible to get here during recovery.
10811  */
10812  if (!backup_started_in_recovery && !XLogIsNeeded())
10813  ereport(ERROR,
10814  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10815  errmsg("WAL level not sufficient for making an online backup"),
10816  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10817 
10818  if (exclusive)
10819  {
10820  /*
10821  * At first, mark that we're now stopping an exclusive backup, to
10822  * ensure that there are no other sessions currently running
10823  * pg_start_backup() or pg_stop_backup().
10824  */
10827  {
10829  ereport(ERROR,
10830  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10831  errmsg("exclusive backup not in progress")));
10832  }
10835 
10836  /*
10837  * Remove backup_label. In case of failure, the state for an exclusive
10838  * backup is switched back to in-progress.
10839  */
10841  {
10842  /*
10843  * Read the existing label file into memory.
10844  */
10845  struct stat statbuf;
10846  int r;
10847 
10848  if (stat(BACKUP_LABEL_FILE, &statbuf))
10849  {
10850  /* should not happen per the upper checks */
10851  if (errno != ENOENT)
10852  ereport(ERROR,
10854  errmsg("could not stat file \"%s\": %m",
10855  BACKUP_LABEL_FILE)));
10856  ereport(ERROR,
10857  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10858  errmsg("a backup is not in progress")));
10859  }
10860 
10861  lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
10862  if (!lfp)
10863  {
10864  ereport(ERROR,
10866  errmsg("could not read file \"%s\": %m",
10867  BACKUP_LABEL_FILE)));
10868  }
10869  labelfile = palloc(statbuf.st_size + 1);
10870  r = fread(labelfile, statbuf.st_size, 1, lfp);
10871  labelfile[statbuf.st_size] = '\0';
10872 
10873  /*
10874  * Close and remove the backup label file
10875  */
10876  if (r != 1 || ferror(lfp) || FreeFile(lfp))
10877  ereport(ERROR,
10879  errmsg("could not read file \"%s\": %m",
10880  BACKUP_LABEL_FILE)));
10882 
10883  /*
10884  * Remove tablespace_map file if present, it is created only if
10885  * there are tablespaces.
10886  */
10888  }
10890  }
10891 
10892  /*
10893  * OK to update backup counters and forcePageWrites
10894  */
10896  if (exclusive)
10897  {
10899  }
10900  else
10901  {
10902  /*
10903  * The user-visible pg_start/stop_backup() functions that operate on
10904  * exclusive backups can be called at any time, but for non-exclusive
10905  * backups, it is expected that each do_pg_start_backup() call is
10906  * matched by exactly one do_pg_stop_backup() call.
10907  */
10910  }
10911 
10914  {
10915  XLogCtl->Insert.forcePageWrites = false;
10916  }
10918 
10919  /* Clean up session-level lock */
10921 
10922  /*
10923  * Read and parse the START WAL LOCATION line (this code is pretty crude,
10924  * but we are not expecting any variability in the file format).
10925  */
10926  if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
10927  &hi, &lo, startxlogfilename,
10928  &ch) != 4 || ch != '\n')
10929  ereport(ERROR,
10930  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10931  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
10932  startpoint = ((uint64) hi) << 32 | lo;
10933  remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
10934 
10935  /*
10936  * Parse the BACKUP FROM line. If we are taking an online backup from the
10937  * standby, we confirm that the standby has not been promoted during the
10938  * backup.
10939  */
10940  ptr = strstr(remaining, "BACKUP FROM:");
10941  if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
10942  ereport(ERROR,
10943  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10944  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
10945  if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
10946  ereport(ERROR,
10947  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10948  errmsg("the standby was promoted during online backup"),
10949  errhint("This means that the backup being taken is corrupt "
10950  "and should not be used. "
10951  "Try taking another online backup.")));
10952 
10953  /*
10954  * During recovery, we don't write an end-of-backup record. We assume that
10955  * pg_control was backed up last and its minimum recovery point can be
10956  * available as the backup end location. Since we don't have an
10957  * end-of-backup record, we use the pg_control value to check whether
10958  * we've reached the end of backup when starting recovery from this
10959  * backup. We have no way of checking if pg_control wasn't backed up last
10960  * however.
10961  *
10962  * We don't force a switch to new WAL file but it is still possible to
10963  * wait for all the required files to be archived if waitforarchive is
10964  * true. This is okay if we use the backup to start a standby and fetch
10965  * the missing WAL using streaming replication. But in the case of an
10966  * archive recovery, a user should set waitforarchive to true and wait for
10967  * them to be archived to ensure that all the required files are
10968  * available.
10969  *
10970  * We return the current minimum recovery point as the backup end
10971  * location. Note that it can be greater than the exact backup end
10972  * location if the minimum recovery point is updated after the backup of
10973  * pg_control. This is harmless for current uses.
10974  *
10975  * XXX currently a backup history file is for informational and debug
10976  * purposes only. It's not essential for an online backup. Furthermore,
10977  * even if it's created, it will not be archived during recovery because
10978  * an archiver is not invoked. So it doesn't seem worthwhile to write a
10979  * backup history file during recovery.
10980  */
10981  if (backup_started_in_recovery)
10982  {
10983  XLogRecPtr recptr;
10984 
10985  /*
10986  * Check to see if all WAL replayed during online backup contain
10987  * full-page writes.
10988  */
10990  recptr = XLogCtl->lastFpwDisableRecPtr;
10992 
10993  if (startpoint <= recptr)
10994  ereport(ERROR,
10995  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10996  errmsg("WAL generated with full_page_writes=off was replayed "
10997  "during online backup"),
10998  errhint("This means that the backup being taken on the standby "
10999  "is corrupt and should not be used. "
11000  "Enable full_page_writes and run CHECKPOINT on the master, "
11001  "and then try an online backup again.")));
11002 
11003 
11004  LWLockAcquire(ControlFileLock, LW_SHARED);
11005  stoppoint = ControlFile->minRecoveryPoint;
11006  stoptli = ControlFile->minRecoveryPointTLI;
11007  LWLockRelease(ControlFileLock);
11008  }
11009  else
11010  {
11011  /*
11012  * Write the backup-end xlog record
11013  */
11014  XLogBeginInsert();
11015  XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
11016  stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
11017  stoptli = ThisTimeLineID;
11018 
11019  /*
11020  * Force a switch to a new xlog segment file, so that the backup is
11021  * valid as soon as archiver moves out the current segment file.
11022  */
11023  RequestXLogSwitch(false);
11024 
11025  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11026  XLogFileName(stopxlogfilename, stoptli, _logSegNo, wal_segment_size);
11027 
11028  /* Use the log timezone here, not the session timezone */
11029  stamp_time = (pg_time_t) time(NULL);
11030  pg_strftime(strfbuf, sizeof(strfbuf),
11031  "%Y-%m-%d %H:%M:%S %Z",
11032  pg_localtime(&stamp_time, log_timezone));
11033 
11034  /*
11035  * Write the backup history file
11036  */
11037  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11038  BackupHistoryFilePath(histfilepath, stoptli, _logSegNo,
11039  startpoint, wal_segment_size);
11040  fp = AllocateFile(histfilepath, "w");
11041  if (!fp)
11042  ereport(ERROR,
11044  errmsg("could not create file \"%s\": %m",
11045  histfilepath)));
11046  fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
11047  (uint32) (startpoint >> 32), (uint32) startpoint, startxlogfilename);
11048  fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
11049  (uint32) (stoppoint >> 32), (uint32) stoppoint, stopxlogfilename);
11050  /* transfer remaining lines from label to history file */
11051  fprintf(fp, "%s", remaining);
11052  fprintf(fp, "STOP TIME: %s\n", strfbuf);
11053  if (fflush(fp) || ferror(fp) || FreeFile(fp))
11054  ereport(ERROR,
11056  errmsg("could not write file \"%s\": %m",
11057  histfilepath)));
11058 
11059  /*
11060  * Clean out any no-longer-needed history files. As a side effect,
11061  * this will post a .ready file for the newly created history file,
11062  * notifying the archiver that history file may be archived
11063  * immediately.
11064  */
11066  }
11067 
11068  /*
11069  * If archiving is enabled, wait for all the required WAL files to be
11070  * archived before returning. If archiving isn't enabled, the required WAL
11071  * needs to be transported via streaming replication (hopefully with
11072  * wal_keep_segments set high enough), or some more exotic mechanism like
11073  * polling and copying files from pg_wal with script. We have no knowledge
11074  * of those mechanisms, so it's up to the user to ensure that he gets all
11075  * the required WAL.
11076  *
11077  * We wait until both the last WAL file filled during backup and the
11078  * history file have been archived, and assume that the alphabetic sorting
11079  * property of the WAL files ensures any earlier WAL files are safely
11080  * archived as well.
11081  *
11082  * We wait forever, since archive_command is supposed to work and we
11083  * assume the admin wanted his backup to work completely. If you don't
11084  * wish to wait, then either waitforarchive should be passed in as false,
11085  * or you can set statement_timeout. Also, some notices are issued to
11086  * clue in anyone who might be doing this interactively.
11087  */
11088 
11089  if (waitforarchive &&
11090  ((!backup_started_in_recovery && XLogArchivingActive()) ||
11091  (backup_started_in_recovery && XLogArchivingAlways())))
11092  {
11093  XLByteToPrevSeg(stoppoint, _logSegNo, wal_segment_size);
11094  XLogFileName(lastxlogfilename, stoptli, _logSegNo, wal_segment_size);
11095 
11096  XLByteToSeg(startpoint, _logSegNo, wal_segment_size);
11097  BackupHistoryFileName(histfilename, stoptli, _logSegNo,
11098  startpoint, wal_segment_size);
11099 
11100  seconds_before_warning = 60;
11101  waits = 0;
11102 
11103  while (XLogArchiveIsBusy(lastxlogfilename) ||
11104  XLogArchiveIsBusy(histfilename))
11105  {
11107 
11108  if (!reported_waiting && waits > 5)
11109  {
11110  ereport(NOTICE,
11111  (errmsg("pg_stop_backup cleanup done, waiting for required WAL segments to be archived")));
11112  reported_waiting = true;
11113  }
11114 
11115  pg_usleep(1000000L);
11116 
11117  if (++waits >= seconds_before_warning)
11118  {
11119  seconds_before_warning *= 2; /* This wraps in >10 years... */
11120  ereport(WARNING,
11121  (errmsg("pg_stop_backup still waiting for all required WAL segments to be archived (%d seconds elapsed)",
11122  waits),
11123  errhint("Check that your archive_command is executing properly. "
11124  "pg_stop_backup can be canceled safely, "
11125  "but the database backup will not be usable without all the WAL segments.")));
11126  }
11127  }
11128 
11129  ereport(NOTICE,
11130  (errmsg("pg_stop_backup complete, all required WAL segments have been archived")));
11131  }
11132  else if (waitforarchive)
11133  ereport(NOTICE,
11134  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
11135 
11136  /*
11137  * We're done. As a convenience, return the ending WAL location.
11138  */
11139  if (stoptli_p)
11140  *stoptli_p = stoptli;
11141  return stoppoint;
11142 }
int remaining
Definition: informix.c:692
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:122
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9490
#define DEBUG1
Definition: elog.h:25
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1651
int wal_segment_size
Definition: xlog.c:113
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:168
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:702
static SessionBackupState sessionBackupState
Definition: xlog.c:512
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogIsNeeded()
Definition: xlog.h:146
slock_t info_lck
Definition: xlog.c:704
int errcode(int sqlerrcode)
Definition: elog.c:575
XLogCtlInsert Insert
Definition: xlog.c:577
#define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes)
bool RecoveryInProgress(void)
Definition: xlog.c:7954
static bool backup_started_in_recovery
Definition: basebackup.c:73
pg_tz * log_timezone
Definition: pgtz.c:31
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
#define TABLESPACE_MAP
Definition: xlog.h:325
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define XLogArchivingAlways()
Definition: xlog.h:138
bool forcePageWrites
Definition: xlog.c:551
#define ERROR
Definition: elog.h:43
static void CleanupBackupHistory(void)
Definition: xlog.c:4114
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2117
unsigned int uint32
Definition: c.h:258
#define ereport(elevel, rest)
Definition: elog.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:72
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:563
#define MAXFNAMELEN
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
#define BackupHistoryFilePath(path, tli, logSegNo, startpoint, wal_segsz_bytes)
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:562
uintptr_t Datum
Definition: postgres.h:372
static ControlFileData * ControlFile
Definition: xlog.c:715
#define BoolGetDatum(X)
Definition: postgres.h:408
TimeLineID ThisTimeLineID
Definition: xlog.c:181
#define NOTICE
Definition: elog.h:37
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:659
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:681
#define XLogArchivingActive()
Definition: xlog.h:135
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1622
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
static void pg_stop_backup_callback(int code, Datum arg)
Definition: xlog.c:10733
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1314
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:681
int FreeFile(FILE *file)
Definition: fd.c:2309
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define BACKUP_LABEL_FILE
Definition: xlog.h:322
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:167
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
SessionBackupState get_backup_status ( void  )

Definition at line 10751 of file xlog.c.

References sessionBackupState.

Referenced by pg_start_backup(), pg_stop_backup(), and pg_stop_backup_v2().

10752 {
10753  return sessionBackupState;
10754 }
static SessionBackupState sessionBackupState
Definition: xlog.c:512
TimestampTz GetCurrentChunkReplayStartTime ( void  )

Definition at line 6176 of file xlog.c.

References XLogCtlData::currentChunkStartTime, XLogCtlData::info_lck, SpinLockAcquire, and SpinLockRelease.

Referenced by GetReplicationApplyDelay().

6177 {
6178  TimestampTz xtime;
6179 
6181  xtime = XLogCtl->currentChunkStartTime;
6183 
6184  return xtime;
6185 }
int64 TimestampTz
Definition: timestamp.h:39
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
TimestampTz currentChunkStartTime
Definition: xlog.c:694
XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4764 of file xlog.c.

References SpinLockAcquire, SpinLockRelease, XLogCtlData::ulsn_lck, and XLogCtlData::unloggedLSN.

Referenced by gistGetFakeLSN().

4765 {
4766  XLogRecPtr nextUnloggedLSN;
4767 
4768  /* increment the unloggedLSN counter, need SpinLock */
4770  nextUnloggedLSN = XLogCtl->unloggedLSN++;
4772 
4773  return nextUnloggedLSN;
4774 }
XLogRecPtr unloggedLSN
Definition: xlog.c:590
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:707
slock_t ulsn_lck
Definition: xlog.c:591
XLogRecPtr GetFlushRecPtr ( void  )

Definition at line 8306 of file xlog.c.

References XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::LogwrtResult, SpinLockAcquire, and SpinLockRelease.

Referenced by get_flush_position(), IdentifySystem(), pg_current_wal_flush_lsn(), pg_logical_slot_get_changes_guts(), read_local_xlog_page(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

8307 {
8311 
8312  return LogwrtResult.Flush;
8313 }
static XLogwrtResult LogwrtResult
Definition: xlog.c:751
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
XLogwrtResult LogwrtResult
Definition: xlog.c:601
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
XLogRecPtr Flush
Definition: xlog.c:426
void GetFullPageWriteInfo ( XLogRecPtr * RedoRecPtr_p,
bool * doPageWrites_p 
)

Definition at line 8275 of file xlog.c.

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

8276 {
8277  *RedoRecPtr_p = RedoRecPtr;
8278  *doPageWrites_p = doPageWrites;
8279 }
static bool doPageWrites
Definition: xlog.c:359
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 8290 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, and XLogwrtRqst::Write.

Referenced by CheckpointerMain(), and IsCheckpointOnSchedule().

8291 {
8292  XLogRecPtr recptr;
8293 
8295  recptr = XLogCtl->LogwrtRqst.Write;
8297 
8298  return recptr;
8299 }
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr Write
Definition: xlog.c:419
XLogwrtRqst LogwrtRqst
Definition: xlog.c:580
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:707
XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 8324 of file xlog.c.

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and NUM_XLOGINSERT_LOCKS.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

8325 {
8327  int i;
8328 
8329  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
8330  {
8331  XLogRecPtr last_important;
8332 
8333  /*
8334  * Need to take a lock to prevent torn reads of the LSN, which are
8335  * possible on some of the supported platforms. WAL insert locks only
8336  * support exclusive mode, so we have to use that.
8337  */
8339  last_important = WALInsertLocks[i].l.lastImportantAt;
8340  LWLockRelease(&WALInsertLocks[i].l.lock);
8341 
8342  if (res < last_important)
8343  res = last_important;
8344  }
8345 
8346  return res;
8347 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr lastImportantAt
Definition: xlog.c:469
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:120
WALInsertLock l
Definition: xlog.c:481
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
uint64 XLogRecPtr
Definition: xlogdefs.h:21
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int i
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:710
TimestampTz GetLatestXTime ( void  )

Definition at line 6146 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryLastXTime, SpinLockAcquire, and SpinLockRelease.

Referenced by CreateRestartPoint(), pg_last_xact_replay_timestamp(), and StartupXLOG().

6147 {
6148  TimestampTz xtime;
6149 
6151  xtime = XLogCtl->recoveryLastXTime;
6153 
6154  return xtime;
6155 }
int64 TimestampTz
Definition: timestamp.h:39
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
TimestampTz recoveryLastXTime
Definition: xlog.c:688
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
char* GetMockAuthenticationNonce ( void  )

Definition at line 4738 of file xlog.c.

References Assert, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

4739 {
4740  Assert(ControlFile != NULL);
4742 }
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:229
static ControlFileData * ControlFile
Definition: xlog.c:715
#define Assert(condition)
Definition: c.h:681
void GetNextXidAndEpoch ( TransactionId * xid,
uint32 * epoch 
)

Definition at line 8375 of file xlog.c.

References XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, XLogCtlData::info_lck, ReadNewTransactionId(), SpinLockAcquire, and SpinLockRelease.

Referenced by load_xid_epoch(), TransactionIdInRecentPast(), and XLogWalRcvSendHSFeedback().

8376 {
8377  uint32 ckptXidEpoch;
8378  TransactionId ckptXid;
8379  TransactionId nextXid;
8380 
8381  /* Must read checkpoint info first, else have race condition */
8383  ckptXidEpoch = XLogCtl->ckptXidEpoch;
8384  ckptXid = XLogCtl->ckptXid;
8386 
8387  /* Now fetch current nextXid */
8388  nextXid = ReadNewTransactionId();
8389 
8390  /*
8391  * nextXid is certainly logically later than ckptXid. So if it's
8392  * numerically less, it must have wrapped into the next epoch.
8393  */
8394  if (nextXid < ckptXid)
8395  ckptXidEpoch++;
8396 
8397  *xid = nextXid;
8398  *epoch = ckptXidEpoch;
8399 }
TransactionId ckptXid
Definition: xlog.c:583
uint32 TransactionId
Definition: c.h:391
slock_t info_lck
Definition: xlog.c:704
uint32 ckptXidEpoch
Definition: xlog.c:582
#define SpinLockAcquire(lock)
Definition: spin.h:62
TransactionId ReadNewTransactionId(void)
Definition: varsup.c:250
unsigned int uint32
Definition: c.h:258
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
static const unsigned __int64 epoch
Definition: gettimeofday.c:34
XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 8247 of file xlog.c.

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, and SpinLockRelease.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), InitXLOGAccess(), nextval_internal(), ReplicationSlotReserveWal(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

8248 {
8249  XLogRecPtr ptr;
8250 
8251  /*
8252  * The possibly not up-to-date copy in XLogCtl is enough. Even if we
8253  * grabbed a WAL insertion lock to read the master copy, someone might
8254  * update it just after we've released the lock.
8255  */
8257  ptr = XLogCtl->RedoRecPtr;
8259 
8260  if (RedoRecPtr < ptr)
8261  RedoRecPtr = ptr;
8262 
8263  return RedoRecPtr;
8264 }
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
static XLogRecPtr RedoRecPtr
Definition: xlog.c:352
XLogRecPtr RedoRecPtr
Definition: xlog.c:581
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:707
uint64 GetSystemIdentifier ( void  )

Definition at line 4728 of file xlog.c.

References Assert, and ControlFileData::system_identifier.

Referenced by IdentifySystem(), and WalReceiverMain().

4729 {
4730  Assert(ControlFile != NULL);
4732 }
uint64 system_identifier
Definition: pg_control.h:106
static ControlFileData * ControlFile
Definition: xlog.c:715
#define Assert(condition)
Definition: c.h:681
XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 11196 of file xlog.c.

References XLogCtlInsert::CurrBytePos, Insert(), XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, SpinLockAcquire, SpinLockRelease, and XLogBytePosToRecPtr().

Referenced by GetSnapshotData(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

11197 {
11199  uint64 current_bytepos;
11200 
11201  SpinLockAcquire(&Insert->insertpos_lck);
11202  current_bytepos = Insert->CurrBytePos;
11203  SpinLockRelease(&Insert->insertpos_lck);
11204 
11205  return XLogBytePosToRecPtr(current_bytepos);
11206 }
slock_t insertpos_lck
Definition: xlog.c:519
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1920
XLogCtlInsert Insert
Definition: xlog.c:577
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint64 CurrBytePos
Definition: xlog.c:528
static void Insert(File file)
Definition: fd.c:1059
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
void GetXLogReceiptTime ( TimestampTz * rtime,
bool * fromStream 
)

Definition at line 6192 of file xlog.c.

References Assert, InRecovery, XLOG_FROM_STREAM, XLogReceiptSource, and XLogReceiptTime.

Referenced by GetStandbyLimitTime().

6193 {
6194  /*
6195  * This must be executed in the startup process, since we don't export the
6196  * relevant state to shared memory.
6197  */
6198  Assert(InRecovery);
6199 
6200  *rtime = XLogReceiptTime;
6201  *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
6202 }
static XLogSource XLogReceiptSource
Definition: xlog.c:818
bool InRecovery
Definition: xlog.c:194
static TimestampTz XLogReceiptTime
Definition: xlog.c:817
#define Assert(condition)
Definition: c.h:681
XLogRecPtr GetXLogReplayRecPtr ( TimeLineID * replayTLI)

Definition at line 11177 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::lastReplayedEndRecPtr, XLogCtlData::lastReplayedTLI, SpinLockAcquire, and SpinLockRelease.

Referenced by CheckpointerMain(), CreateRestartPoint(), GetReplicationApplyDelay(), GetStandbyFlushRecPtr(), IsCheckpointOnSchedule(), pg_last_wal_replay_lsn(), pg_logical_slot_get_changes_guts(), read_local_xlog_page(), WalReceiverMain(), WalSndWaitForWal(), and XLogWalRcvSendReply().

11178 {
11179  XLogRecPtr recptr;
11180  TimeLineID tli;
11181 
11183  recptr = XLogCtl->lastReplayedEndRecPtr;
11184  tli = XLogCtl->lastReplayedTLI;
11186 
11187  if (replayTLI)
11188  *replayTLI = tli;
11189  return recptr;
11190 }
uint32 TimeLineID
Definition: xlogdefs.h:45
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:707
TimeLineID lastReplayedTLI
Definition: xlog.c:684
XLogRecPtr lastReplayedEndRecPtr
Definition: xlog.c:683
XLogRecPtr GetXLogWriteRecPtr ( void  )

Definition at line 11212 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::LogwrtResult, SpinLockAcquire, SpinLockRelease, and XLogwrtResult::Write.

Referenced by pg_attribute_noreturn(), and pg_current_wal_lsn().

11213 {
11217 
11218  return LogwrtResult.Write;
11219 }
static XLogwrtResult LogwrtResult
Definition: xlog.c:751
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
XLogwrtResult LogwrtResult
Definition: xlog.c:601
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
XLogRecPtr Write
Definition: xlog.c:425
bool HotStandbyActive ( void  )

Definition at line 8010 of file xlog.c.

References XLogCtlData::info_lck, LocalHotStandbyActive, XLogCtlData::SharedHotStandbyActive, SpinLockAcquire, and SpinLockRelease.

Referenced by XLogWalRcvSendHSFeedback().

8011 {
8012  /*
8013  * We check shared state each time only until Hot Standby is active. We
8014  * can't de-activate Hot Standby, so there's no need to keep checking
8015  * after the shared variable has once been seen true.
8016  */
8018  return true;
8019  else
8020  {
8021  /* spinlock is essential on machines with weak memory ordering! */
8025 
8026  return LocalHotStandbyActive;
8027  }
8028 }
bool SharedHotStandbyActive
Definition: xlog.c:649
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
static bool LocalHotStandbyActive
Definition: xlog.c:223
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:707
bool HotStandbyActiveInReplay ( void  )

Definition at line 8035 of file xlog.c.

References AmStartupProcess, Assert, IsPostmasterEnvironment, and LocalHotStandbyActive.

Referenced by btree_xlog_vacuum().

8036 {
8037  Assert(AmStartupProcess() || !IsPostmasterEnvironment);
8038  return LocalHotStandbyActive;
8039 }
#define AmStartupProcess()
Definition: miscadmin.h:405
bool IsPostmasterEnvironment
Definition: globals.c:100
static bool LocalHotStandbyActive
Definition: xlog.c:223
#define Assert(condition)
Definition: c.h:681
void InitXLOGAccess ( void  )

Definition at line 8221 of file xlog.c.

References Assert, doPageWrites, XLogCtlInsert::forcePageWrites, XLogCtlInsert::fullPageWrites, GetRedoRecPtr(), InitXLogInsert(), Insert(), XLogCtlData::Insert, IsBootstrapProcessingMode, ThisTimeLineID, XLogCtlData::ThisTimeLineID, wal_segment_size, and ControlFileData::xlog_seg_size.

Referenced by AuxiliaryProcessMain(), LocalSetXLogInsertAllowed(), and RecoveryInProgress().

8222 {
8223  XLogCtlInsert *Insert = &XLogCtl->Insert;
8224 
8225  /* ThisTimeLineID doesn't change so we need no lock to copy it */
8226  ThisTimeLineID = XLogCtl->ThisTimeLineID;
8227  Assert(ThisTimeLineID != 0 || IsBootstrapProcessingMode());
8228 
8229  /* set wal_segment_size */
8230  wal_segment_size = ControlFile->xlog_seg_size;
8231 
8232  /* Use GetRedoRecPtr to copy the RedoRecPtr safely */
8233  (void) GetRedoRecPtr();
8234  /* Also update our copy of doPageWrites. */
8235  doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
8236 
8237  /* Also initialize the working areas for constructing WAL records */
8238  InitXLogInsert();
8239 }
int wal_segment_size
Definition: xlog.c:113
void InitXLogInsert(void)
Definition: xloginsert.c:1028
TimeLineID ThisTimeLineID
Definition: xlog.c:630
XLogCtlInsert Insert
Definition: xlog.c:577
bool fullPageWrites
Definition: xlog.c:552
uint32 xlog_seg_size
Definition: pg_control.h:209
static bool doPageWrites
Definition: xlog.c:359
bool forcePageWrites
Definition: xlog.c:551
static void Insert(File file)
Definition: fd.c:1059
static ControlFileData * ControlFile
Definition: xlog.c:715
TimeLineID ThisTimeLineID
Definition: xlog.c:181
#define Assert(condition)
Definition: c.h:681
static XLogCtlData * XLogCtl
Definition: xlog.c:707
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:8247
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:367
void issue_xlog_fsync ( int  fd,
XLogSegNo  segno 
)

Definition at line 10172 of file xlog.c.

References elog, ereport, errcode_for_file_access(), errmsg(), PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), sync_method, SYNC_METHOD_FDATASYNC, SYNC_METHOD_FSYNC, SYNC_METHOD_FSYNC_WRITETHROUGH, SYNC_METHOD_OPEN, SYNC_METHOD_OPEN_DSYNC, ThisTimeLineID, and XLogFileNameP().

Referenced by XLogWalRcvFlush(), and XLogWrite().

10173 {
10174  switch (sync_method)
10175  {
10176  case SYNC_METHOD_FSYNC:
10177  if (pg_fsync_no_writethrough(fd) != 0)
10178  ereport(PANIC,
10179  (errcode_for_file_access(),
10180  errmsg("could not fsync log file %s: %m",
10181  XLogFileNameP(ThisTimeLineID, segno))));
10182  break;
10183 #ifdef HAVE_FSYNC_WRITETHROUGH
10184  case SYNC_METHOD_FSYNC_WRITETHROUGH:
10185  if (pg_fsync_writethrough(fd) != 0)
10186  ereport(PANIC,
10187  (errcode_for_file_access(),
10188  errmsg("could not fsync write-through log file %s: %m",
10189  XLogFileNameP(ThisTimeLineID, segno))));
10190  break;
10191 #endif
10192 #ifdef HAVE_FDATASYNC
10193  case SYNC_METHOD_FDATASYNC:
10194  if (pg_fdatasync(fd) != 0)
10195  ereport(PANIC,
10196  (errcode_for_file_access(),
10197  errmsg("could not fdatasync log file %s: %m",
10198  XLogFileNameP(ThisTimeLineID, segno))));
10199  break;
10200 #endif
10201  case SYNC_METHOD_OPEN:
10202  case SYNC_METHOD_OPEN_DSYNC:
10203  /* write synced it already */
10204  break;
10205  default:
10206  elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
10207  break;
10208  }
10209 }
int pg_fdatasync(int fd)
Definition: fd.c:390
#define SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:28
int pg_fsync_writethrough(int fd)
Definition: fd.c:367
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:355
#define PANIC
Definition: elog.h:53
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:29
char * XLogFileNameP(TimeLineID tli, XLogSegNo segno)
Definition: xlog.c:10215
int errcode_for_file_access(void)
Definition: elog.c:598
#define SYNC_METHOD_FSYNC
Definition: xlog.h:25
#define ereport(elevel, rest)
Definition: elog.h:122
#define SYNC_METHOD_OPEN
Definition: xlog.h:27
TimeLineID ThisTimeLineID
Definition: xlog.c:181
int sync_method
Definition: xlog.c:103
#define SYNC_METHOD_FDATASYNC
Definition: xlog.h:26
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
void LocalProcessControlFile ( bool  reset)

Definition at line 4848 of file xlog.c.

References Assert, palloc(), and ReadControlFile().

Referenced by PostgresMain(), PostmasterMain(), and PostmasterStateMachine().

4849 {
4850  Assert(reset || ControlFile == NULL);
4851  ControlFile = palloc(sizeof(ControlFileData));
4852  ReadControlFile();
4853 }
static void ReadControlFile(void)
Definition: xlog.c:4481
static ControlFileData * ControlFile
Definition: xlog.c:715
#define Assert(condition)
Definition: c.h:681
void * palloc(Size size)
Definition: mcxt.c:848
bool RecoveryInProgress ( void  )

Definition at line 7954 of file xlog.c.

References InitXLOGAccess(), LocalRecoveryInProgress, pg_memory_barrier, XLogCtlData::SharedRecoveryInProgress, and XLogCtl.

Referenced by BackgroundWriterMain(), check_transaction_read_only(), check_XactIsoLevel(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_start_backup(), do_pg_stop_backup(), error_commit_ts_disabled(), get_relation_info(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetOldestXmin(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), gin_clean_pending_list(), heap_page_prune_opt(), IdentifySystem(), InitPostgres(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), MarkBufferDirtyHint(), OldSerXidSetActiveSerXmin(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_logical_slot_get_changes_guts(), pg_switch_wal(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PrepareRedoAdd(), PrepareRedoRemove(), PreventCommandDuringRecovery(), ProcSendSignal(), ProcSleep(), read_local_xlog_page(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), sendDir(), ShutdownXLOG(), SnapBuildWaitSnapshot(), standard_ProcessUtility(), StartLogicalReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), WalReceiverMain(), WalSndShutdown(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogInsertAllowed(), XLogNeedsFlush(), and XLogSendPhysical().

7955 {
7956  /*
7957  * We check shared state each time only until we leave recovery mode. We
7958  * can't re-enter recovery, so there's no need to keep checking after the
7959  * shared variable has once been seen false.
7960  */
7961  if (!LocalRecoveryInProgress)
7962  return false;
7963  else
7964  {
7965  /*
7966  * use volatile pointer to make sure we make a fresh read of the
7967  * shared variable.
7968  */
7969  volatile XLogCtlData *xlogctl = XLogCtl;
7970 
7971  LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
7972 
7973  /*
7974  * Initialize TimeLineID and RedoRecPtr when we discover that recovery
7975  * is finished. InitPostgres() relies upon this behaviour to ensure
7976  * that InitXLOGAccess() is called at backend startup. (If you change
7977  * this, see also LocalSetXLogInsertAllowed.)
7978  */
7979  if (!LocalRecoveryInProgress)
7980  {
7981  /*
7982  * If we just exited recovery, make sure we read TimeLineID and
7983  * RedoRecPtr after SharedRecoveryInProgress (for machines with
7984  * weak memory ordering).
7985  */
7986  pg_memory_barrier();
7987  InitXLOGAccess();
7988  }
7989 
7990  /*
7991  * Note: We don't need a memory barrier when we're still in recovery.
7992  * We might exit recovery immediately after return, so the caller
7993  * can't rely on 'true' meaning that we're still in recovery anyway.
7994  */
7995 
7996  return LocalRecoveryInProgress;
7997  }
7998 }
void InitXLOGAccess(void)
Definition: xlog.c:8221
bool SharedRecoveryInProgress
Definition: xlog.c:643
#define pg_memory_barrier()
Definition: atomics.h:148
static XLogCtlData * XLogCtl
Definition: xlog.c:707
static bool LocalRecoveryInProgress
Definition: xlog.c:217
bool RecoveryIsPaused ( void  )

Definition at line 6015 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryPause, SpinLockAcquire, and SpinLockRelease.

Referenced by pg_is_wal_replay_paused(), and recoveryPausesHere().

6016 {
6017  bool recoveryPause;
6018 
6019  SpinLockAcquire(&XLogCtl->info_lck);
6020  recoveryPause = XLogCtl->recoveryPause;
6021  SpinLockRelease(&XLogCtl->info_lck);
6022 
6023  return recoveryPause;
6024 }
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool recoveryPause
Definition: xlog.c:696
static XLogCtlData * XLogCtl
Definition: xlog.c:707
void RemovePromoteSignalFiles ( void  )

Definition at line 12160 of file xlog.c.

References FALLBACK_PROMOTE_SIGNAL_FILE, and PROMOTE_SIGNAL_FILE.

Referenced by PostmasterMain().

12161 {
12162  unlink(PROMOTE_SIGNAL_FILE);
12163  unlink(FALLBACK_PROMOTE_SIGNAL_FILE);
12164 }
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:85
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
void SetRecoveryPause ( bool  recoveryPause)

Definition at line 6027 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryPause, SpinLockAcquire, and SpinLockRelease.

Referenced by pg_wal_replay_pause(), pg_wal_replay_resume(), and StartupXLOG().

6028 {
6029  SpinLockAcquire(&XLogCtl->info_lck);
6030  XLogCtl->recoveryPause = recoveryPause;
6031  SpinLockRelease(&XLogCtl->info_lck);
6032 }
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool recoveryPause
Definition: xlog.c:696
static XLogCtlData * XLogCtl
Definition: xlog.c:707
void SetWalWriterSleeping ( bool  sleeping)

Definition at line 12196 of file xlog.c.

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, and XLogCtlData::WalWriterSleeping.

Referenced by WalWriterMain().

12197 {
12198  SpinLockAcquire(&XLogCtl->info_lck);
12199  XLogCtl->WalWriterSleeping = sleeping;
12200  SpinLockRelease(&XLogCtl->info_lck);
12201 }
slock_t info_lck
Definition: xlog.c:704
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool WalWriterSleeping
Definition: xlog.c:656
static XLogCtlData * XLogCtl
Definition: xlog.c:707
void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 8405 of file xlog.c.

References CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), ShutdownCLOG(), ShutdownCommitTs(), ShutdownMultiXact(), ShutdownSUBTRANS(), WalSndInitStopping(), WalSndWaitStopping(), XLogArchiveCommandSet, and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

8406 {
8407  /* Don't be chatty in standalone mode */
8408  ereport(IsPostmasterEnvironment ? LOG : NOTICE,
8409  (errmsg("shutting down")));
8410 
8411  /*
8412  * Signal walsenders to move to stopping state.
8413  */
8414  WalSndInitStopping();
8415 
8416  /*
8417  * Wait for WAL senders to be in stopping state. This prevents commands
8418  * from writing new WAL.
8419  */
8420  WalSndWaitStopping();
8421 
8422  if (RecoveryInProgress())
8423  CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
8424  else
8425  {
8426  /*
8427  * If archiving is enabled, rotate the last XLOG file so that all the
8428  * remaining records are archived (postmaster wakes up the archiver
8429  * process one more time at the end of shutdown). The checkpoint
8430  * record will go to the next XLOG file and won't be archived (yet).
8431  */
8432  if (XLogArchivingActive() && XLogArchiveCommandSet())
8433  RequestXLogSwitch(false);
8434 
8435  CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
8436  }
8437  ShutdownCLOG();
8438  ShutdownCommitTs();
8439  ShutdownSUBTRANS();
8440  ShutdownMultiXact();
8441 }
bool IsPostmasterEnvironment
Definition: globals.c:100
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9490
void ShutdownSUBTRANS(void)
Definition: subtrans.c:283
void CreateCheckPoint(int flags)
Definition: xlog.c:8612
void ShutdownCLOG(void)
Definition: clog.c:823
bool CreateRestartPoint(int flags)
Definition: xlog.c:9159
#define XLogArchiveCommandSet()
Definition: xlog.h:140
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:7954
void WalSndWaitStopping(void)
Definition: walsender.c:3059
void ShutdownMultiXact(void)
Definition: multixact.c:2105
#define ereport(elevel, rest)
Definition: elog.h:122
#define NOTICE
Definition: elog.h:37
void WalSndInitStopping(void)
Definition: walsender.c:3033
void ShutdownCommitTs(void)
Definition: commit_ts.c:745
#define XLogArchivingActive()
Definition: xlog.h:135
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:179
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:176
void StartupXLOG ( void  )

Definition at line 6274 of file xlog.c.

References AdvanceOldestClogXid(), AllowCascadeReplication, appendStringInfo(), appendStringInfoString(), archiveCleanupCommand, XLogCtlData::archiveCleanupCommand, ArchiveRecoveryRequested, ErrorContextCallback::arg, Assert, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, bgwriterLaunched, buf, ErrorContextCallback::callback, ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, CheckRecoveryConsistency(), CheckRequiredParameterValues(), checkTimeLineSwitch(), checkXLogConsistency(), XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, close, CompleteCommitTsInitialization(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), XLogCtlInsert::CurrBytePos, XLogCtlData::currentChunkStartTime, StringInfoData::data, DataDir, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DEBUG2, DEBUG3, DeleteAllExportedSnapshotFiles(), DisownLatch(), doPageWrites, doRequestWalReceiverReply, durable_rename(), elog, EnableHotStandby, EndRecPtr, ereport, errcode(), errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_internal(), ERROR, error_context_stack, ExecuteRecoveryCommand(), exitArchiveRecovery(), fast_promote, FATAL, findNewestTimeLine(), XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLatestXTime(), HandleStartupProcInterrupts(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), initStringInfo(), InRecovery, InRedo, Insert(), XLogCtlData::Insert, InvalidXLogRecPtr, IsPostmasterEnvironment, IsUnderPostmaster, lastFullPageWrites, LastRec, XLogCtlData::lastReplayedEndRecPtr, XLogCtlData::lastReplayedTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, 
RunningTransactionsData::latestCompletedXid, VariableCacheData::latestCompletedXid, lfirst, LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), master_image_masked, MAXFNAMELEN, MAXPGPATH, MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, RunningTransactionsData::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, NIL, NOTICE, tablespaceinfo::oid, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, OwnLatch(), XLogCtlData::pages, palloc(), PANIC, tablespaceinfo::path, pfree(), pg_usleep(), pgstat_reset_all(), PMSIGNAL_RECOVERY_STARTED, PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, ControlFileData::prevCheckPoint, ErrorContextCallback::previous, CheckPoint::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, XLogCtlData::PrevTimeLineID, proc_exit(), ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), psprintf(), PublishStartupProcessInformation(), reachedConsistency, read_backup_label(), read_tablespace_map(), XLogReaderState::readBuf, ReadCheckpointRecord(), readFile, readOff, XLogReaderState::readPageTLI, ReadRecord(), readRecoveryCommandFile(), ReadRecPtr, RecordKnownAssignedTransactionIds(), RecoverPreparedTransactions(), RECOVERY_TARGET_ACTION_PAUSE, RECOVERY_TARGET_ACTION_PROMOTE, RECOVERY_TARGET_ACTION_SHUTDOWN, RECOVERY_TARGET_IMMEDIATE, RECOVERY_TARGET_LSN, RECOVERY_TARGET_NAME, RECOVERY_TARGET_TIME, RECOVERY_TARGET_XID, recoveryApplyDelay(), recoveryEndCommand, 
XLogCtlData::recoveryLastXTime, XLogCtlData::recoveryPause, recoveryPausesHere(), recoveryStopAfter, recoveryStopLSN, recoveryStopName, recoveryStopsAfter(), recoveryStopsBefore(), recoveryStopTime, recoveryStopXid, recoveryTarget, recoveryTargetAction, recoveryTargetLSN, recoveryTargetName, recoveryTargetTime, recoveryTargetTLI, recoveryTargetXid, XLogCtlData::recoveryWakeupLatch, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RedoStartLSN, RelationCacheInitFileRemove(), remove_tablespace_symlink(), RemoveNonParentXlogFiles(), replay_image_masked, XLogCtlData::replayEndRecPtr, XLogCtlData::replayEndTLI, RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), RmgrData::rm_cleanup, RM_MAX_ID, RmgrData::rm_redo, rm_redo_error_callback(), RmgrData::rm_startup, RmgrTable, SendPostmasterSignal(), SetCommitTsLimit(), SetForwardFsyncRequests(), SetMultiXactIdLimit(), SetRecoveryPause(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryInProgress, ShmemVariableCache, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRcv(), snprintf(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_INITIALIZED, StandbyMode, StandbyModeRequested, StandbyRecoverPreparedTransactions(), standbyState, StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), strlcpy(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_overflow, SyncDataDirectory(), ControlFileData::system_identifier, XLogReaderState::system_identifier, TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, ThisTimeLineID, xl_end_of_recovery::ThisTimeLineID, XLogCtlData::ThisTimeLineID, CheckPoint::time, ControlFileData::time, timestamptz_to_str(), tliOfPointInHistory(), tliSwitchPoint(), trace_recovery_messages, track_commit_timestamp, ControlFileData::track_commit_timestamp, 
TransactionIdAdvance, TransactionIdFollowsOrEquals(), TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), wal_segment_size, WalRcvForceReply(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, RunningTransactionsData::xids, XLogRecord::xl_info, XLogRecord::xl_rmid, XLogRecord::xl_xid, XLogCtlData::xlblocks, XLByteToPrevSeg, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, xlog_outdesc(), XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName, XLogFilePath, XLogPageRead(), XLogReaderAllocate(), XLogReaderFree(), XLogReceiptTime, XLogRecGetData, XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), XLogSegmentOffset, XLR_CHECK_CONSISTENCY, XLR_INFO_MASK, and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

6275 {
6277  CheckPoint checkPoint;
6278  bool wasShutdown;
6279  bool reachedStopPoint = false;
6280  bool haveBackupLabel = false;
6281  bool haveTblspcMap = false;
6282  XLogRecPtr RecPtr,
6283  checkPointLoc,
6284  EndOfLog;
6285  TimeLineID EndOfLogTLI;
6286  TimeLineID PrevTimeLineID;
6287  XLogRecord *record;
6288  TransactionId oldestActiveXID;
6289  bool backupEndRequired = false;
6290  bool backupFromStandby = false;
6291  DBState dbstate_at_startup;
6292  XLogReaderState *xlogreader;
6293  XLogPageReadPrivate private;
6294  bool fast_promoted = false;
6295  struct stat st;
6296 
6297  /*
6298  * Verify XLOG status looks valid.
6299  */
6300  if (ControlFile->state < DB_SHUTDOWNED ||
6303  ereport(FATAL,
6304  (errmsg("control file contains invalid data")));
6305 
6307  {
6308  /* This is the expected case, so don't be chatty in standalone mode */
6310  (errmsg("database system was shut down at %s",
6311  str_time(ControlFile->time))));
6312  }
6314  ereport(LOG,
6315  (errmsg("database system was shut down in recovery at %s",
6316  str_time(ControlFile->time))));
6317  else if (ControlFile->state == DB_SHUTDOWNING)
6318  ereport(LOG,
6319  (errmsg("database system shutdown was interrupted; last known up at %s",
6320  str_time(ControlFile->time))));
6321  else if (ControlFile->state == DB_IN_CRASH_RECOVERY)
6322  ereport(LOG,
6323  (errmsg("database system was interrupted while in recovery at %s",
6325  errhint("This probably means that some data is corrupted and"
6326  " you will have to use the last backup for recovery.")));
6328  ereport(LOG,
6329  (errmsg("database system was interrupted while in recovery at log time %s",
6331  errhint("If this has occurred more than once some data might be corrupted"
6332  " and you might need to choose an earlier recovery target.")));
6333  else if (ControlFile->state == DB_IN_PRODUCTION)
6334  ereport(LOG,
6335  (errmsg("database system was interrupted; last known up at %s",
6336  str_time(ControlFile->time))));
6337 
6338  /* This is just to allow attaching to startup process with a debugger */
6339 #ifdef XLOG_REPLAY_DELAY
6341  pg_usleep(60000000L);
6342 #endif
6343 
6344  /*
6345  * Verify that pg_wal and pg_wal/archive_status exist. In cases where
6346  * someone has performed a copy for PITR, these directories may have been
6347  * excluded and need to be re-created.
6348  */
6350 
6351  /*
6352  * If we previously crashed, there might be data which we had written,
6353  * intending to fsync it, but which we had not actually fsync'd yet.
6354  * Therefore, a power failure in the near future might cause earlier
6355  * unflushed writes to be lost, even though more recent data written to
6356  * disk from here on would be persisted. To avoid that, fsync the entire
6357  * data directory.
6358  */
6359  if (ControlFile->state != DB_SHUTDOWNED &&
6362 
6363  /*
6364  * Initialize on the assumption we want to recover to the latest timeline
6365  * that's active according to pg_control.
6366  */
6370  else
6372 
6373  /*
6374  * Check for recovery control file, and if so set up state for offline
6375  * recovery
6376  */
6378 
6379  /*
6380  * Save archive_cleanup_command in shared memory so that other processes
6381  * can see it.
6382  */
6385  sizeof(XLogCtl->archiveCleanupCommand));
6386 
6388  {
6390  ereport(LOG,
6391  (errmsg("entering standby mode")));
6392  else if (recoveryTarget == RECOVERY_TARGET_XID)
6393  ereport(LOG,
6394  (errmsg("starting point-in-time recovery to XID %u",
6395  recoveryTargetXid)));
6397  ereport(LOG,
6398  (errmsg("starting point-in-time recovery to %s",
6401  ereport(LOG,
6402  (errmsg("starting point-in-time recovery to \"%s\"",
6403  recoveryTargetName)));
6404  else if (recoveryTarget == RECOVERY_TARGET_LSN)
6405  ereport(LOG,
6406  (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
6407  (uint32) (recoveryTargetLSN >> 32),
6410  ereport(LOG,
6411  (errmsg("starting point-in-time recovery to earliest consistent point")));
6412  else
6413  ereport(LOG,
6414  (errmsg("starting archive recovery")));
6415  }
6416 
6417  /*
6418  * Take ownership of the wakeup latch if we're going to sleep during
6419  * recovery.
6420  */
6423 
6424  /* Set up XLOG reader facility */
6425  MemSet(&private, 0, sizeof(XLogPageReadPrivate));
6426  xlogreader = XLogReaderAllocate(wal_segment_size, &XLogPageRead, &private);
6427  if (!xlogreader)
6428  ereport(ERROR,
6429  (errcode(ERRCODE_OUT_OF_MEMORY),
6430  errmsg("out of memory"),
6431  errdetail("Failed while allocating a WAL reading processor.")));
6433 
6434  /*
6435  * Allocate pages dedicated to WAL consistency checks; those had better be
6436  * aligned.
6437  */
6438  replay_image_masked = (char *) palloc(BLCKSZ);
6439  master_image_masked = (char *) palloc(BLCKSZ);
6440 
6441  if (read_backup_label(&checkPointLoc, &backupEndRequired,
6442  &backupFromStandby))
6443  {
6444  List *tablespaces = NIL;
6445 
6446  /*
6447  * Archive recovery was requested, and thanks to the backup label
6448  * file, we know how far we need to replay to reach consistency. Enter
6449  * archive recovery directly.
6450  */
6451  InArchiveRecovery = true;
6453  StandbyMode = true;
6454 
6455  /*
6456  * When a backup_label file is present, we want to roll forward from
6457  * the checkpoint it identifies, rather than using pg_control.
6458  */
6459  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
6460  if (record != NULL)
6461  {
6462  memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
6463  wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
6464  ereport(DEBUG1,
6465  (errmsg("checkpoint record is at %X/%X",
6466  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6467  InRecovery = true; /* force recovery even if SHUTDOWNED */
6468 
6469  /*
6470  * Make sure that REDO location exists. This may not be the case
6471  * if there was a crash during an online backup, which left a
6472  * backup_label around that references a WAL segment that's
6473  * already been archived.
6474  */
6475  if (checkPoint.redo < checkPointLoc)
6476  {
6477  if (!ReadRecord(xlogreader, checkPoint.redo, LOG, false))
6478  ereport(FATAL,
6479  (errmsg("could not find redo location referenced by checkpoint record"),
6480  errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
6481  }
6482  }
6483  else
6484  {
6485  ereport(FATAL,
6486  (errmsg("could not locate required checkpoint record"),
6487  errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
6488  wasShutdown = false; /* keep compiler quiet */
6489  }
6490 
6491  /* read the tablespace_map file if present and create symlinks. */
6492  if (read_tablespace_map(&tablespaces))
6493  {
6494  ListCell *lc;
6495 
6496  foreach(lc, tablespaces)
6497  {
6498  tablespaceinfo *ti = lfirst(lc);
6499  char *linkloc;
6500 
6501  linkloc = psprintf("pg_tblspc/%s", ti->oid);
6502 
6503  /*
6504  * Remove the existing symlink, if any, and create the symlink
6505  * under PGDATA.
6506  */
6507  remove_tablespace_symlink(linkloc);
6508 
6509  if (symlink(ti->path, linkloc) < 0)
6510  ereport(ERROR,
6512  errmsg("could not create symbolic link \"%s\": %m",
6513  linkloc)));
6514 
6515  pfree(ti->oid);
6516  pfree(ti->path);
6517  pfree(ti);
6518  }
6519 
6520  /* set flag to delete it later */
6521  haveTblspcMap = true;
6522  }
6523 
6524  /* set flag to delete it later */
6525  haveBackupLabel = true;
6526  }
6527  else
6528  {
6529  /*
6530  * If tablespace_map file is present without backup_label file, there
6531  * is no use of such file. There is no harm in retaining it, but it
6532  * is better to get rid of the map file so that we don't have any
6533  * redundant file in data directory and it will avoid any sort of
6534  * confusion. It seems prudent though to just rename the file out of
6535  * the way rather than delete it completely, also we ignore any error
6536  * that occurs in rename operation as even if map file is present
6537  * without backup_label file, it is harmless.
6538  */
6539  if (stat(TABLESPACE_MAP, &st) == 0)
6540  {
6541  unlink(TABLESPACE_MAP_OLD);
6543  ereport(LOG,
6544  (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
6546  errdetail("File \"%s\" was renamed to \"%s\".",
6548  else
6549  ereport(LOG,
6550  (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
6552  errdetail("Could not rename file \"%s\" to \"%s\": %m.",
6554  }
6555 
6556  /*
6557  * It's possible that archive recovery was requested, but we don't
6558  * know how far we need to replay the WAL before we reach consistency.
6559  * This can happen for example if a base backup is taken from a
6560  * running server using an atomic filesystem snapshot, without calling
6561  * pg_start/stop_backup. Or if you just kill a running master server
6562  * and put it into archive recovery by creating a recovery.conf file.
6563  *
6564  * Our strategy in that case is to perform crash recovery first,
6565  * replaying all the WAL present in pg_wal, and only enter archive
6566  * recovery after that.
6567  *
6568  * But usually we already know how far we need to replay the WAL (up
6569  * to minRecoveryPoint, up to backupEndPoint, or until we see an
6570  * end-of-backup record), and we can enter archive recovery directly.
6571  */
6577  {
6578  InArchiveRecovery = true;
6580  StandbyMode = true;
6581  }
6582 
6583  /*
6584  * Get the last valid checkpoint record. If the latest one according
6585  * to pg_control is broken, try the next-to-last one.
6586  */
6587  checkPointLoc = ControlFile->checkPoint;
6589  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true);
6590  if (record != NULL)
6591  {
6592  ereport(DEBUG1,
6593  (errmsg("checkpoint record is at %X/%X",
6594  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6595  }
6596  else if (StandbyMode)
6597  {
6598  /*
6599  * The last valid checkpoint record required for a streaming
6600  * recovery exists in neither standby nor the primary.
6601  */
6602  ereport(PANIC,
6603  (errmsg("could not locate a valid checkpoint record")));
6604  }
6605  else
6606  {
6607  checkPointLoc = ControlFile->prevCheckPoint;
6608  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
6609  if (record != NULL)
6610  {
6611  ereport(LOG,
6612  (errmsg("using previous checkpoint record at %X/%X",
6613  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6614  InRecovery = true; /* force recovery even if SHUTDOWNED */
6615  }
6616  else
6617  ereport(PANIC,
6618  (errmsg("could not locate a valid checkpoint record")));
6619  }
6620  memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
6621  wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
6622  }
6623 
6624  /*
6625  * Clear out any old relcache cache files. This is *necessary* if we do
6626  * any WAL replay, since that would probably result in the cache files
6627  * being out of sync with database reality. In theory we could leave them
6628  * in place if the database had been cleanly shut down, but it seems
6629  * safest to just remove them always and let them be rebuilt during the
6630  * first backend startup. These files need to be removed from all
6631  * directories including pg_tblspc; however, the symlinks are created only
6632  * after reading the tablespace_map file in case of archive recovery from
6633  * backup, so we need to clear old relcache files here, after creating
6634  * symlinks.
6635  */
6637 
6638  /*
6639  * If the location of the checkpoint record is not on the expected
6640  * timeline in the history of the requested timeline, we cannot proceed:
6641  * the backup is not part of the history of the requested timeline.
6642  */
6643  Assert(expectedTLEs); /* was initialized by reading checkpoint
6644  * record */
6645  if (tliOfPointInHistory(checkPointLoc, expectedTLEs) !=
6646  checkPoint.ThisTimeLineID)
6647  {
6648  XLogRecPtr switchpoint;
6649 
6650  /*
6651  * tliSwitchPoint will throw an error if the checkpoint's timeline is
6652  * not in expectedTLEs at all.
6653  */
6655  ereport(FATAL,
6656  (errmsg("requested timeline %u is not a child of this server's history",
6658  errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
6659  (uint32) (ControlFile->checkPoint >> 32),
6662  (uint32) (switchpoint >> 32),
6663  (uint32) switchpoint)));
6664  }
6665 
6666  /*
6667  * The min recovery point should be part of the requested timeline's
6668  * history, too.
6669  */
6673  ereport(FATAL,
6674  (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
6676  (uint32) (ControlFile->minRecoveryPoint >> 32),
6679 
6680  LastRec = RecPtr = checkPointLoc;
6681 
6682  ereport(DEBUG1,
6683  (errmsg_internal("redo record is at %X/%X; shutdown %s",
6684  (uint32) (checkPoint.redo >> 32), (uint32) checkPoint.redo,
6685  wasShutdown ? "TRUE" : "FALSE")));
6686  ereport(DEBUG1,
6687  (errmsg_internal("next transaction ID: %u:%u; next OID: %u",
6688  checkPoint.nextXidEpoch, checkPoint.nextXid,
6689  checkPoint.nextOid)));
6690  ereport(DEBUG1,
6691  (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
6692  checkPoint.nextMulti, checkPoint.nextMultiOffset)));
6693  ereport(DEBUG1,
6694  (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
6695  checkPoint.oldestXid, checkPoint.oldestXidDB)));
6696  ereport(DEBUG1,
6697  (errmsg_internal("oldest MultiXactId: %u, in database %u",
6698  checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
6699  ereport(DEBUG1,
6700  (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
6701  checkPoint.oldestCommitTsXid,
6702  checkPoint.newestCommitTsXid)));
6703  if (!TransactionIdIsNormal(checkPoint.nextXid))
6704  ereport(PANIC,
6705  (errmsg("invalid next transaction ID")));
6706 
6707  /* initialize shared memory variables from the checkpoint record */
6708  ShmemVariableCache->nextXid = checkPoint.nextXid;
6709  ShmemVariableCache->nextOid = checkPoint.nextOid;
6711  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
6712  AdvanceOldestClogXid(checkPoint.oldestXid);
6713  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
6714  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
6716  checkPoint.newestCommitTsXid);
6717  XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
6718  XLogCtl->ckptXid = checkPoint.nextXid;
6719 
6720  /*
6721  * Initialize replication slots, before there's a chance to remove
6722  * required resources.
6723  */
6725 
6726  /*
6727  * Start up logical state. This needs to be set up now so that we have
6728  * proper data during crash recovery.
6729  */
6731 
6732  /*
6733  * Startup MultiXact. We need to do this early to be able to replay
6734  * truncations.
6735  */
6736  StartupMultiXact();
6737 
6738  /*
6739  * Ditto commit timestamps. In a standby, we do it if setting is enabled
6740  * in ControlFile; in a master we base the decision on the GUC itself.
6741  */
6744  StartupCommitTs();
6745 
6746  /*
6747  * Recover knowledge about replay progress of known replication partners.
6748  */
6750 
6751  /*
6752  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
6753  * control file. On recovery, all unlogged relations are blown away, so
6754  * the unlogged LSN counter can be reset too.
6755  */
6758  else
6759  XLogCtl->unloggedLSN = 1;
6760 
6761  /*
6762  * We must replay WAL entries using the same TimeLineID they were created
6763  * under, so temporarily adopt the TLI indicated by the checkpoint (see
6764  * also xlog_redo()).
6765  */
6766  ThisTimeLineID = checkPoint.ThisTimeLineID;
6767 
6768  /*
6769  * Copy any missing timeline history files between 'now' and the recovery
6770  * target timeline from archive to pg_wal. While we don't need those files
6771  * ourselves - the history file of the recovery target timeline covers all
6772  * the previous timelines in the history too - a cascading standby server
6773  * might be interested in them. Or, if you archive the WAL from this
6774  * server to a different archive than the master, it'd be good for all the
6775  * history files to get archived there after failover, so that you can use
6776  * one of the old timelines as a PITR target. Timeline history files are
6777  * small, so it's better to copy them unnecessarily than not copy them and
6778  * regret later.
6779  */
6781 
6782  /*
6783  * Before running in recovery, scan pg_twophase and fill in its status to
6784  * be able to work on entries generated by redo. Doing a scan before
6785  * taking any recovery action has the merit of discarding any 2PC files that
6786  * are newer than the first record to replay, avoiding any conflicts at
6787  * replay. This also avoids any subsequent scans when doing recovery
6788  * of the on-disk two-phase data.
6789  */
6791 
6792  lastFullPageWrites = checkPoint.fullPageWrites;
6793 
6796 
6797  if (RecPtr < checkPoint.redo)
6798  ereport(PANIC,
6799  (errmsg("invalid redo in checkpoint record")));
6800 
6801  /*
6802  * Check whether we need to force recovery from WAL. If it appears to
6803  * have been a clean shutdown and we did not have a recovery.conf file,
6804  * then assume no recovery needed.
6805  */
6806  if (checkPoint.redo < RecPtr)
6807  {
6808  if (wasShutdown)
6809  ereport(PANIC,
6810  (errmsg("invalid redo record in shutdown checkpoint")));
6811  InRecovery = true;
6812  }
6813  else if (ControlFile->state != DB_SHUTDOWNED)
6814  InRecovery = true;
6815  else if (ArchiveRecoveryRequested)
6816  {
6817  /* force recovery due to presence of recovery.conf */
6818  InRecovery = true;
6819  }
6820 
6821  /* REDO */
6822  if (InRecovery)
6823  {
6824  int rmid;
6825 
6826  /*
6827  * Update pg_control to show that we are recovering and to show the
6828  * selected checkpoint as the place we are starting from. We also mark
6829  * pg_control with any minimum recovery stop point obtained from a
6830  * backup history file.
6831  */
6832  dbstate_at_startup = ControlFile->state;
6833  if (InArchiveRecovery)
6835  else
6836  {
6837  ereport(LOG,
6838  (errmsg("database system was not properly shut down; "
6839  "automatic recovery in progress")));
6841  ereport(LOG,
6842  (errmsg("crash recovery starts in timeline %u "
6843  "and has target timeline %u",
6845  recoveryTargetTLI)));
6847  }
6849  ControlFile->checkPoint = checkPointLoc;
6850  ControlFile->checkPointCopy = checkPoint;
6851  if (InArchiveRecovery)
6852  {
6853  /* initialize minRecoveryPoint if not set yet */
6854  if (ControlFile->minRecoveryPoint < checkPoint.redo)
6855  {
6856  ControlFile->minRecoveryPoint = checkPoint.redo;