PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
xlog.h File Reference
#include "access/rmgr.h"
#include "access/xlogdefs.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "storage/fd.h"
Include dependency graph for xlog.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  CheckpointStatsData
 

Macros

#define SYNC_METHOD_FSYNC   0
 
#define SYNC_METHOD_FDATASYNC   1
 
#define SYNC_METHOD_OPEN   2 /* for O_SYNC */
 
#define SYNC_METHOD_FSYNC_WRITETHROUGH   3
 
#define SYNC_METHOD_OPEN_DSYNC   4 /* for O_DSYNC */
 
#define InHotStandby   (standbyState >= STANDBY_SNAPSHOT_PENDING)
 
#define XLogArchivingActive()   (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
 
#define XLogArchivingAlways()   (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
 
#define XLogArchiveCommandSet()   (XLogArchiveCommand[0] != '\0')
 
#define XLogIsNeeded()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogHintBitIsNeeded()   (DataChecksumsEnabled() || wal_log_hints)
 
#define XLogStandbyInfoActive()   (wal_level >= WAL_LEVEL_REPLICA)
 
#define XLogLogicalInfoActive()   (wal_level >= WAL_LEVEL_LOGICAL)
 
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
 
#define CHECKPOINT_END_OF_RECOVERY
 
#define CHECKPOINT_IMMEDIATE   0x0004 /* Do it without delays */
 
#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
 
#define CHECKPOINT_FLUSH_ALL
 
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
 
#define CHECKPOINT_CAUSE_XLOG   0x0040 /* XLOG consumption */
 
#define CHECKPOINT_CAUSE_TIME   0x0080 /* Elapsed time */
 
#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
 
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
 
#define BACKUP_LABEL_FILE   "backup_label"
 
#define BACKUP_LABEL_OLD   "backup_label.old"
 
#define TABLESPACE_MAP   "tablespace_map"
 
#define TABLESPACE_MAP_OLD   "tablespace_map.old"
 

Typedefs

typedef enum ArchiveMode ArchiveMode
 
typedef enum WalLevel WalLevel
 
typedef struct CheckpointStatsData CheckpointStatsData
 

Enumerations

enum  HotStandbyState { STANDBY_DISABLED, STANDBY_INITIALIZED, STANDBY_SNAPSHOT_PENDING, STANDBY_SNAPSHOT_READY }
 
enum  RecoveryTargetType {
  RECOVERY_TARGET_UNSET, RECOVERY_TARGET_XID, RECOVERY_TARGET_TIME, RECOVERY_TARGET_NAME,
  RECOVERY_TARGET_LSN, RECOVERY_TARGET_IMMEDIATE
}
 
enum  ArchiveMode { ARCHIVE_MODE_OFF = 0, ARCHIVE_MODE_ON, ARCHIVE_MODE_ALWAYS }
 
enum  WalLevel { WAL_LEVEL_MINIMAL = 0, WAL_LEVEL_REPLICA, WAL_LEVEL_LOGICAL }
 

Functions

XLogRecPtr XLogInsertRecord (struct XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags)
 
void XLogFlush (XLogRecPtr RecPtr)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr RecPtr)
 
int XLogFileInit (XLogSegNo segno, bool *use_existent, bool use_lock)
 
int XLogFileOpen (XLogSegNo segno)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
void XLogSetAsyncXactLSN (XLogRecPtr record)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void xlog_redo (XLogReaderState *record)
 
void xlog_desc (StringInfo buf, XLogReaderState *record)
 
const char * xlog_identify (uint8 info)
 
void issue_xlog_fsync (int fd, XLogSegNo segno)
 
bool RecoveryInProgress (void)
 
bool HotStandbyActive (void)
 
bool HotStandbyActiveInReplay (void)
 
bool XLogInsertAllowed (void)
 
void GetXLogReceiptTime (TimestampTz *rtime, bool *fromStream)
 
XLogRecPtr GetXLogReplayRecPtr (TimeLineID *replayTLI)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
bool RecoveryIsPaused (void)
 
void SetRecoveryPause (bool recoveryPause)
 
TimestampTz GetLatestXTime (void)
 
TimestampTz GetCurrentChunkReplayStartTime (void)
 
char * XLogFileNameP (TimeLineID tli, XLogSegNo segno)
 
void UpdateControlFile (void)
 
uint64 GetSystemIdentifier (void)
 
bool DataChecksumsEnabled (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (void)
 
void StartupXLOG (void)
 
void ShutdownXLOG (int code, Datum arg)
 
void InitXLOGAccess (void)
 
void CreateCheckPoint (int flags)
 
bool CreateRestartPoint (int flags)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetRedoRecPtr (void)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
void GetNextXidAndEpoch (TransactionId *xid, uint32 *epoch)
 
void RemovePromoteSignalFiles (void)
 
bool CheckPromoteSignal (void)
 
void WakeupRecovery (void)
 
void SetWalWriterSleeping (bool sleeping)
 
void XLogRequestWalReceiverReply (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
XLogRecPtr do_pg_start_backup (const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, DIR *tblspcdir, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, bool needtblspcmapfile)
 
XLogRecPtr do_pg_stop_backup (char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
 
void do_pg_abort_backup (void)
 

Variables

int sync_method
 
PGDLLIMPORT TimeLineID ThisTimeLineID
 
bool InRecovery
 
HotStandbyState standbyState
 
XLogRecPtr ProcLastRecPtr
 
XLogRecPtr XactLastRecEnd
 
PGDLLIMPORT XLogRecPtr XactLastCommitEnd
 
bool reachedConsistency
 
int min_wal_size
 
int max_wal_size
 
int wal_keep_segments
 
int XLOGbuffers
 
int XLogArchiveTimeout
 
int wal_retrieve_retry_interval
 
char * XLogArchiveCommand
 
bool EnableHotStandby
 
bool fullPageWrites
 
bool wal_log_hints
 
bool wal_compression
 
bool * wal_consistency_checking
 
char * wal_consistency_checking_string
 
bool log_checkpoints
 
int CheckPointSegments
 
int XLogArchiveMode
 
PGDLLIMPORT int wal_level
 
CheckpointStatsData CheckpointStats
 

Macro Definition Documentation

#define BACKUP_LABEL_FILE   "backup_label"
#define BACKUP_LABEL_OLD   "backup_label.old"

Definition at line 302 of file xlog.h.

Referenced by CancelBackup(), and StartupXLOG().

#define CHECKPOINT_CAUSE_TIME   0x0080 /* Elapsed time */

Definition at line 187 of file xlog.h.

Referenced by CheckpointerMain(), and LogCheckpointStart().

#define CHECKPOINT_CAUSE_XLOG   0x0040 /* XLOG consumption */

Definition at line 186 of file xlog.h.

Referenced by CheckpointerMain(), LogCheckpointStart(), XLogPageRead(), and XLogWrite().

#define CHECKPOINT_END_OF_RECOVERY
Value:
0x0002 /* Like shutdown checkpoint,
* but issued at end of WAL
* recovery */

Definition at line 176 of file xlog.h.

Referenced by BufferSync(), CheckpointerMain(), CreateCheckPoint(), LogCheckpointStart(), and StartupXLOG().

#define CHECKPOINT_FLUSH_ALL
Value:
0x0010 /* Flush all pages, including those
* belonging to unlogged tables */

Definition at line 181 of file xlog.h.

Referenced by BufferSync(), createdb(), LogCheckpointStart(), and movedb().

#define CHECKPOINT_FORCE   0x0008 /* Force even if no activity */
#define CHECKPOINT_IS_SHUTDOWN   0x0001 /* Checkpoint is for shutdown */
#define CHECKPOINT_WAIT   0x0020 /* Wait for completion */
#define SYNC_METHOD_FDATASYNC   1

Definition at line 26 of file xlog.h.

Referenced by get_sync_bit(), and issue_xlog_fsync().

#define SYNC_METHOD_FSYNC   0

Definition at line 25 of file xlog.h.

Referenced by get_sync_bit(), and issue_xlog_fsync().

#define SYNC_METHOD_FSYNC_WRITETHROUGH   3

Definition at line 28 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and pg_fsync().

#define SYNC_METHOD_OPEN   2 /* for O_SYNC */

Definition at line 27 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and XLogWrite().

#define SYNC_METHOD_OPEN_DSYNC   4 /* for O_DSYNC */

Definition at line 29 of file xlog.h.

Referenced by get_sync_bit(), issue_xlog_fsync(), and XLogWrite().

#define TABLESPACE_MAP   "tablespace_map"
#define TABLESPACE_MAP_OLD   "tablespace_map.old"

Definition at line 305 of file xlog.h.

Referenced by CancelBackup(), and StartupXLOG().

#define XLOG_INCLUDE_ORIGIN   0x01 /* include the replication origin */
#define XLOG_MARK_UNIMPORTANT   0x02 /* record not important for durability */
#define XLogArchiveCommandSet ( )    (XLogArchiveCommand[0] != '\0')

Definition at line 139 of file xlog.h.

Referenced by pgarch_ArchiverCopyLoop(), and ShutdownXLOG().

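#define XLogArchivingAlways ( )    (AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)

Definition at line 137 of file xlog.h.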

Referenced by sigusr1_handler().

#define XLogLogicalInfoActive ( )    (wal_level >= WAL_LEVEL_LOGICAL)

Definition at line 162 of file xlog.h.

Referenced by AssignTransactionId(), RecoverPreparedTransactions(), and XactLogCommitRecord().

Typedef Documentation

Enumeration Type Documentation

enum ArchiveMode
Enumerator
ARCHIVE_MODE_OFF 
ARCHIVE_MODE_ON 
ARCHIVE_MODE_ALWAYS 

Definition at line 115 of file xlog.h.

116 {
117  ARCHIVE_MODE_OFF = 0, /* disabled */
118  ARCHIVE_MODE_ON, /* enabled while server is running normally */
119  ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */
120 } ArchiveMode;
ArchiveMode
Definition: xlog.h:115
enum HotStandbyState
Enumerator
STANDBY_DISABLED 
STANDBY_INITIALIZED 
STANDBY_SNAPSHOT_PENDING 
STANDBY_SNAPSHOT_READY 

Definition at line 64 of file xlog.h.

enum RecoveryTargetType
Enumerator
RECOVERY_TARGET_UNSET 
RECOVERY_TARGET_XID 
RECOVERY_TARGET_TIME 
RECOVERY_TARGET_NAME 
RECOVERY_TARGET_LSN 
RECOVERY_TARGET_IMMEDIATE 

Definition at line 80 of file xlog.h.

enum WalLevel
Enumerator
WAL_LEVEL_MINIMAL 
WAL_LEVEL_REPLICA 
WAL_LEVEL_LOGICAL 

Definition at line 124 of file xlog.h.

Function Documentation

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2230 of file xlog.c.

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

2231 {
2234 }
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2200
#define newval
double CheckPointCompletionTarget
Definition: checkpointer.c:147
void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2223 of file xlog.c.

References CalculateCheckpointSegments(), max_wal_size, and newval.

2224 {
2225  max_wal_size = newval;
2227 }
int max_wal_size
Definition: xlog.c:88
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2200
#define newval
void BootStrapXLOG ( void  )

Definition at line 4907 of file xlog.c.

References Assert, bootstrap_data_checksum_version, BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstBootstrapObjectId, FirstMultiXactId, FirstNormalTransactionId, CheckPoint::fullPageWrites, fullPageWrites, gettimeofday(), INIT_CRC32C, InvalidTransactionId, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, NULL, offsetof, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, palloc(), PANIC, pfree(), pg_fsync(), CheckPoint::PrevTimeLineID, CheckPoint::redo, SetCommitTsLimit(), SetMultiXactIdLimit(), SetTransactionIdLimit(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, ControlFileData::state, ControlFileData::system_identifier, TemplateDbOid, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, track_commit_timestamp, ControlFileData::track_commit_timestamp, TYPEALIGN, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, ControlFileData::wal_log_hints, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogSegSize, 
XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by AuxiliaryProcessMain().

4908 {
4909  CheckPoint checkPoint;
4910  char *buffer;
4911  XLogPageHeader page;
4912  XLogLongPageHeader longpage;
4913  XLogRecord *record;
4914  char *recptr;
4915  bool use_existent;
4916  uint64 sysidentifier;
4917  struct timeval tv;
4918  pg_crc32c crc;
4919 
4920  /*
4921  * Select a hopefully-unique system identifier code for this installation.
4922  * We use the result of gettimeofday(), including the fractional seconds
4923  * field, as being about as unique as we can easily get. (Think not to
4924  * use random(), since it hasn't been seeded and there's no portable way
4925  * to seed it other than the system clock value...) The upper half of the
4926  * uint64 value is just the tv_sec part, while the lower half contains the
4927  * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
4928  * PID for a little extra uniqueness. A person knowing this encoding can
4929  * determine the initialization time of the installation, which could
4930  * perhaps be useful sometimes.
4931  */
4932  gettimeofday(&tv, NULL);
4933  sysidentifier = ((uint64) tv.tv_sec) << 32;
4934  sysidentifier |= ((uint64) tv.tv_usec) << 12;
4935  sysidentifier |= getpid() & 0xFFF;
4936 
4937  /* First timeline ID is always 1 */
4938  ThisTimeLineID = 1;
4939 
4940  /* page buffer must be aligned suitably for O_DIRECT */
4941  buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
4942  page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
4943  memset(page, 0, XLOG_BLCKSZ);
4944 
4945  /*
4946  * Set up information for the initial checkpoint record
4947  *
4948  * The initial checkpoint record is written to the beginning of the WAL
4949  * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
4950  * used, so that we can use 0/0 to mean "before any valid WAL segment".
4951  */
4952  checkPoint.redo = XLogSegSize + SizeOfXLogLongPHD;
4953  checkPoint.ThisTimeLineID = ThisTimeLineID;
4954  checkPoint.PrevTimeLineID = ThisTimeLineID;
4955  checkPoint.fullPageWrites = fullPageWrites;
4956  checkPoint.nextXidEpoch = 0;
4957  checkPoint.nextXid = FirstNormalTransactionId;
4958  checkPoint.nextOid = FirstBootstrapObjectId;
4959  checkPoint.nextMulti = FirstMultiXactId;
4960  checkPoint.nextMultiOffset = 0;
4961  checkPoint.oldestXid = FirstNormalTransactionId;
4962  checkPoint.oldestXidDB = TemplateDbOid;
4963  checkPoint.oldestMulti = FirstMultiXactId;
4964  checkPoint.oldestMultiDB = TemplateDbOid;
4967  checkPoint.time = (pg_time_t) time(NULL);
4969 
4970  ShmemVariableCache->nextXid = checkPoint.nextXid;
4971  ShmemVariableCache->nextOid = checkPoint.nextOid;
4973  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
4974  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
4975  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
4977 
4978  /* Set up the XLOG page header */
4979  page->xlp_magic = XLOG_PAGE_MAGIC;
4980  page->xlp_info = XLP_LONG_HEADER;
4981  page->xlp_tli = ThisTimeLineID;
4982  page->xlp_pageaddr = XLogSegSize;
4983  longpage = (XLogLongPageHeader) page;
4984  longpage->xlp_sysid = sysidentifier;
4985  longpage->xlp_seg_size = XLogSegSize;
4986  longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
4987 
4988  /* Insert the initial checkpoint record */
4989  recptr = ((char *) page + SizeOfXLogLongPHD);
4990  record = (XLogRecord *) recptr;
4991  record->xl_prev = 0;
4992  record->xl_xid = InvalidTransactionId;
4993  record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
4995  record->xl_rmid = RM_XLOG_ID;
4996  recptr += SizeOfXLogRecord;
4997  /* fill the XLogRecordDataHeaderShort struct */
4998  *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
4999  *(recptr++) = sizeof(checkPoint);
5000  memcpy(recptr, &checkPoint, sizeof(checkPoint));
5001  recptr += sizeof(checkPoint);
5002  Assert(recptr - (char *) record == record->xl_tot_len);
5003 
5004  INIT_CRC32C(crc);
5005  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5006  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5007  FIN_CRC32C(crc);
5008  record->xl_crc = crc;
5009 
5010  /* Create first XLOG segment file */
5011  use_existent = false;
5012  openLogFile = XLogFileInit(1, &use_existent, false);
5013 
5014  /* Write the first page with the initial record */
5015  errno = 0;
5016  if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5017  {
5018  /* if write didn't set errno, assume problem is no disk space */
5019  if (errno == 0)
5020  errno = ENOSPC;
5021  ereport(PANIC,
5023  errmsg("could not write bootstrap transaction log file: %m")));
5024  }
5025 
5026  if (pg_fsync(openLogFile) != 0)
5027  ereport(PANIC,
5029  errmsg("could not fsync bootstrap transaction log file: %m")));
5030 
5031  if (close(openLogFile))
5032  ereport(PANIC,
5034  errmsg("could not close bootstrap transaction log file: %m")));
5035 
5036  openLogFile = -1;
5037 
5038  /* Now create pg_control */
5039 
5040  memset(ControlFile, 0, sizeof(ControlFileData));
5041  /* Initialize pg_control status fields */
5042  ControlFile->system_identifier = sysidentifier;
5044  ControlFile->time = checkPoint.time;
5045  ControlFile->checkPoint = checkPoint.redo;
5046  ControlFile->checkPointCopy = checkPoint;
5047  ControlFile->unloggedLSN = 1;
5048 
5049  /* Set important parameter values for use when replaying WAL */
5058 
5059  /* some additional ControlFile fields are set in WriteControlFile() */
5060 
5061  WriteControlFile();
5062 
5063  /* Bootstrap the commit log, too */
5064  BootStrapCLOG();
5068 
5069  pfree(buffer);
5070 }
static void WriteControlFile(void)
Definition: xlog.c:4358
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define XLogSegSize
Definition: xlog_internal.h:92
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
int max_locks_per_xact
Definition: pg_control.h:183
int gettimeofday(struct timeval *tp, struct timezone *tzp)
Definition: gettimeofday.c:105
int max_prepared_xacts
Definition: pg_control.h:182
int64 pg_time_t
Definition: pgtime.h:23
pg_time_t time
Definition: pg_control.h:129
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:837
uint32 oidCount
Definition: transam.h:112
#define write(a, b, c)
Definition: win32.h:19
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:200
int max_worker_processes
Definition: pg_control.h:181
uint32 pg_crc32c
Definition: pg_crc32c.h:38
TransactionId oldestActiveXid
Definition: pg_control.h:60
int wal_level
Definition: xlog.c:103
int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
Definition: xlog.c:3141
void BootStrapMultiXact(void)
Definition: multixact.c:1866
MultiXactId oldestMulti
Definition: pg_control.h:46
TimeLineID PrevTimeLineID
Definition: pg_control.h:36
RmgrId xl_rmid
Definition: xlogrecord.h:47
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
CheckPoint checkPointCopy
Definition: pg_control.h:133
TransactionId oldestXid
Definition: pg_control.h:44
TransactionId nextXid
Definition: pg_control.h:40
pg_time_t time
Definition: pg_control.h:48
#define PANIC
Definition: elog.h:53
uint32 bootstrap_data_checksum_version
Definition: bootstrap.c:48
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition: multixact.c:2191
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:74
bool fullPageWrites
Definition: xlog.c:96
void BootStrapSUBTRANS(void)
Definition: subtrans.c:200
MultiXactOffset nextMultiOffset
Definition: pg_control.h:43
TransactionId oldestCommitTsXid
Definition: pg_control.h:49
void pfree(void *pointer)
Definition: mcxt.c:992
#define FirstNormalTransactionId
Definition: transam.h:34
int max_prepared_xacts
Definition: twophase.c:99
uint64 system_identifier
Definition: pg_control.h:107
uint32 xl_tot_len
Definition: xlogrecord.h:43
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
TransactionId nextXid
Definition: transam.h:117
uint32 nextXidEpoch
Definition: pg_control.h:39
bool track_commit_timestamp
Definition: commit_ts.c:103
#define TemplateDbOid
Definition: pg_database.h:80
uint32 data_checksum_version
Definition: pg_control.h:223
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:64
XLogRecPtr unloggedLSN
Definition: pg_control.h:135
int errcode_for_file_access(void)
Definition: elog.c:598
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
#define FirstBootstrapObjectId
Definition: transam.h:93
#define FirstMultiXactId
Definition: multixact.h:24
#define ereport(elevel, rest)
Definition: elog.h:122
int max_locks_per_xact
Definition: lock.c:54
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
TransactionId newestCommitTsXid
Definition: pg_control.h:51
int MaxConnections
Definition: globals.c:123
Oid oldestMultiDB
Definition: pg_control.h:47
static int openLogFile
Definition: xlog.c:761
static ControlFileData * ControlFile
Definition: xlog.c:708
TimeLineID ThisTimeLineID
Definition: xlog.c:178
Oid nextOid
Definition: pg_control.h:41
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:577
bool fullPageWrites
Definition: pg_control.h:38
bool wal_log_hints
Definition: xlog.c:97
void BootStrapCLOG(void)
Definition: clog.c:463
#define NULL
Definition: c.h:226
bool track_commit_timestamp
Definition: pg_control.h:184
#define Assert(condition)
Definition: c.h:671
#define XLP_LONG_HEADER
Definition: xlog_internal.h:79
Oid oldestXidDB
Definition: pg_control.h:45
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:267
uint8 xl_info
Definition: xlogrecord.h:46
MultiXactId nextMulti
Definition: pg_control.h:42
pg_crc32c xl_crc
Definition: xlogrecord.h:49
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:223
TransactionId xl_xid
Definition: xlogrecord.h:44
TimeLineID ThisTimeLineID
Definition: pg_control.h:35
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
int max_worker_processes
Definition: globals.c:124
int pg_fsync(int fd)
Definition: fd.c:333
#define close(a)
Definition: win32.h:17
void BootStrapCommitTs(void)
Definition: commit_ts.c:523
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:73
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:78
XLogRecPtr checkPoint
Definition: pg_control.h:130
XLogRecPtr redo
Definition: pg_control.h:33
#define offsetof(type, field)
Definition: c.h:551
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2160
bool CheckPromoteSignal ( void  )

Definition at line 11959 of file xlog.c.

References FALLBACK_PROMOTE_SIGNAL_FILE, and PROMOTE_SIGNAL_FILE.

Referenced by sigusr1_handler().

11960 {
11961  struct stat stat_buf;
11962 
11963  if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0 ||
11965  return true;
11966 
11967  return false;
11968 }
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
struct stat stat_buf
Definition: pg_standby.c:101
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:83
void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3743 of file xlog.c.

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, and XLogFileName.

Referenced by perform_base_backup(), and XLogRead().

3744 {
3745  XLogSegNo lastRemovedSegNo;
3746 
3748  lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3750 
3751  if (segno <= lastRemovedSegNo)
3752  {
3753  char filename[MAXFNAMELEN];
3754 
3755  XLogFileName(filename, tli, segno);
3756  ereport(ERROR,
3758  errmsg("requested WAL segment %s has already been removed",
3759  filename)));
3760  }
3761 }
slock_t info_lck
Definition: xlog.c:697
#define XLogFileName(fname, tli, logSegNo)
XLogSegNo lastRemovedSegNo
Definition: xlog.c:579
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define ERROR
Definition: elog.h:43
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
static char * filename
Definition: pg_dumpall.c:84
int errmsg(const char *fmt,...)
Definition: elog.c:797
void CreateCheckPoint ( int  flags)

Definition at line 8449 of file xlog.c.

References ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, XLogCtlInsert::CurrBytePos, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, elog, END_CRIT_SECTION, ereport, errmsg(), ERROR, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestXmin(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, InitXLogInsert(), Insert(), XLogCtlData::Insert, INSERT_FREESPACE, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, CheckPoint::newestCommitTsXid, VariableCacheData::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, NULL, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, VariableCacheData::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, VariableCacheData::oldestXid, CheckPoint::oldestXidDB, VariableCacheData::oldestXidDB, PANIC, pfree(), pg_usleep(), PreallocXlogFiles(), ControlFileData::prevCheckPoint, CheckPoint::PrevTimeLineID, XLogCtlData::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, 
RemoveOldXlogFiles(), ShmemVariableCache, SizeOfXLogLongPHD, SizeOfXLogShortPHD, smgrpostckpt(), smgrpreckpt(), SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, CheckPoint::time, ControlFileData::time, TruncateSUBTRANS(), XLogCtlData::ulsn_lck, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegSize, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), ShutdownXLOG(), and StartupXLOG().

8450 {
8451  bool shutdown;
8452  CheckPoint checkPoint;
8453  XLogRecPtr recptr;
8454  XLogCtlInsert *Insert = &XLogCtl->Insert;
8455  uint32 freespace;
8456  XLogRecPtr PriorRedoPtr;
8457  XLogRecPtr curInsert;
8458  XLogRecPtr last_important_lsn;
8459  VirtualTransactionId *vxids;
8460  int nvxids;
8461 
8462  /*
8463  * An end-of-recovery checkpoint is really a shutdown checkpoint, just
8464  * issued at a different time.
8465  */
8466  if (flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY))
8467  shutdown = true;
8468  else
8469  shutdown = false;
8470 
8471  /* sanity check */
8472  if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
8473  elog(ERROR, "can't create a checkpoint during recovery");
8474 
8475  /*
8476  * Initialize InitXLogInsert working areas before entering the critical
8477  * section. Normally, this is done by the first call to
8478  * RecoveryInProgress() or LocalSetXLogInsertAllowed(), but when creating
8479  * an end-of-recovery checkpoint, the LocalSetXLogInsertAllowed call is
8480  * done below in a critical section, and InitXLogInsert cannot be called
8481  * in a critical section.
8482  */
8483  InitXLogInsert();
8484 
8485  /*
8486  * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
8487  * (This is just pro forma, since in the present system structure there is
8488  * only one process that is allowed to issue checkpoints at any given
8489  * time.)
8490  */
8491  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
8492 
8493  /*
8494  * Prepare to accumulate statistics.
8495  *
8496  * Note: because it is possible for log_checkpoints to change while a
8497  * checkpoint proceeds, we always accumulate stats, even if
8498  * log_checkpoints is currently off.
8499  */
8500  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
8501  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
8502 
8503  /*
8504  * Use a critical section to force system panic if we have trouble.
8505  */
8506  START_CRIT_SECTION();
8507 
8508  if (shutdown)
8509  {
8510  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8511  ControlFile->state = DB_SHUTDOWNING;
8512  ControlFile->time = (pg_time_t) time(NULL);
8513  UpdateControlFile();
8514  LWLockRelease(ControlFileLock);
8515  }
8516 
8517  /*
8518  * Let smgr prepare for checkpoint; this has to happen before we determine
8519  * the REDO pointer. Note that smgr must not do anything that'd have to
8520  * be undone if we decide no checkpoint is needed.
8521  */
8522  smgrpreckpt();
8523 
8524  /* Begin filling in the checkpoint WAL record */
8525  MemSet(&checkPoint, 0, sizeof(checkPoint));
8526  checkPoint.time = (pg_time_t) time(NULL);
8527 
8528  /*
8529  * For Hot Standby, derive the oldestActiveXid before we fix the redo
8530  * pointer. This allows us to begin accumulating changes to assemble our
8531  * starting snapshot of locks and transactions.
8532  */
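8533  if (!shutdown && XLogStandbyInfoActive())
8534  checkPoint.oldestActiveXid = GetOldestActiveTransactionId();
8535  else
8536  checkPoint.oldestActiveXid = InvalidTransactionId;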
8537 
8538  /*
8539  * Get location of last important record before acquiring insert locks (as
8540  * GetLastImportantRecPtr() also locks WAL locks).
8541  */
8542  last_important_lsn = GetLastImportantRecPtr();
8543 
8544  /*
8545  * We must block concurrent insertions while examining insert state to
8546  * determine the checkpoint REDO pointer.
8547  */
8548  WALInsertLockAcquireExclusive();
8549  curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
8550 
8551  /*
8552  * If this isn't a shutdown or forced checkpoint, and if there has been no
8553  * WAL activity requiring a checkpoint, skip it. The idea here is to
8554  * avoid inserting duplicate checkpoints when the system is idle.
8555  */
8556  if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
8557  CHECKPOINT_FORCE)) == 0)
8558  {
8559  if (last_important_lsn == ControlFile->checkPoint)
8560  {
8561  WALInsertLockRelease();
8562  LWLockRelease(CheckpointLock);
8563  END_CRIT_SECTION();
8564  ereport(DEBUG1,
8565  (errmsg("checkpoint skipped due to an idle system")));
8566  return;
8567  }
8568  }
8569 
8570  /*
8571  * An end-of-recovery checkpoint is created before anyone is allowed to
8572  * write WAL. To allow us to write the checkpoint record, temporarily
8573  * enable XLogInsertAllowed. (This also ensures ThisTimeLineID is
8574  * initialized, which we need here and in AdvanceXLInsertBuffer.)
8575  */
8576  if (flags & CHECKPOINT_END_OF_RECOVERY)
8577  LocalSetXLogInsertAllowed();
8578 
8579  checkPoint.ThisTimeLineID = ThisTimeLineID;
8580  if (flags & CHECKPOINT_END_OF_RECOVERY)
8581  checkPoint.PrevTimeLineID = XLogCtl->PrevTimeLineID;
8582  else
8583  checkPoint.PrevTimeLineID = ThisTimeLineID;
8584 
8585  checkPoint.fullPageWrites = Insert->fullPageWrites;
8586 
8587  /*
8588  * Compute new REDO record ptr = location of next XLOG record.
8589  *
8590  * NB: this is NOT necessarily where the checkpoint record itself will be,
8591  * since other backends may insert more XLOG records while we're off doing
8592  * the buffer flush work. Those XLOG records are logically after the
8593  * checkpoint, even though physically before it. Got that?
8594  */
8595  freespace = INSERT_FREESPACE(curInsert);
8596  if (freespace == 0)
8597  {
8598  if (curInsert % XLogSegSize == 0)
8599  curInsert += SizeOfXLogLongPHD;
8600  else
8601  curInsert += SizeOfXLogShortPHD;
8602  }
8603  checkPoint.redo = curInsert;
8604 
8605  /*
8606  * Here we update the shared RedoRecPtr for future XLogInsert calls; this
8607  * must be done while holding all the insertion locks.
8608  *
8609  * Note: if we fail to complete the checkpoint, RedoRecPtr will be left
8610  * pointing past where it really needs to point. This is okay; the only
8611  * consequence is that XLogInsert might back up whole buffers that it
8612  * didn't really need to. We can't postpone advancing RedoRecPtr because
8613  * XLogInserts that happen while we are dumping buffers must assume that
8614  * their buffer changes are not included in the checkpoint.
8615  */
8616  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
8617 
8618  /*
8619  * Now we can release the WAL insertion locks, allowing other xacts to
8620  * proceed while we are flushing disk buffers.
8621  */
8622  WALInsertLockRelease();
8623 
8624  /* Update the info_lck-protected copy of RedoRecPtr as well */
8625  SpinLockAcquire(&XLogCtl->info_lck);
8626  XLogCtl->RedoRecPtr = checkPoint.redo;
8627  SpinLockRelease(&XLogCtl->info_lck);
8628 
8629  /*
8630  * If enabled, log checkpoint start. We postpone this until now so as not
8631  * to log anything if we decided to skip the checkpoint.
8632  */
8633  if (log_checkpoints)
8634  LogCheckpointStart(flags, false);
8635 
8636  TRACE_POSTGRESQL_CHECKPOINT_START(flags);
8637 
8638  /*
8639  * Get the other info we need for the checkpoint record.
8640  */
8641  LWLockAcquire(XidGenLock, LW_SHARED);
8642  checkPoint.nextXid = ShmemVariableCache->nextXid;
8643  checkPoint.oldestXid = ShmemVariableCache->oldestXid;
8644  checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
8645  LWLockRelease(XidGenLock);
8646 
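8647  LWLockAcquire(CommitTsLock, LW_SHARED);
8648  checkPoint.oldestCommitTsXid = ShmemVariableCache->oldestCommitTsXid;
8649  checkPoint.newestCommitTsXid = ShmemVariableCache->newestCommitTsXid;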
8650  LWLockRelease(CommitTsLock);
8651 
8652  /* Increase XID epoch if we've wrapped around since last checkpoint */
8653  checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
8654  if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
8655  checkPoint.nextXidEpoch++;
8656 
8657  LWLockAcquire(OidGenLock, LW_SHARED);
8658  checkPoint.nextOid = ShmemVariableCache->nextOid;
8659  if (!shutdown)
8660  checkPoint.nextOid += ShmemVariableCache->oidCount;
8661  LWLockRelease(OidGenLock);
8662 
8663  MultiXactGetCheckptMulti(shutdown,
8664  &checkPoint.nextMulti,
8665  &checkPoint.nextMultiOffset,
8666  &checkPoint.oldestMulti,
8667  &checkPoint.oldestMultiDB);
8668 
8669  /*
8670  * Having constructed the checkpoint record, ensure all shmem disk buffers
8671  * and commit-log buffers are flushed to disk.
8672  *
8673  * This I/O could fail for various reasons. If so, we will fail to
8674  * complete the checkpoint, but there is no reason to force a system
8675  * panic. Accordingly, exit critical section while doing it.
8676  */
8677  END_CRIT_SECTION();
8678 
8679  /*
8680  * In some cases there are groups of actions that must all occur on one
8681  * side or the other of a checkpoint record. Before flushing the
8682  * checkpoint record we must explicitly wait for any backend currently
8683  * performing those groups of actions.
8684  *
8685  * One example is end of transaction, so we must wait for any transactions
8686  * that are currently in commit critical sections. If an xact inserted
8687  * its commit record into XLOG just before the REDO point, then a crash
8688  * restart from the REDO point would not replay that record, which means
8689  * that our flushing had better include the xact's update of pg_clog. So
8690  * we wait till he's out of his commit critical section before proceeding.
8691  * See notes in RecordTransactionCommit().
8692  *
8693  * Because we've already released the insertion locks, this test is a bit
8694  * fuzzy: it is possible that we will wait for xacts we didn't really need
8695  * to wait for. But the delay should be short and it seems better to make
8696  * checkpoint take a bit longer than to hold off insertions longer than
8697  * necessary. (In fact, the whole reason we have this issue is that xact.c
8698  * does commit record XLOG insertion and clog update as two separate steps
8699  * protected by different locks, but again that seems best on grounds of
8700  * minimizing lock contention.)
8701  *
8702  * A transaction that has not yet set delayChkpt when we look cannot be at
8703  * risk, since he's not inserted his commit record yet; and one that's
8704  * already cleared it is not at risk either, since he's done fixing clog
8705  * and we will correctly flush the update below. So we cannot miss any
8706  * xacts we need to wait for.
8707  */
8708  vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
8709  if (nvxids > 0)
8710  {
8711  do
8712  {
8713  pg_usleep(10000L); /* wait for 10 msec */
8714  } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
8715  }
8716  pfree(vxids);
8717 
8718  CheckPointGuts(checkPoint.redo, flags);
8719 
8720  /*
8721  * Take a snapshot of running transactions and write this to WAL. This
8722  * allows us to reconstruct the state of running transactions during
8723  * archive recovery, if required. Skip, if this info disabled.
8724  *
8725  * If we are shutting down, or Startup process is completing crash
8726  * recovery we don't need to write running xact data.
8727  */
8728  if (!shutdown && XLogStandbyInfoActive())
8729  LogStandbySnapshot();
8730 
8731  START_CRIT_SECTION();
8732 
8733  /*
8734  * Now insert the checkpoint record into XLOG.
8735  */
8736  XLogBeginInsert();
8737  XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
8738  recptr = XLogInsert(RM_XLOG_ID,
8739  shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
8740  XLOG_CHECKPOINT_ONLINE);
8741 
8742  XLogFlush(recptr);
8743 
8744  /*
8745  * We mustn't write any new WAL after a shutdown checkpoint, or it will be
8746  * overwritten at next startup. No-one should even try, this just allows
8747  * sanity-checking. In the case of an end-of-recovery checkpoint, we want
8748  * to just temporarily disable writing until the system has exited
8749  * recovery.
8750  */
8751  if (shutdown)
8752  {
8753  if (flags & CHECKPOINT_END_OF_RECOVERY)
8754  LocalXLogInsertAllowed = -1; /* return to "check" state */
8755  else
8756  LocalXLogInsertAllowed = 0; /* never again write WAL */
8757  }
8758 
8759  /*
8760  * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
8761  * = end of actual checkpoint record.
8762  */
8763  if (shutdown && checkPoint.redo != ProcLastRecPtr)
8764  ereport(PANIC,
8765  (errmsg("concurrent transaction log activity while database system is shutting down")));
8766 
8767  /*
8768  * Remember the prior checkpoint's redo pointer, used later to determine
8769  * the point where the log can be truncated.
8770  */
8771  PriorRedoPtr = ControlFile->checkPointCopy.redo;
8772 
8773  /*
8774  * Update the control file.
8775  */
8776  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
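8777  if (shutdown)
8778  ControlFile->state = DB_SHUTDOWNED;
8779  ControlFile->prevCheckPoint = ControlFile->checkPoint;
8780  ControlFile->checkPoint = ProcLastRecPtr;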
8781  ControlFile->checkPointCopy = checkPoint;
8782  ControlFile->time = (pg_time_t) time(NULL);
8783  /* crash recovery should always recover to the end of WAL */
8784  ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
8785  ControlFile->minRecoveryPointTLI = 0;
8786 
8787  /*
8788  * Persist unloggedLSN value. It's reset on crash recovery, so this goes
8789  * unused on non-shutdown checkpoints, but seems useful to store it always
8790  * for debugging purposes.
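8791  */
8792  SpinLockAcquire(&XLogCtl->ulsn_lck);
8793  ControlFile->unloggedLSN = XLogCtl->unloggedLSN;
8794  SpinLockRelease(&XLogCtl->ulsn_lck);
8795 
8796  UpdateControlFile();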
8797  LWLockRelease(ControlFileLock);
8798 
8799  /* Update shared-memory copy of checkpoint XID/epoch */
8800  SpinLockAcquire(&XLogCtl->info_lck);
8801  XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
8802  XLogCtl->ckptXid = checkPoint.nextXid;
8803  SpinLockRelease(&XLogCtl->info_lck);
8804 
8805  /*
8806  * We are now done with critical updates; no need for system panic if we
8807  * have trouble while fooling with old log segments.
8808  */
8809  END_CRIT_SECTION();
8810 
8811  /*
8812  * Let smgr do post-checkpoint cleanup (eg, deleting old files).
8813  */
8814  smgrpostckpt();
8815 
8816  /*
8817  * Delete old log files (those no longer needed even for previous
8818  * checkpoint or the standbys in XLOG streaming).
8819  */
8820  if (PriorRedoPtr != InvalidXLogRecPtr)
8821  {
8822  XLogSegNo _logSegNo;
8823 
8824  /* Update the average distance between checkpoints. */
8825  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
8826 
8827  XLByteToSeg(PriorRedoPtr, _logSegNo);
8828  KeepLogSeg(recptr, &_logSegNo);
8829  _logSegNo--;
8830  RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr);
8831  }
8832 
8833  /*
8834  * Make more log segments if needed. (Do this after recycling old log
8835  * segments, since that may supply some of the needed files.)
8836  */
8837  if (!shutdown)
8838  PreallocXlogFiles(recptr);
8839 
8840  /*
8841  * Truncate pg_subtrans if possible. We can throw away all data before
8842  * the oldest XMIN of any running transaction. No future transaction will
8843  * attempt to reference any pg_subtrans entry older than that (see Asserts
8844  * in subtrans.c). During recovery, though, we mustn't do this because
8845  * StartupSUBTRANS hasn't been called yet.
8846  */
8847  if (!RecoveryInProgress())
8848  TruncateSUBTRANS(GetOldestXmin(NULL, false));
8849 
8850  /* Real work is done, but log and update stats before releasing lock. */
8851  LogCheckpointEnd(false);
8852 
8853  TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
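8854  NBuffers,
8855  CheckpointStats.ckpt_segs_added,
8856  CheckpointStats.ckpt_segs_removed,
8857  CheckpointStats.ckpt_segs_recycled);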
8858 
8859  LWLockRelease(CheckpointLock);
8860 }
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:8172
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8387
#define XLogSegSize
Definition: xlog_internal.h:92
static int LocalXLogInsertAllowed
Definition: xlog.c:232
bool log_checkpoints
Definition: xlog.c:101
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int64 pg_time_t
Definition: pgtime.h:23
TransactionId ckptXid
Definition: xlog.c:575
static void WALInsertLockRelease(void)
Definition: xlog.c:1635
pg_time_t time
Definition: pg_control.h:129
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:65
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
uint32 oidCount
Definition: transam.h:112
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1569
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1904
XLogRecPtr unloggedLSN
Definition: xlog.c:583
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:335
TransactionId oldestActiveXid
Definition: pg_control.h:60
void InitXLogInsert(void)
Definition: xloginsert.c:1029
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:697
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids)
Definition: procarray.c:2237
MultiXactId oldestMulti
Definition: pg_control.h:46
TimeLineID PrevTimeLineID
Definition: xlog.c:624
TimeLineID PrevTimeLineID
Definition: pg_control.h:36
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
int ckpt_segs_recycled
Definition: xlog.h:209
TransactionId oldestXid
Definition: transam.h:119
#define MemSet(start, val, len)
Definition: c.h:853
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
Definition: xlog.c:3808
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2118
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:8921
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:569
TransactionId oldestXid
Definition: pg_control.h:44
bool RecoveryInProgress(void)
Definition: xlog.c:7805
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:342
uint32 ckptXidEpoch
Definition: xlog.c:574
TransactionId nextXid
Definition: pg_control.h:40
pg_time_t time
Definition: pg_control.h:48
#define PANIC
Definition: elog.h:53
bool fullPageWrites
Definition: xlog.c:543
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2745
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
MultiXactOffset nextMultiOffset
Definition: pg_control.h:43
void UpdateControlFile(void)
Definition: xlog.c:4616
TransactionId oldestCommitTsXid
Definition: pg_control.h:49
void pfree(void *pointer)
Definition: mcxt.c:992
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:913
#define ERROR
Definition: elog.h:43
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8302
TransactionId nextXid
Definition: transam.h:117
uint32 nextXidEpoch
Definition: pg_control.h:39
static XLogRecPtr RedoRecPtr
Definition: xlog.c:349
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:64
XLogRecPtr unloggedLSN
Definition: pg_control.h:135
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3717
uint64 XLogSegNo
Definition: xlogdefs.h:34
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:176
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define InvalidTransactionId
Definition: transam.h:31
uint64 CurrBytePos
Definition: xlog.c:519
unsigned int uint32
Definition: c.h:265
XLogRecPtr RedoRecPtr
Definition: xlog.c:573
int ckpt_segs_removed
Definition: xlog.h:208
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:714
#define ereport(elevel, rest)
Definition: elog.h:122
TransactionId oldestCommitTsXid
Definition: transam.h:129
static void Insert(File file)
Definition: fd.c:1007
int ckpt_bufs_written
Definition: xlog.h:205
static void LocalSetXLogInsertAllowed(void)
Definition: xlog.c:7931
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
TransactionId newestCommitTsXid
Definition: pg_control.h:51
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9245
Oid oldestMultiDB
Definition: pg_control.h:47
#define XLogStandbyInfoActive()
Definition: xlog.h:159
XLogRecPtr prevCheckPoint
Definition: pg_control.h:131
static ControlFileData * ControlFile
Definition: xlog.c:708
TimeLineID ThisTimeLineID
Definition: xlog.c:178
Oid nextOid
Definition: pg_control.h:41
bool fullPageWrites
Definition: pg_control.h:38
void smgrpreckpt(void)
Definition: smgr.c:744
TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum)
Definition: procarray.c:1305
#define XLByteToSeg(xlrp, logSegNo)
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
Oid oldestXidDB
Definition: pg_control.h:45
TransactionId newestCommitTsXid
Definition: transam.h:130
CheckpointStatsData CheckpointStats
Definition: xlog.c:172
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:55
MultiXactId nextMulti
Definition: pg_control.h:42
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1606
static XLogCtlData * XLogCtl
Definition: xlog.c:700
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
int ckpt_segs_added
Definition: xlog.h:207
slock_t ulsn_lck
Definition: xlog.c:584
TimeLineID ThisTimeLineID
Definition: pg_control.h:35
int errmsg(const char *fmt,...)
Definition: elog.c:797
TransactionId GetOldestActiveTransactionId(void)
Definition: procarray.c:2083
int NBuffers
Definition: globals.c:122
#define elog
Definition: elog.h:219
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
Definition: procarray.c:2282
void XLogBeginInsert(void)
Definition: xloginsert.c:120
XLogRecPtr RedoRecPtr
Definition: xlog.c:541
void smgrpostckpt(void)
Definition: smgr.c:774
XLogRecPtr checkPoint
Definition: pg_control.h:130
XLogRecPtr redo
Definition: pg_control.h:33
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8284
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:175
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:72
bool CreateRestartPoint ( int  flags)

Definition at line 8991 of file xlog.c.

References XLogCtlData::archiveCleanupCommand, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStatsData::ckpt_start_t, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestXmin(), GetWalRcvWriteRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, XLogCtlData::Insert, InvalidXLogRecPtr, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, NULL, PreallocXlogFiles(), ControlFileData::prevCheckPoint, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, ThisTimeLineID, ControlFileData::time, timestamptz_to_str(), TruncateSUBTRANS(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, and XLogRecPtrIsInvalid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

8992 {
8993  XLogRecPtr lastCheckPointRecPtr;
8994  XLogRecPtr lastCheckPointEndPtr;
8995  CheckPoint lastCheckPoint;
8996  XLogRecPtr PriorRedoPtr;
8997  TimestampTz xtime;
8998 
8999  /*
9000  * Acquire CheckpointLock to ensure only one restartpoint or checkpoint
9001  * happens at a time.
9002  */
9003  LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
9004 
9005  /* Get a local copy of the last safe checkpoint record. */
9006  SpinLockAcquire(&XLogCtl->info_lck);
9007  lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
9008  lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
9009  lastCheckPoint = XLogCtl->lastCheckPoint;
9010  SpinLockRelease(&XLogCtl->info_lck);
9011 
9012  /*
9013  * Check that we're still in recovery mode. It's ok if we exit recovery
9014  * mode after this check, the restart point is valid anyway.
9015  */
9016  if (!RecoveryInProgress())
9017  {
9018  ereport(DEBUG2,
9019  (errmsg("skipping restartpoint, recovery has already ended")));
9020  LWLockRelease(CheckpointLock);
9021  return false;
9022  }
9023 
9024  /*
9025  * If the last checkpoint record we've replayed is already our last
9026  * restartpoint, we can't perform a new restart point. We still update
9027  * minRecoveryPoint in that case, so that if this is a shutdown restart
9028  * point, we won't start up earlier than before. That's not strictly
9029  * necessary, but when hot standby is enabled, it would be rather weird if
9030  * the database opened up for read-only connections at a point-in-time
9031  * before the last shutdown. Such time travel is still possible in case of
9032  * immediate shutdown, though.
9033  *
9034  * We don't explicitly advance minRecoveryPoint when we do create a
9035  * restartpoint. It's assumed that flushing the buffers will do that as a
9036  * side-effect.
9037  */
9038  if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
9039  lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
9040  {
9041  ereport(DEBUG2,
9042  (errmsg("skipping restartpoint, already performed at %X/%X",
9043  (uint32) (lastCheckPoint.redo >> 32),
9044  (uint32) lastCheckPoint.redo)));
9045 
9046  UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
9047  if (flags & CHECKPOINT_IS_SHUTDOWN)
9048  {
9049  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9050  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
9051  ControlFile->time = (pg_time_t) time(NULL);
9052  UpdateControlFile();
9053  LWLockRelease(ControlFileLock);
9054  }
9055  LWLockRelease(CheckpointLock);
9056  return false;
9057  }
9058 
9059  /*
9060  * Update the shared RedoRecPtr so that the startup process can calculate
9061  * the number of segments replayed since last restartpoint, and request a
9062  * restartpoint if it exceeds CheckPointSegments.
9063  *
9064  * Like in CreateCheckPoint(), hold off insertions to update it, although
9065  * during recovery this is just pro forma, because no WAL insertions are
9066  * happening.
9067  */
9068  WALInsertLockAcquireExclusive();
9069  RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
9070  WALInsertLockRelease();
9071 
9072  /* Also update the info_lck-protected copy */
9073  SpinLockAcquire(&XLogCtl->info_lck);
9074  XLogCtl->RedoRecPtr = lastCheckPoint.redo;
9075  SpinLockRelease(&XLogCtl->info_lck);
9076 
9077  /*
9078  * Prepare to accumulate statistics.
9079  *
9080  * Note: because it is possible for log_checkpoints to change while a
9081  * checkpoint proceeds, we always accumulate stats, even if
9082  * log_checkpoints is currently off.
9083  */
9084  MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
9085  CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
9086 
9087  if (log_checkpoints)
9088  LogCheckpointStart(flags, true);
9089 
9090  CheckPointGuts(lastCheckPoint.redo, flags);
9091 
9092  /*
9093  * Remember the prior checkpoint's redo pointer, used later to determine
9094  * the point at which we can truncate the log.
9095  */
9096  PriorRedoPtr = ControlFile->checkPointCopy.redo;
9097 
9098  /*
9099  * Update pg_control, using current time. Check that it still shows
9100  * IN_ARCHIVE_RECOVERY state and an older checkpoint, else do nothing;
9101  * this is a quick hack to make sure nothing really bad happens if somehow
9102  * we get here after the end-of-recovery checkpoint.
9103  */
9104  LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9105  if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
9106  ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
9107  {
9108  ControlFile->prevCheckPoint = ControlFile->checkPoint;
9109  ControlFile->checkPoint = lastCheckPointRecPtr;
9110  ControlFile->checkPointCopy = lastCheckPoint;
9111  ControlFile->time = (pg_time_t) time(NULL);
9112 
9113  /*
9114  * Ensure minRecoveryPoint is past the checkpoint record. Normally,
9115  * this will have happened already while writing out dirty buffers,
9116  * but not necessarily - e.g. because no buffers were dirtied. We do
9117  * this because a non-exclusive base backup uses minRecoveryPoint to
9118  * determine which WAL files must be included in the backup, and the
9119  * file (or files) containing the checkpoint record must be included,
9120  * at a minimum. Note that for an ordinary restart of recovery there's
9121  * no value in having the minimum recovery point any earlier than this
9122  * anyway, because redo will begin just after the checkpoint record.
9123  */
9124  if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
9125  {
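9126  ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
9127  ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
9128 
9129  /* update local copy */
9130  minRecoveryPoint = ControlFile->minRecoveryPoint;
9131  minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;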
9132  }
9133  if (flags & CHECKPOINT_IS_SHUTDOWN)
9134  ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
9135  UpdateControlFile();
9136  }
9137  LWLockRelease(ControlFileLock);
9138 
9139  /*
9140  * Delete old log files (those no longer needed even for previous
9141  * checkpoint/restartpoint) to prevent the disk holding the xlog from
9142  * growing full.
9143  */
9144  if (PriorRedoPtr != InvalidXLogRecPtr)
9145  {
9146  XLogRecPtr receivePtr;
9147  XLogRecPtr replayPtr;
9148  TimeLineID replayTLI;
9149  XLogRecPtr endptr;
9150  XLogSegNo _logSegNo;
9151 
9152  /* Update the average distance between checkpoints/restartpoints. */
9153  UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
9154 
9155  XLByteToSeg(PriorRedoPtr, _logSegNo);
9156 
9157  /*
9158  * Get the current end of xlog replayed or received, whichever is
9159  * later.
9160  */
9161  receivePtr = GetWalRcvWriteRecPtr(NULL, NULL);
9162  replayPtr = GetXLogReplayRecPtr(&replayTLI);
9163  endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
9164 
9165  KeepLogSeg(endptr, &_logSegNo);
9166  _logSegNo--;
9167 
9168  /*
9169  * Try to recycle segments on a useful timeline. If we've been
9170  * promoted since the beginning of this restartpoint, use the new
9171  * timeline chosen at end of recovery (RecoveryInProgress() sets
9172  * ThisTimeLineID in that case). If we're still in recovery, use the
9173  * timeline we're currently replaying.
9174  *
9175  * There is no guarantee that the WAL segments will be useful on the
9176  * current timeline; if recovery proceeds to a new timeline right
9177  * after this, the pre-allocated WAL segments on this timeline will
9178  * not be used, and will go wasted until recycled on the next
9179  * restartpoint. We'll live with that.
9180  */
9181  if (RecoveryInProgress())
9182  ThisTimeLineID = replayTLI;
9183 
9184  RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, endptr);
9185 
9186  /*
9187  * Make more log segments if needed. (Do this after recycling old log
9188  * segments, since that may supply some of the needed files.)
9189  */
9190  PreallocXlogFiles(endptr);
9191 
9192  /*
9193  * ThisTimeLineID is normally not set when we're still in recovery.
9194  * However, recycling/preallocating segments above needed
9195  * ThisTimeLineID to determine which timeline to install the segments
9196  * on. Reset it now, to restore the normal state of affairs for
9197  * debugging purposes.
9198  */
9199  if (RecoveryInProgress())
9200  ThisTimeLineID = 0;
9201  }
9202 
9203  /*
9204  * Truncate pg_subtrans if possible. We can throw away all data before
9205  * the oldest XMIN of any running transaction. No future transaction will
9206  * attempt to reference any pg_subtrans entry older than that (see Asserts
9207  * in subtrans.c). When hot standby is disabled, though, we mustn't do
9208  * this because StartupSUBTRANS hasn't been called yet.
9209  */
9210  if (EnableHotStandby)
9211  TruncateSUBTRANS(GetOldestXmin(NULL, false));
9212 
9213  /* Real work is done, but log and update before releasing lock. */
9214  LogCheckpointEnd(true);
9215 
9216  xtime = GetLatestXTime();
9217  ereport((log_checkpoints ? LOG : DEBUG2),
9218  (errmsg("recovery restart point at %X/%X",
9219  (uint32) (lastCheckPoint.redo >> 32), (uint32) lastCheckPoint.redo),
9220  xtime ? errdetail("last completed transaction was at log time %s",
9221  timestamptz_to_str(xtime)) : 0));
9222 
9223  LWLockRelease(CheckpointLock);
9224 
9225  /*
9226  * Finally, execute archive_cleanup_command, if any.
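9227  */
9228  if (XLogCtl->archiveCleanupCommand[0])
9229  ExecuteRecoveryCommand(XLogCtl->archiveCleanupCommand,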
9230  "archive_cleanup_command",
9231  false);
9232 
9233  return true;
9234 }
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:8387
bool log_checkpoints
Definition: xlog.c:101
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void WALInsertLockRelease(void)
Definition: xlog.c:1635
pg_time_t time
Definition: pg_control.h:129
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1569
int64 TimestampTz
Definition: timestamp.h:39
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2670
TimestampTz ckpt_start_t
Definition: xlog.h:199
slock_t info_lck
Definition: xlog.c:697
#define MemSet(start, val, len)
Definition: c.h:853
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
Definition: xlog.c:3808
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:8921
TimestampTz GetLatestXTime(void)
Definition: xlog.c:6004
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:569
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:7805
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:342
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:666
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define SpinLockAcquire(lock)
Definition: spin.h:62
void UpdateControlFile(void)
Definition: xlog.c:4616
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:8302
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlog.c:10984
#define DEBUG2
Definition: elog.h:24
static XLogRecPtr RedoRecPtr
Definition: xlog.c:349
static void PreallocXlogFiles(XLogRecPtr endptr)
Definition: xlog.c:3717
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errdetail(const char *fmt,...)
Definition: elog.c:873
unsigned int uint32
Definition: c.h:265
XLogRecPtr RedoRecPtr
Definition: xlog.c:573
#define ereport(elevel, rest)
Definition: elog.h:122
CheckPoint lastCheckPoint
Definition: xlog.c:668
void ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
Definition: xlogarchive.c:330
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SpinLockRelease(lock)
Definition: spin.h:64
static TimeLineID minRecoveryPointTLI
Definition: xlog.c:813
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:9245
XLogRecPtr prevCheckPoint
Definition: pg_control.h:131
static ControlFileData * ControlFile
Definition: xlog.c:708
TimeLineID ThisTimeLineID
Definition: xlog.c:178
TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum)
Definition: procarray.c:1305
#define XLByteToSeg(xlrp, logSegNo)
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
CheckpointStatsData CheckpointStats
Definition: xlog.c:172
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1606
static XLogCtlData * XLogCtl
Definition: xlog.c:700
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
char archiveCleanupCommand[MAXPGPATH]
Definition: xlog.c:630
bool EnableHotStandby
Definition: xlog.c:95
TimeLineID ThisTimeLineID
Definition: pg_control.h:35
int errmsg(const char *fmt,...)
Definition: elog.c:797
XLogRecPtr RedoRecPtr
Definition: xlog.c:541
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:667
XLogRecPtr checkPoint
Definition: pg_control.h:130
XLogRecPtr redo
Definition: pg_control.h:33
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:8284
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:175
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
static XLogRecPtr minRecoveryPoint
Definition: xlog.c:811
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1709
bool DataChecksumsEnabled ( void  )

Definition at line 4671 of file xlog.c.

References Assert, ControlFileData::data_checksum_version, and NULL.

Referenced by PageIsVerified(), PageSetChecksumCopy(), PageSetChecksumInplace(), and ReadControlFile().

4672 {
 /*
  * Report whether data checksums are enabled: the result is true exactly
  * when the data_checksum_version field of the ControlFileData pointed to
  * by ControlFile is greater than zero.  ControlFile must already be
  * non-NULL; that precondition is asserted rather than handled.
  */
4673  Assert(ControlFile != NULL);
4674  return (ControlFile->data_checksum_version > 0);
4675 }
uint32 data_checksum_version
Definition: pg_control.h:223
static ControlFileData * ControlFile
Definition: xlog.c:708
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
void do_pg_abort_backup ( void  )

Definition at line 10964 of file xlog.c.

References Assert, EXCLUSIVE_BACKUP_NONE, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, XLogCtlData::Insert, XLogCtlInsert::nonExclusiveBackups, WALInsertLockAcquireExclusive(), and WALInsertLockRelease().

Referenced by base_backup_cleanup(), and nonexclusive_base_backup_cleanup().

10965 {
 /*
  * NOTE(review): this listing is incomplete -- numbered lines 10966-10968,
  * 10970-10971 and 10975 are not shown, so the condition guarding the
  * block below and any locking around it cannot be read from here.  The
  * reference list for this function names WALInsertLockAcquireExclusive(),
  * WALInsertLockRelease(), Assert, nonExclusiveBackups and
  * exclusiveBackupState; presumably the missing lines use them -- confirm
  * against xlog.c before relying on this excerpt.
  */
10969 
10972  {
 /* Clear the forcePageWrites flag held in XLogCtl->Insert. */
10973  XLogCtl->Insert.forcePageWrites = false;
10974  }
10976 }
static void WALInsertLockRelease(void)
Definition: xlog.c:1635
XLogCtlInsert Insert
Definition: xlog.c:569
bool forcePageWrites
Definition: xlog.c:542
int nonExclusiveBackups
Definition: xlog.c:555
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:554
#define Assert(condition)
Definition: c.h:671
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1606
static XLogCtlData * XLogCtl
Definition: xlog.c:700
XLogRecPtr do_pg_start_backup ( const char *  backupidstr,
bool  fast,
TimeLineID *  starttli_p,
StringInfo  labelfile,
DIR *  tblspcdir,
List **  tablespaces,
StringInfo  tblspcmapfile,
bool  infotbssize,
bool  needtblspcmapfile 
)

Definition at line 10087 of file xlog.c.

References AllocateFile(), appendStringInfo(), appendStringInfoChar(), BACKUP_LABEL_FILE, backup_started_in_recovery, BoolGetDatum, ControlFileData::checkPoint, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, dirent::d_name, StringInfoData::data, DataDir, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STARTING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeFile(), CheckPoint::fullPageWrites, XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, StringInfoData::len, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), makeStringInfo(), MAXFNAMELEN, MAXPGPATH, XLogCtlInsert::nonExclusiveBackups, NULL, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_fsync(), pg_localtime(), pg_start_backup_callback(), pg_strftime(), pstrdup(), ReadDir(), RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, sendTablespace(), tablespaceinfo::size, snprintf(), SpinLockAcquire, SpinLockRelease, TABLESPACE_MAP, CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToSeg, XLogFileName, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_start_backup().

10091 {
 /*
  * Begin an online base backup and return the WAL location it starts from.
  *
  * Parameters (as used by the code visible below):
  *   backupidstr       - label text written as "LABEL: ..." into the backup
  *                       label; rejected with ERROR if longer than MAXPGPATH.
  *   fast              - when true, CHECKPOINT_IMMEDIATE is added to the
  *                       checkpoint request flags.
  *   starttli_p        - if non-NULL, receives the starting timeline ID.
  *   labelfile         - NULL selects "exclusive" mode: the label is built in
  *                       a locally allocated buffer, written to
  *                       BACKUP_LABEL_FILE and freed.  Otherwise the label
  *                       text is appended to the caller's StringInfo.
  *   tblspcdir         - directory handle handed to ReadDir() to enumerate
  *                       "pg_tblspc".
  *   tablespaces       - if non-NULL, one tablespaceinfo per tablespace link
  *                       is appended to this list.
  *   tblspcmapfile     - receives one "oid path" line per tablespace; in
  *                       exclusive mode it is replaced by a locally allocated
  *                       buffer that is written to TABLESPACE_MAP (only when
  *                       non-empty) and freed.
  *   infotbssize       - when true, each entry's size comes from
  *                       sendTablespace(fullpath, true); otherwise it is -1.
  *   needtblspcmapfile - when true, newline and carriage-return characters
  *                       in link targets are backslash-escaped.
  *
  * Returns startpoint, which is read from ControlFile->checkPointCopy.redo.
  *
  * Raises ERROR when an exclusive backup is requested during recovery, when
  * the WAL level is insufficient outside recovery, when the label is too
  * long, when a backup is already in progress, when WAL written with
  * full_page_writes=off was replayed on a standby, or when the label /
  * tablespace map files cannot be checked, created or written.
  *
  * NOTE(review): this listing omits a number of numbered source lines
  * (among them 10152, 10160, 10162, 10168, 10171, 10173, 10176, 10229,
  * 10241, 10254, 10256, 10289, 10295, 10502 and 10509-10511).  Statements
  * on those lines -- presumably the lock acquire/release calls, the
  * exclusiveBackupState updates, the RequestCheckpoint() call, the
  * assignment of starttli and the PG_ENSURE_ERROR_CLEANUP brackets named in
  * the reference list -- are not visible here; confirm against xlog.c.
  */
10092  bool exclusive = (labelfile == NULL);
10093  bool backup_started_in_recovery = false;
10094  XLogRecPtr checkpointloc;
10095  XLogRecPtr startpoint;
10096  TimeLineID starttli;
10097  pg_time_t stamp_time;
10098  char strfbuf[128];
10099  char xlogfilename[MAXFNAMELEN];
10100  XLogSegNo _logSegNo;
10101  struct stat stat_buf;
10102  FILE *fp;
10103 
10104  backup_started_in_recovery = RecoveryInProgress();
10105 
10106  /*
10107  * Currently only non-exclusive backup can be taken during recovery.
10108  */
10109  if (backup_started_in_recovery && exclusive)
10110  ereport(ERROR,
10111  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10112  errmsg("recovery is in progress"),
10113  errhint("WAL control functions cannot be executed during recovery.")));
10114 
10115  /*
10116  * During recovery, we don't need to check the WAL level, because if the
10117  * WAL level were not sufficient it would be impossible to get here.
10118  */
10119  if (!backup_started_in_recovery && !XLogIsNeeded())
10120  ereport(ERROR,
10121  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10122  errmsg("WAL level not sufficient for making an online backup"),
10123  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10124 
10125  if (strlen(backupidstr) > MAXPGPATH)
10126  ereport(ERROR,
10127  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
10128  errmsg("backup label too long (max %d bytes)",
10129  MAXPGPATH)));
10130 
10131  /*
10132  * Mark backup active in shared memory. We must do full-page WAL writes
10133  * during an on-line backup even if not doing so at other times, because
10134  * it's quite possible for the backup dump to obtain a "torn" (partially
10135  * written) copy of a database page if it reads the page concurrently with
10136  * our write to the same page. This can be fixed as long as the first
10137  * write to the page in the WAL sequence is a full-page write. Hence, we
10138  * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
10139  * are no dirty pages in shared memory that might get dumped while the
10140  * backup is in progress without having a corresponding WAL record. (Once
10141  * the backup is complete, we need not force full-page writes anymore,
10142  * since we expect that any pages not modified during the backup interval
10143  * must have been correctly captured by the backup.)
10144  *
10145  * Note that forcePageWrites has no effect during an online backup from
10146  * the standby.
10147  *
10148  * We must hold all the insertion locks to change the value of
10149  * forcePageWrites, to ensure adequate interlocking against
10150  * XLogInsertRecord().
10151  */
10153  if (exclusive)
10154  {
10155  /*
10156  * First, mark that we're now starting an exclusive backup,
10157  * to ensure that there are no other sessions currently running
10158  * pg_start_backup() or pg_stop_backup().
10159  */
10161  {
10163  ereport(ERROR,
10164  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10165  errmsg("a backup is already in progress"),
10166  errhint("Run pg_stop_backup() and try again.")));
10167  }
10169  }
10170  else
10172  XLogCtl->Insert.forcePageWrites = true;
10174 
10175  /* Ensure we release forcePageWrites if fail below */
10177  {
10178  bool gotUniqueStartpoint = false;
10179  struct dirent *de;
10180  tablespaceinfo *ti;
10181  int datadirpathlen;
10182 
10183  /*
10184  * Force an XLOG file switch before the checkpoint, to ensure that the
10185  * WAL segment the checkpoint is written to doesn't contain pages with
10186  * old timeline IDs. That would otherwise happen if you called
10187  * pg_start_backup() right after restoring from a PITR archive: the
10188  * first WAL segment containing the startup checkpoint has pages in
10189  * the beginning with the old timeline ID. That can cause trouble at
10190  * recovery: we won't have a history file covering the old timeline if
10191  * pg_wal directory was not included in the base backup and the WAL
10192  * archive was cleared too before starting the backup.
10193  *
10194  * This also ensures that we have emitted a WAL page header that has
10195  * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
10196  * Therefore, if a WAL archiver (such as pglesslog) is trying to
10197  * compress out removable backup blocks, it won't remove any that
10198  * occur after this point.
10199  *
10200  * During recovery, we skip forcing XLOG file switch, which means that
10201  * the backup taken during recovery is not available for the special
10202  * recovery case described above.
10203  */
10204  if (!backup_started_in_recovery)
10205  RequestXLogSwitch(false);
10206 
10207  do
10208  {
10209  bool checkpointfpw;
10210 
10211  /*
10212  * Force a CHECKPOINT. Aside from being necessary to prevent torn
10213  * page problems, this guarantees that two successive backup runs
10214  * will have different checkpoint positions and hence different
10215  * history file names, even if nothing happened in between.
10216  *
10217  * During recovery, establish a restartpoint if possible. We use
10218  * the last restartpoint as the backup starting checkpoint. This
10219  * means that two successive backup runs can have same checkpoint
10220  * positions.
10221  *
10222  * Since the fact that we are executing do_pg_start_backup()
10223  * during recovery means that checkpointer is running, we can use
10224  * RequestCheckpoint() to establish a restartpoint.
10225  *
10226  * We use CHECKPOINT_IMMEDIATE only if requested by user (via
10227  * passing fast = true). Otherwise this can take a while.
10228  */
10230  (fast ? CHECKPOINT_IMMEDIATE : 0));
10231 
10232  /*
10233  * Now we need to fetch the checkpoint record location, and also
10234  * its REDO pointer. The oldest point in WAL that would be needed
10235  * to restore starting from the checkpoint is precisely the REDO
10236  * pointer.
10237  */
10238  LWLockAcquire(ControlFileLock, LW_SHARED);
10239  checkpointloc = ControlFile->checkPoint;
10240  startpoint = ControlFile->checkPointCopy.redo;
 /*
  * NOTE(review): line 10241 is missing from this listing; presumably it
  * is where starttli gets its value -- confirm.
  */
10242  checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
10243  LWLockRelease(ControlFileLock);
10244 
10245  if (backup_started_in_recovery)
10246  {
10247  XLogRecPtr recptr;
10248 
10249  /*
10250  * Check to see if all WAL replayed during online backup
10251  * (i.e., since last restartpoint used as backup starting
10252  * checkpoint) contain full-page writes.
10253  */
10255  recptr = XLogCtl->lastFpwDisableRecPtr;
10257 
10258  if (!checkpointfpw || startpoint <= recptr)
10259  ereport(ERROR,
10260  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10261  errmsg("WAL generated with full_page_writes=off was replayed "
10262  "since last restartpoint"),
10263  errhint("This means that the backup being taken on the standby "
10264  "is corrupt and should not be used. "
10265  "Enable full_page_writes and run CHECKPOINT on the master, "
10266  "and then try an online backup again.")));
10267 
10268  /*
10269  * During recovery, since we don't use the end-of-backup WAL
10270  * record and don't write the backup history file, the
10271  * starting WAL location doesn't need to be unique. This means
10272  * that two base backups started at the same time might use
10273  * the same checkpoint as starting locations.
10274  */
10275  gotUniqueStartpoint = true;
10276  }
10277 
10278  /*
10279  * If two base backups are started at the same time (in WAL sender
10280  * processes), we need to make sure that they use different
10281  * checkpoints as starting locations, because we use the starting
10282  * WAL location as a unique identifier for the base backup in the
10283  * end-of-backup WAL record and when we write the backup history
10284  * file. Perhaps it would be better to generate a separate unique ID
10285  * for each backup instead of forcing another checkpoint, but
10286  * taking a checkpoint right after another is not that expensive
10287  * either because only a few buffers have been dirtied yet.
10288  */
10290  if (XLogCtl->Insert.lastBackupStart < startpoint)
10291  {
10292  XLogCtl->Insert.lastBackupStart = startpoint;
10293  gotUniqueStartpoint = true;
10294  }
10296  } while (!gotUniqueStartpoint);
10297 
 /* Derive the name of the WAL segment file that contains startpoint. */
10298  XLByteToSeg(startpoint, _logSegNo);
10299  XLogFileName(xlogfilename, starttli, _logSegNo);
10300 
10301  /*
10302  * Construct tablespace_map file
10303  */
10304  if (exclusive)
10305  tblspcmapfile = makeStringInfo();
10306 
10307  datadirpathlen = strlen(DataDir);
10308 
10309  /* Collect information about all tablespaces */
10310  while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
10311  {
10312  char fullpath[MAXPGPATH];
10313  char linkpath[MAXPGPATH];
10314  char *relpath = NULL;
10315  int rllen;
10316  StringInfoData buflinkpath;
10317  char *s = linkpath;
10318 
10319  /* Skip special stuff */
10320  if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
10321  continue;
10322 
10323  snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
10324 
10325 #if defined(HAVE_READLINK) || defined(WIN32)
10326  rllen = readlink(fullpath, linkpath, sizeof(linkpath));
10327  if (rllen < 0)
10328  {
10329  ereport(WARNING,
10330  (errmsg("could not read symbolic link \"%s\": %m",
10331  fullpath)));
10332  continue;
10333  }
10334  else if (rllen >= sizeof(linkpath))
10335  {
10336  ereport(WARNING,
10337  (errmsg("symbolic link \"%s\" target is too long",
10338  fullpath)));
10339  continue;
10340  }
 /* The buffer is terminated here, using the length readlink() returned. */
10341  linkpath[rllen] = '\0';
10342 
10343  /*
10344  * Add the escape character '\\' before newline in a string to
10345  * ensure that we can distinguish between the newline in the
10346  * tablespace path and end of line while reading tablespace_map
10347  * file during archive recovery.
10348  */
10349  initStringInfo(&buflinkpath);
10350 
10351  while (*s)
10352  {
10353  if ((*s == '\n' || *s == '\r') && needtblspcmapfile)
10354  appendStringInfoChar(&buflinkpath, '\\');
10355  appendStringInfoChar(&buflinkpath, *s++);
10356  }
10357 
10358 
10359  /*
10360  * Relpath holds the relative path of the tablespace directory
10361  * when it's located within PGDATA, or NULL if it's located
10362  * elsewhere.
10363  */
10364  if (rllen > datadirpathlen &&
10365  strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
10366  IS_DIR_SEP(linkpath[datadirpathlen]))
10367  relpath = linkpath + datadirpathlen + 1;
10368 
 /*
  * Record this tablespace: path is the (possibly escaped) link target,
  * rpath the unescaped tail of linkpath when it lies under DataDir.
  */
10369  ti = palloc(sizeof(tablespaceinfo));
10370  ti->oid = pstrdup(de->d_name);
10371  ti->path = pstrdup(buflinkpath.data);
10372  ti->rpath = relpath ? pstrdup(relpath) : NULL;
10373  ti->size = infotbssize ? sendTablespace(fullpath, true) : -1;
10374 
10375  if (tablespaces)
10376  *tablespaces = lappend(*tablespaces, ti);
10377 
10378  appendStringInfo(tblspcmapfile, "%s %s\n", ti->oid, ti->path);
10379 
10380  pfree(buflinkpath.data);
10381 #else
10382 
10383  /*
10384  * If the platform does not have symbolic links, it should not be
10385  * possible to have tablespaces - clearly somebody else created
10386  * them. Warn about it and ignore.
10387  */
10388  ereport(WARNING,
10389  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
10390  errmsg("tablespaces are not supported on this platform")));
10391 #endif
10392  }
10393 
10394  /*
10395  * Construct backup label file
10396  */
10397  if (exclusive)
10398  labelfile = makeStringInfo();
10399 
10400  /* Use the log timezone here, not the session timezone */
10401  stamp_time = (pg_time_t) time(NULL);
10402  pg_strftime(strfbuf, sizeof(strfbuf),
10403  "%Y-%m-%d %H:%M:%S %Z",
10404  pg_localtime(&stamp_time, log_timezone));
10405  appendStringInfo(labelfile, "START WAL LOCATION: %X/%X (file %s)\n",
10406  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename);
10407  appendStringInfo(labelfile, "CHECKPOINT LOCATION: %X/%X\n",
10408  (uint32) (checkpointloc >> 32), (uint32) checkpointloc);
10409  appendStringInfo(labelfile, "BACKUP METHOD: %s\n",
10410  exclusive ? "pg_start_backup" : "streamed");
10411  appendStringInfo(labelfile, "BACKUP FROM: %s\n",
10412  backup_started_in_recovery ? "standby" : "master");
10413  appendStringInfo(labelfile, "START TIME: %s\n", strfbuf);
10414  appendStringInfo(labelfile, "LABEL: %s\n", backupidstr);
10415 
10416  /*
10417  * Okay, write the file, or return its contents to caller.
10418  */
10419  if (exclusive)
10420  {
10421  /*
10422  * Check for existing backup label --- implies a backup is already
10423  * running. (XXX given that we checked exclusiveBackupState above,
10424  * maybe it would be OK to just unlink any such label file?)
10425  */
10426  if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
10427  {
10428  if (errno != ENOENT)
10429  ereport(ERROR,
10431  errmsg("could not stat file \"%s\": %m",
10432  BACKUP_LABEL_FILE)));
10433  }
10434  else
10435  ereport(ERROR,
10436  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10437  errmsg("a backup is already in progress"),
10438  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10439  BACKUP_LABEL_FILE)));
10440 
10441  fp = AllocateFile(BACKUP_LABEL_FILE, "w");
10442 
10443  if (!fp)
10444  ereport(ERROR,
10446  errmsg("could not create file \"%s\": %m",
10447  BACKUP_LABEL_FILE)));
 /* Any failure while writing, flushing, syncing or closing is an ERROR. */
10448  if (fwrite(labelfile->data, labelfile->len, 1, fp) != 1 ||
10449  fflush(fp) != 0 ||
10450  pg_fsync(fileno(fp)) != 0 ||
10451  ferror(fp) ||
10452  FreeFile(fp))
10453  ereport(ERROR,
10455  errmsg("could not write file \"%s\": %m",
10456  BACKUP_LABEL_FILE)));
10457  /* Allocated locally for exclusive backups, so free separately */
10458  pfree(labelfile->data);
10459  pfree(labelfile);
10460 
10461  /* Write backup tablespace_map file. */
10462  if (tblspcmapfile->len > 0)
10463  {
10464  if (stat(TABLESPACE_MAP, &stat_buf) != 0)
10465  {
10466  if (errno != ENOENT)
10467  ereport(ERROR,
10469  errmsg("could not stat file \"%s\": %m",
10470  TABLESPACE_MAP)));
10471  }
10472  else
10473  ereport(ERROR,
10474  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10475  errmsg("a backup is already in progress"),
10476  errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
10477  TABLESPACE_MAP)));
10478 
10479  fp = AllocateFile(TABLESPACE_MAP, "w");
10480 
10481  if (!fp)
10482  ereport(ERROR,
10484  errmsg("could not create file \"%s\": %m",
10485  TABLESPACE_MAP)));
10486  if (fwrite(tblspcmapfile->data, tblspcmapfile->len, 1, fp) != 1 ||
10487  fflush(fp) != 0 ||
10488  pg_fsync(fileno(fp)) != 0 ||
10489  ferror(fp) ||
10490  FreeFile(fp))
10491  ereport(ERROR,
10493  errmsg("could not write file \"%s\": %m",
10494  TABLESPACE_MAP)));
10495  }
10496 
10497  /* Allocated locally for exclusive backups, so free separately */
10498  pfree(tblspcmapfile->data);
10499  pfree(tblspcmapfile);
10500  }
10501  }
10503 
10504  /*
10505  * Mark that start phase has correctly finished for an exclusive backup.
10506  */
10507  if (exclusive)
10508  {
10512  }
10513 
10514  /*
10515  * We're done. As a convenience, return the starting WAL location.
10516  */
10517  if (starttli_p)
10518  *starttli_p = starttli;
10519  return startpoint;
10520 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:124
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9322
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1635
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:695
XLogRecPtr lastBackupStart
Definition: xlog.c:556
char * pstrdup(const char *in)
Definition: mcxt.c:1165
#define XLogIsNeeded()
Definition: xlog.h:145
char * rpath
Definition: basebackup.h:28
StringInfo makeStringInfo(void)
Definition: stringinfo.c:29
slock_t info_lck
Definition: xlog.c:697
#define XLogFileName(fname, tli, logSegNo)
int errcode(int sqlerrcode)
Definition: elog.c:575
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
CheckPoint checkPointCopy
Definition: pg_control.h:133
XLogCtlInsert Insert
Definition: xlog.c:569
bool RecoveryInProgress(void)
Definition: xlog.c:7805
static bool backup_started_in_recovery
Definition: basebackup.c:73
Definition: dirent.h:9
#define IS_DIR_SEP(ch)
Definition: port.h:75
pg_tz * log_timezone
Definition: pgtz.c:30
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define TABLESPACE_MAP
Definition: xlog.h:304
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
void pfree(void *pointer)
Definition: mcxt.c:992
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:110
bool forcePageWrites
Definition: xlog.c:542
#define ERROR
Definition: elog.h:43
struct stat stat_buf
Definition: pg_standby.c:101
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2043
unsigned int uint32
Definition: c.h:265
int64 sendTablespace(char *path, bool sizeonly)
Definition: basebackup.c:903
#define CHECKPOINT_FORCE
Definition: xlog.h:180
#define ereport(elevel, rest)
Definition: elog.h:122
List * lappend(List *list, void *datum)
Definition: list.c:128
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:201
void initStringInfo(StringInfo str)
Definition: stringinfo.c:65
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:555
#define MAXFNAMELEN
#define SpinLockRelease(lock)
Definition: spin.h:64
static void pg_start_backup_callback(int code, Datum arg)
Definition: xlog.c:10524
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:554
uintptr_t Datum
Definition: postgres.h:374
static ControlFileData * ControlFile
Definition: xlog.c:708
#define BoolGetDatum(X)
Definition: postgres.h:410
bool fullPageWrites
Definition: pg_control.h:38
#define CHECKPOINT_WAIT
Definition: xlog.h:184
#define XLByteToSeg(xlrp, logSegNo)
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2350
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1606
static XLogCtlData * XLogCtl
Definition: xlog.c:700
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1254
int FreeFile(FILE *file)
Definition: fd.c:2226
void * palloc(Size size)
Definition: mcxt.c:891
TimeLineID ThisTimeLineID
Definition: pg_control.h:35
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:179
#define relpath(rnode, forknum)
Definition: relpath.h:71
char * DataDir
Definition: globals.c:59
#define BACKUP_LABEL_FILE
Definition: xlog.h:301
int pg_fsync(int fd)
Definition: fd.c:333
char d_name[MAX_PATH]
Definition: dirent.h:14
XLogRecPtr checkPoint
Definition: pg_control.h:130
XLogRecPtr redo
Definition: pg_control.h:33
void RequestCheckpoint(int flags)
Definition: checkpointer.c:967
XLogRecPtr do_pg_stop_backup ( char *  labelfile,
bool  waitforarchive,
TimeLineID * stoptli_p 
)

Definition at line 10581 of file xlog.c.

References AllocateFile(), Assert, BACKUP_LABEL_FILE, backup_started_in_recovery, BackupHistoryFileName, BackupHistoryFilePath, BoolGetDatum, CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXCLUSIVE_BACKUP_IN_PROGRESS, EXCLUSIVE_BACKUP_NONE, EXCLUSIVE_BACKUP_STOPPING, XLogCtlInsert::exclusiveBackupState, XLogCtlInsert::forcePageWrites, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::lastFpwDisableRecPtr, log_timezone, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlInsert::nonExclusiveBackups, NOTICE, NULL, palloc(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, pg_localtime(), pg_stop_backup_callback(), pg_strftime(), pg_usleep(), RecoveryInProgress(), remaining, RequestXLogSwitch(), SpinLockAcquire, SpinLockRelease, TABLESPACE_MAP, ThisTimeLineID, unlink(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogBeginInsert(), XLogFileName, XLogInsert(), XLogIsNeeded, XLogRegisterData(), and XLogSegSize.

Referenced by perform_base_backup(), pg_stop_backup(), and pg_stop_backup_v2().

10582 {
10583  bool exclusive = (labelfile == NULL);
10584  bool backup_started_in_recovery = false;
10585  XLogRecPtr startpoint;
10586  XLogRecPtr stoppoint;
10587  TimeLineID stoptli;
10588  pg_time_t stamp_time;
10589  char strfbuf[128];
10590  char histfilepath[MAXPGPATH];
10591  char startxlogfilename[MAXFNAMELEN];
10592  char stopxlogfilename[MAXFNAMELEN];
10593  char lastxlogfilename[MAXFNAMELEN];
10594  char histfilename[MAXFNAMELEN];
10595  char backupfrom[20];
10596  XLogSegNo _logSegNo;
10597  FILE *lfp;
10598  FILE *fp;
10599  char ch;
10600  int seconds_before_warning;
10601  int waits = 0;
10602  bool reported_waiting = false;
10603  char *remaining;
10604  char *ptr;
10605  uint32 hi,
10606  lo;
10607 
10608  backup_started_in_recovery = RecoveryInProgress();
10609 
10610  /*
10611  * Currently only non-exclusive backup can be taken during recovery.
10612  */
10613  if (backup_started_in_recovery && exclusive)
10614  ereport(ERROR,
10615  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10616  errmsg("recovery is in progress"),
10617  errhint("WAL control functions cannot be executed during recovery.")));
10618 
10619  /*
10620  * During recovery, we don't need to check WAL level, because if WAL
10621  * level is not sufficient, it's impossible to get here during recovery.
10622  */
10623  if (!backup_started_in_recovery && !XLogIsNeeded())
10624  ereport(ERROR,
10625  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10626  errmsg("WAL level not sufficient for making an online backup"),
10627  errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
10628 
10629  if (exclusive)
10630  {
10631  /*
10632  * At first, mark that we're now stopping an exclusive backup,
10633  * to ensure that there are no other sessions currently running
10634  * pg_start_backup() or pg_stop_backup().
10635  */
10638  {
10640  ereport(ERROR,
10641  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10642  errmsg("exclusive backup not in progress")));
10643  }
10646 
10647  /*
10648  * Remove backup_label. In case of failure, the state for an exclusive
10649  * backup is switched back to in-progress.
10650  */
10652  {
10653  /*
10654  * Read the existing label file into memory.
10655  */
10656  struct stat statbuf;
10657  int r;
10658 
10659  if (stat(BACKUP_LABEL_FILE, &statbuf))
10660  {
10661  /* should not happen per the upper checks */
10662  if (errno != ENOENT)
10663  ereport(ERROR,
10665  errmsg("could not stat file \"%s\": %m",
10666  BACKUP_LABEL_FILE)));
10667  ereport(ERROR,
10668  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10669  errmsg("a backup is not in progress")));
10670  }
10671 
10672  lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
10673  if (!lfp)
10674  {
10675  ereport(ERROR,
10677  errmsg("could not read file \"%s\": %m",
10678  BACKUP_LABEL_FILE)));
10679  }
10680  labelfile = palloc(statbuf.st_size + 1);
10681  r = fread(labelfile, statbuf.st_size, 1, lfp);
10682  labelfile[statbuf.st_size] = '\0';
10683 
10684  /*
10685  * Close and remove the backup label file
10686  */
10687  if (r != 1 || ferror(lfp) || FreeFile(lfp))
10688  ereport(ERROR,
10690  errmsg("could not read file \"%s\": %m",
10691  BACKUP_LABEL_FILE)));
10692  if (unlink(BACKUP_LABEL_FILE) != 0)
10693  ereport(ERROR,
10695  errmsg("could not remove file \"%s\": %m",
10696  BACKUP_LABEL_FILE)));
10697 
10698  /*
10699  * Remove tablespace_map file if present, it is created only if there
10700  * are tablespaces.
10701  */
10703  }
10705  }
10706 
10707  /*
10708  * OK to update backup counters and forcePageWrites
10709  */
10711  if (exclusive)
10712  {
10714  }
10715  else
10716  {
10717  /*
10718  * The user-visible pg_start/stop_backup() functions that operate on
10719  * exclusive backups can be called at any time, but for non-exclusive
10720  * backups, it is expected that each do_pg_start_backup() call is
10721  * matched by exactly one do_pg_stop_backup() call.
10722  */
10725  }
10726 
10729  {
10730  XLogCtl->Insert.forcePageWrites = false;
10731  }
10733 
10734  /*
10735  * Read and parse the START WAL LOCATION line (this code is pretty crude,
10736  * but we are not expecting any variability in the file format).
10737  */
10738  if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
10739  &hi, &lo, startxlogfilename,
10740  &ch) != 4 || ch != '\n')
10741  ereport(ERROR,
10742  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10743  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
10744  startpoint = ((uint64) hi) << 32 | lo;
10745  remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */
10746 
10747  /*
10748  * Parse the BACKUP FROM line. If we are taking an online backup from the
10749  * standby, we confirm that the standby has not been promoted during the
10750  * backup.
10751  */
10752  ptr = strstr(remaining, "BACKUP FROM:");
10753  if (!ptr || sscanf(ptr, "BACKUP FROM: %19s\n", backupfrom) != 1)
10754  ereport(ERROR,
10755  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10756  errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
10757  if (strcmp(backupfrom, "standby") == 0 && !backup_started_in_recovery)
10758  ereport(ERROR,
10759  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10760  errmsg("the standby was promoted during online backup"),
10761  errhint("This means that the backup being taken is corrupt "
10762  "and should not be used. "
10763  "Try taking another online backup.")));
10764 
10765  /*
10766  * During recovery, we don't write an end-of-backup record. We assume that
10767  * pg_control was backed up last and its minimum recovery point can be
10768  * available as the backup end location. Since we don't have an
10769  * end-of-backup record, we use the pg_control value to check whether
10770  * we've reached the end of backup when starting recovery from this
10771  * backup. We have no way of checking if pg_control wasn't backed up last
10772  * however.
10773  *
10774  * We don't force a switch to new WAL file and wait for all the required
10775  * files to be archived. This is okay if we use the backup to start the
10776  * standby. But, if it's for an archive recovery, to ensure all the
10777  * required files are available, a user should wait for them to be
10778  * archived, or include them into the backup.
10779  *
10780  * We return the current minimum recovery point as the backup end
10781  * location. Note that it can be greater than the exact backup end
10782  * location if the minimum recovery point is updated after the backup of
10783  * pg_control. This is harmless for current uses.
10784  *
10785  * XXX currently a backup history file is for informational and debug
10786  * purposes only. It's not essential for an online backup. Furthermore,
10787  * even if it's created, it will not be archived during recovery because
10788  * an archiver is not invoked. So it doesn't seem worthwhile to write a
10789  * backup history file during recovery.
10790  */
10791  if (backup_started_in_recovery)
10792  {
10793  XLogRecPtr recptr;
10794 
10795  /*
10796  * Check to see if all WAL replayed during online backup contain
10797  * full-page writes.
10798  */
10800  recptr = XLogCtl->lastFpwDisableRecPtr;
10802 
10803  if (startpoint <= recptr)
10804  ereport(ERROR,
10805  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
10806  errmsg("WAL generated with full_page_writes=off was replayed "
10807  "during online backup"),
10808  errhint("This means that the backup being taken on the standby "
10809  "is corrupt and should not be used. "
10810  "Enable full_page_writes and run CHECKPOINT on the master, "
10811  "and then try an online backup again.")));
10812 
10813 
10814  LWLockAcquire(ControlFileLock, LW_SHARED);
10815  stoppoint = ControlFile->minRecoveryPoint;
10816  stoptli = ControlFile->minRecoveryPointTLI;
10817  LWLockRelease(ControlFileLock);
10818 
10819  if (stoptli_p)
10820  *stoptli_p = stoptli;
10821  return stoppoint;
10822  }
10823 
10824  /*
10825  * Write the backup-end xlog record
10826  */
10827  XLogBeginInsert();
10828  XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
10829  stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
10830  stoptli = ThisTimeLineID;
10831 
10832  /*
10833  * Force a switch to a new xlog segment file, so that the backup is valid
10834  * as soon as archiver moves out the current segment file.
10835  */
10836  RequestXLogSwitch(false);
10837 
10838  XLByteToPrevSeg(stoppoint, _logSegNo);
10839  XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo);
10840 
10841  /* Use the log timezone here, not the session timezone */
10842  stamp_time = (pg_time_t) time(NULL);
10843  pg_strftime(strfbuf, sizeof(strfbuf),
10844  "%Y-%m-%d %H:%M:%S %Z",
10845  pg_localtime(&stamp_time, log_timezone));
10846 
10847  /*
10848  * Write the backup history file
10849  */
10850  XLByteToSeg(startpoint, _logSegNo);
10851  BackupHistoryFilePath(histfilepath, ThisTimeLineID, _logSegNo,
10852  (uint32) (startpoint % XLogSegSize));
10853  fp = AllocateFile(histfilepath, "w");
10854  if (!fp)
10855  ereport(ERROR,
10857  errmsg("could not create file \"%s\": %m",
10858  histfilepath)));
10859  fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
10860  (uint32) (startpoint >> 32), (uint32) startpoint, startxlogfilename);
10861  fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n",
10862  (uint32) (stoppoint >> 32), (uint32) stoppoint, stopxlogfilename);
10863  /* transfer remaining lines from label to history file */
10864  fprintf(fp, "%s", remaining);
10865  fprintf(fp, "STOP TIME: %s\n", strfbuf);
10866  if (fflush(fp) || ferror(fp) || FreeFile(fp))
10867  ereport(ERROR,
10869  errmsg("could not write file \"%s\": %m",
10870  histfilepath)));
10871 
10872  /*
10873  * Clean out any no-longer-needed history files. As a side effect, this
10874  * will post a .ready file for the newly created history file, notifying
10875  * the archiver that history file may be archived immediately.
10876  */
10878 
10879  /*
10880  * If archiving is enabled, wait for all the required WAL files to be
10881  * archived before returning. If archiving isn't enabled, the required WAL
10882  * needs to be transported via streaming replication (hopefully with
10883  * wal_keep_segments set high enough), or some more exotic mechanism like
10884  * polling and copying files from pg_wal with a script. We have no
10885  * knowledge of those mechanisms, so it's up to the user to ensure that he
10886  * gets all the required WAL.
10887  *
10888  * We wait until both the last WAL file filled during backup and the
10889  * history file have been archived, and assume that the alphabetic sorting
10890  * property of the WAL files ensures any earlier WAL files are safely
10891  * archived as well.
10892  *
10893  * We wait forever, since archive_command is supposed to work and we
10894  * assume the admin wanted his backup to work completely. If you don't
10895  * wish to wait, you can set statement_timeout. Also, some notices are
10896  * issued to clue in anyone who might be doing this interactively.
10897  */
10898  if (waitforarchive && XLogArchivingActive())
10899  {
10900  XLByteToPrevSeg(stoppoint, _logSegNo);
10901  XLogFileName(lastxlogfilename, ThisTimeLineID, _logSegNo);
10902 
10903  XLByteToSeg(startpoint, _logSegNo);
10904  BackupHistoryFileName(histfilename, ThisTimeLineID, _logSegNo,
10905  (uint32) (startpoint % XLogSegSize));
10906 
10907  seconds_before_warning = 60;
10908  waits = 0;
10909 
10910  while (XLogArchiveIsBusy(lastxlogfilename) ||
10911  XLogArchiveIsBusy(histfilename))
10912  {
10914 
10915  if (!reported_waiting && waits > 5)
10916  {
10917  ereport(NOTICE,
10918  (errmsg("pg_stop_backup cleanup done, waiting for required WAL segments to be archived")));
10919  reported_waiting = true;
10920  }
10921 
10922  pg_usleep(1000000L);
10923 
10924  if (++waits >= seconds_before_warning)
10925  {
10926  seconds_before_warning *= 2; /* This wraps in >10 years... */
10927  ereport(WARNING,
10928  (errmsg("pg_stop_backup still waiting for all required WAL segments to be archived (%d seconds elapsed)",
10929  waits),
10930  errhint("Check that your archive_command is executing properly. "
10931  "pg_stop_backup can be canceled safely, "
10932  "but the database backup will not be usable without all the WAL segments.")));
10933  }
10934  }
10935 
10936  ereport(NOTICE,
10937  (errmsg("pg_stop_backup complete, all required WAL segments have been archived")));
10938  }
10939  else if (waitforarchive)
10940  ereport(NOTICE,
10941  (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
10942 
10943  /*
10944  * We're done. As a convenience, return the ending WAL location.
10945  */
10946  if (stoptli_p)
10947  *stoptli_p = stoptli;
10948  return stoppoint;
10949 }
int remaining
Definition: informix.c:692
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:124
#define XLogSegSize
Definition: xlog_internal.h:92
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9322
int errhint(const char *fmt,...)
Definition: elog.c:987
uint32 TimeLineID
Definition: xlogdefs.h:45
int64 pg_time_t
Definition: pgtime.h:23
static void WALInsertLockRelease(void)
Definition: xlog.c:1635
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:695
#define XLogIsNeeded()
Definition: xlog.h:145
slock_t info_lck
Definition: xlog.c:697
#define XLogFileName(fname, tli, logSegNo)
int errcode(int sqlerrcode)
Definition: elog.c:575
XLogCtlInsert Insert
Definition: xlog.c:569
bool RecoveryInProgress(void)
Definition: xlog.c:7805
static bool backup_started_in_recovery
Definition: basebackup.c:73
pg_tz * log_timezone
Definition: pgtz.c:30
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define TABLESPACE_MAP
Definition: xlog.h:304
#define SpinLockAcquire(lock)
Definition: spin.h:62
void pg_usleep(long microsec)
Definition: signal.c:53
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
bool forcePageWrites
Definition: xlog.c:542
#define ERROR
Definition: elog.h:43
static void CleanupBackupHistory(void)
Definition: xlog.c:4081
#define MAXPGPATH
uint64 XLogSegNo
Definition: xlogdefs.h:34
int errcode_for_file_access(void)
Definition: elog.c:598
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2043
unsigned int uint32
Definition: c.h:265
#define XLByteToPrevSeg(xlrp, logSegNo)
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
#define XLOG_BACKUP_END
Definition: pg_control.h:69
#define WARNING
Definition: elog.h:40
int nonExclusiveBackups
Definition: xlog.c:555
#define MAXFNAMELEN
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
ExclusiveBackupState exclusiveBackupState
Definition: xlog.c:554
uintptr_t Datum
Definition: postgres.h:374
static ControlFileData * ControlFile
Definition: xlog.c:708
#define BoolGetDatum(X)
Definition: postgres.h:410
TimeLineID ThisTimeLineID
Definition: xlog.c:178
#define NOTICE
Definition: elog.h:37
#define XLByteToSeg(xlrp, logSegNo)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:658
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
#define XLogArchivingActive()
Definition: xlog.h:134
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1606
static XLogCtlData * XLogCtl
Definition: xlog.c:700
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
static void pg_stop_backup_callback(int code, Datum arg)
Definition: xlog.c:10553
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1254
int FreeFile(FILE *file)
Definition: fd.c:2226
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define BackupHistoryFilePath(path, tli, logSegNo, offset)
#define BACKUP_LABEL_FILE
Definition: xlog.h:301
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define BackupHistoryFileName(fname, tli, logSegNo, offset)
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
TimestampTz GetCurrentChunkReplayStartTime ( void  )

Definition at line 6034 of file xlog.c.

References XLogCtlData::currentChunkStartTime, XLogCtlData::info_lck, SpinLockAcquire, and SpinLockRelease.

Referenced by GetReplicationApplyDelay().

6035 {
6036  TimestampTz xtime;
6037 
6039  xtime = XLogCtl->currentChunkStartTime;
6041 
6042  return xtime;
6043 }
int64 TimestampTz
Definition: timestamp.h:39
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
TimestampTz currentChunkStartTime
Definition: xlog.c:687
XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4687 of file xlog.c.

References SpinLockAcquire, SpinLockRelease, XLogCtlData::ulsn_lck, and XLogCtlData::unloggedLSN.

Referenced by gistGetFakeLSN().

4688 {
4689  XLogRecPtr nextUnloggedLSN;
4690 
4691  /* increment the unloggedLSN counter, need SpinLock */
4693  nextUnloggedLSN = XLogCtl->unloggedLSN++;
4695 
4696  return nextUnloggedLSN;
4697 }
XLogRecPtr unloggedLSN
Definition: xlog.c:583
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:700
slock_t ulsn_lck
Definition: xlog.c:584
XLogRecPtr GetFlushRecPtr ( void  )

Definition at line 8154 of file xlog.c.

References XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::LogwrtResult, SpinLockAcquire, and SpinLockRelease.

Referenced by get_flush_position(), IdentifySystem(), pg_current_wal_flush_location(), pg_logical_slot_get_changes_guts(), read_local_xlog_page(), StartReplication(), WalSndWaitForWal(), XLogSendLogical(), and XLogSendPhysical().

8155 {
8159 
8160  return LogwrtResult.Flush;
8161 }
static XLogwrtResult LogwrtResult
Definition: xlog.c:738
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
XLogwrtResult LogwrtResult
Definition: xlog.c:594
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
XLogRecPtr Flush
Definition: xlog.c:423
void GetFullPageWriteInfo ( XLogRecPtr * RedoRecPtr_p,
bool * doPageWrites_p 
)

Definition at line 8123 of file xlog.c.

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

8124 {
8125  *RedoRecPtr_p = RedoRecPtr;
8126  *doPageWrites_p = doPageWrites;
8127 }
static bool doPageWrites
Definition: xlog.c:356
static XLogRecPtr RedoRecPtr
Definition: xlog.c:349
XLogRecPtr GetInsertRecPtr ( void  )

Definition at line 8138 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::LogwrtRqst, SpinLockAcquire, SpinLockRelease, and XLogwrtRqst::Write.

Referenced by CheckpointerMain(), and IsCheckpointOnSchedule().

8139 {
8140  XLogRecPtr recptr;
8141 
8143  recptr = XLogCtl->LogwrtRqst.Write;
8145 
8146  return recptr;
8147 }
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
XLogRecPtr Write
Definition: xlog.c:416
XLogwrtRqst LogwrtRqst
Definition: xlog.c:572
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:700
XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 8172 of file xlog.c.

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and NUM_XLOGINSERT_LOCKS.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

8173 {
8175  int i;
8176 
8177  for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
8178  {
8179  XLogRecPtr last_important;
8180 
8181  /*
8182  * Need to take a lock to prevent torn reads of the LSN, which are
8183  * possible on some of the supported platforms. WAL insert locks only
8184  * support exclusive mode, so we have to use that.
8185  */
8187  last_important = WALInsertLocks[i].l.lastImportantAt;
8188  LWLockRelease(&WALInsertLocks[i].l.lock);
8189 
8190  if (res < last_important)
8191  res = last_important;
8192  }
8193 
8194  return res;
8195 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr lastImportantAt
Definition: xlog.c:466
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:117
WALInsertLock l
Definition: xlog.c:478
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
uint64 XLogRecPtr
Definition: xlogdefs.h:21
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
int i
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:703
TimestampTz GetLatestXTime ( void  )

Definition at line 6004 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryLastXTime, SpinLockAcquire, and SpinLockRelease.

Referenced by CreateRestartPoint(), pg_last_xact_replay_timestamp(), and StartupXLOG().

6005 {
6006  TimestampTz xtime;
6007 
6009  xtime = XLogCtl->recoveryLastXTime;
6011 
6012  return xtime;
6013 }
int64 TimestampTz
Definition: timestamp.h:39
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
TimestampTz recoveryLastXTime
Definition: xlog.c:681
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
void GetNextXidAndEpoch ( TransactionId * xid,
uint32 * epoch 
)

Definition at line 8223 of file xlog.c.

References XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, XLogCtlData::info_lck, ReadNewTransactionId(), SpinLockAcquire, and SpinLockRelease.

Referenced by load_xid_epoch(), ProcessStandbyHSFeedbackMessage(), and XLogWalRcvSendHSFeedback().

8224 {
8225  uint32 ckptXidEpoch;
8226  TransactionId ckptXid;
8227  TransactionId nextXid;
8228 
8229  /* Must read checkpoint info first, else have race condition */
8231  ckptXidEpoch = XLogCtl->ckptXidEpoch;
8232  ckptXid = XLogCtl->ckptXid;
8234 
8235  /* Now fetch current nextXid */
8236  nextXid = ReadNewTransactionId();
8237 
8238  /*
8239  * nextXid is certainly logically later than ckptXid. So if it's
8240  * numerically less, it must have wrapped into the next epoch.
8241  */
8242  if (nextXid < ckptXid)
8243  ckptXidEpoch++;
8244 
8245  *xid = nextXid;
8246  *epoch = ckptXidEpoch;
8247 }
TransactionId ckptXid
Definition: xlog.c:575
uint32 TransactionId
Definition: c.h:394
slock_t info_lck
Definition: xlog.c:697
uint32 ckptXidEpoch
Definition: xlog.c:574
#define SpinLockAcquire(lock)
Definition: spin.h:62
TransactionId ReadNewTransactionId(void)
Definition: varsup.c:250
unsigned int uint32
Definition: c.h:265
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
static const unsigned __int64 epoch
Definition: gettimeofday.c:34
XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 8095 of file xlog.c.

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, and SpinLockRelease.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), InitXLOGAccess(), nextval_internal(), ReplicationSlotReserveWal(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

8096 {
8097  XLogRecPtr ptr;
8098 
8099  /*
8100  * The possibly not up-to-date copy in XLogCtl is enough. Even if we
8101  * grabbed a WAL insertion lock to read the master copy, someone might
8102  * update it just after we've released the lock.
8103  */
8105  ptr = XLogCtl->RedoRecPtr;
8107 
8108  if (RedoRecPtr < ptr)
8109  RedoRecPtr = ptr;
8110 
8111  return RedoRecPtr;
8112 }
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
static XLogRecPtr RedoRecPtr
Definition: xlog.c:349
XLogRecPtr RedoRecPtr
Definition: xlog.c:573
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:700
uint64 GetSystemIdentifier ( void  )

Definition at line 4661 of file xlog.c.

References Assert, NULL, and ControlFileData::system_identifier.

Referenced by IdentifySystem(), and WalReceiverMain().

4662 {
4663  Assert(ControlFile != NULL);
4665 }
uint64 system_identifier
Definition: pg_control.h:107
static ControlFileData * ControlFile
Definition: xlog.c:708
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 11003 of file xlog.c.

References XLogCtlInsert::CurrBytePos, Insert(), XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, SpinLockAcquire, SpinLockRelease, and XLogBytePosToRecPtr().

Referenced by GetSnapshotData(), logical_begin_heap_rewrite(), pg_current_wal_insert_location(), and ReplicationSlotReserveWal().

11004 {
11006  uint64 current_bytepos;
11007 
11008  SpinLockAcquire(&Insert->insertpos_lck);
11009  current_bytepos = Insert->CurrBytePos;
11010  SpinLockRelease(&Insert->insertpos_lck);
11011 
11012  return XLogBytePosToRecPtr(current_bytepos);
11013 }
slock_t insertpos_lck
Definition: xlog.c:510
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1904
XLogCtlInsert Insert
Definition: xlog.c:569
#define SpinLockAcquire(lock)
Definition: spin.h:62
uint64 CurrBytePos
Definition: xlog.c:519
static void Insert(File file)
Definition: fd.c:1007
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
void GetXLogReceiptTime ( TimestampTz * rtime,
bool * fromStream 
)

Definition at line 6050 of file xlog.c.

References Assert, InRecovery, XLOG_FROM_STREAM, XLogReceiptSource, and XLogReceiptTime.

Referenced by GetStandbyLimitTime().

6051 {
6052  /*
6053  * This must be executed in the startup process, since we don't export the
6054  * relevant state to shared memory.
6055  */
6056  Assert(InRecovery);
6057 
6058  *rtime = XLogReceiptTime;
6059  *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
6060 }
static XLogSource XLogReceiptSource
Definition: xlog.c:805
bool InRecovery
Definition: xlog.c:191
static TimestampTz XLogReceiptTime
Definition: xlog.c:804
#define Assert(condition)
Definition: c.h:671
XLogRecPtr GetXLogReplayRecPtr ( TimeLineID * replayTLI)

Definition at line 10984 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::lastReplayedEndRecPtr, XLogCtlData::lastReplayedTLI, SpinLockAcquire, and SpinLockRelease.

Referenced by CheckpointerMain(), CreateRestartPoint(), GetReplicationApplyDelay(), GetStandbyFlushRecPtr(), IsCheckpointOnSchedule(), pg_last_wal_replay_location(), pg_logical_slot_get_changes_guts(), read_local_xlog_page(), WalReceiverMain(), WalSndWaitForWal(), and XLogWalRcvSendReply().

10985 {
10986  XLogRecPtr recptr;
10987  TimeLineID tli;
10988 
10990  recptr = XLogCtl->lastReplayedEndRecPtr;
10991  tli = XLogCtl->lastReplayedTLI;
10993 
10994  if (replayTLI)
10995  *replayTLI = tli;
10996  return recptr;
10997 }
uint32 TimeLineID
Definition: xlogdefs.h:45
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static XLogCtlData * XLogCtl
Definition: xlog.c:700
TimeLineID lastReplayedTLI
Definition: xlog.c:677
XLogRecPtr lastReplayedEndRecPtr
Definition: xlog.c:676
XLogRecPtr GetXLogWriteRecPtr ( void  )

Definition at line 11019 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::LogwrtResult, SpinLockAcquire, SpinLockRelease, and XLogwrtResult::Write.

Referenced by pg_current_wal_location().

11020 {
11024 
11025  return LogwrtResult.Write;
11026 }
static XLogwrtResult LogwrtResult
Definition: xlog.c:738
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
XLogwrtResult LogwrtResult
Definition: xlog.c:594
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
XLogRecPtr Write
Definition: xlog.c:422
bool HotStandbyActive ( void  )

Definition at line 7861 of file xlog.c.

References XLogCtlData::info_lck, LocalHotStandbyActive, XLogCtlData::SharedHotStandbyActive, SpinLockAcquire, and SpinLockRelease.

Referenced by XLogWalRcvSendHSFeedback().

7862 {
7863  /*
7864  * We check shared state each time only until Hot Standby is active. We
7865  * can't de-activate Hot Standby, so there's no need to keep checking
7866  * after the shared variable has once been seen true.
7867  */
7869  return true;
7870  else
7871  {
7872  /* spinlock is essential on machines with weak memory ordering! */
7876 
7877  return LocalHotStandbyActive;
7878  }
7879 }
bool SharedHotStandbyActive
Definition: xlog.c:642
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
static bool LocalHotStandbyActive
Definition: xlog.c:220
#define SpinLockRelease(lock)
Definition: spin.h:64
static XLogCtlData * XLogCtl
Definition: xlog.c:700
bool HotStandbyActiveInReplay ( void  )

Definition at line 7886 of file xlog.c.

References AmStartupProcess, Assert, IsPostmasterEnvironment, and LocalHotStandbyActive.

Referenced by btree_xlog_vacuum().

7887 {
7889  return LocalHotStandbyActive;
7890 }
#define AmStartupProcess()
Definition: miscadmin.h:403
bool IsPostmasterEnvironment
Definition: globals.c:99
static bool LocalHotStandbyActive
Definition: xlog.c:220
#define Assert(condition)
Definition: c.h:671
void InitXLOGAccess ( void  )

Definition at line 8072 of file xlog.c.

References Assert, doPageWrites, XLogCtlInsert::forcePageWrites, XLogCtlInsert::fullPageWrites, GetRedoRecPtr(), InitXLogInsert(), Insert(), XLogCtlData::Insert, IsBootstrapProcessingMode, ThisTimeLineID, and XLogCtlData::ThisTimeLineID.

Referenced by AuxiliaryProcessMain(), LocalSetXLogInsertAllowed(), and RecoveryInProgress().

8073 {
8075 
8076  /* ThisTimeLineID doesn't change so we need no lock to copy it */
8079 
8080  /* Use GetRedoRecPtr to copy the RedoRecPtr safely */
8081  (void) GetRedoRecPtr();
8082  /* Also update our copy of doPageWrites. */
8083  doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
8084 
8085  /* Also initialize the working areas for constructing WAL records */
8086  InitXLogInsert();
8087 }
void InitXLogInsert(void)
Definition: xloginsert.c:1029
TimeLineID ThisTimeLineID
Definition: xlog.c:623
XLogCtlInsert Insert
Definition: xlog.c:569
bool fullPageWrites
Definition: xlog.c:543
static bool doPageWrites
Definition: xlog.c:356
bool forcePageWrites
Definition: xlog.c:542
static void Insert(File file)
Definition: fd.c:1007
TimeLineID ThisTimeLineID
Definition: xlog.c:178
#define Assert(condition)
Definition: c.h:671
static XLogCtlData * XLogCtl
Definition: xlog.c:700
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:8095
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:365
void issue_xlog_fsync ( int  fd,
XLogSegNo  segno 
)

Definition at line 9997 of file xlog.c.

References elog, ereport, errcode_for_file_access(), errmsg(), PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), sync_method, SYNC_METHOD_FDATASYNC, SYNC_METHOD_FSYNC, SYNC_METHOD_FSYNC_WRITETHROUGH, SYNC_METHOD_OPEN, SYNC_METHOD_OPEN_DSYNC, ThisTimeLineID, and XLogFileNameP().

Referenced by XLogWalRcvFlush(), and XLogWrite().

9998 {
9999  switch (sync_method)
10000  {
10001  case SYNC_METHOD_FSYNC:
10002  if (pg_fsync_no_writethrough(fd) != 0)
10003  ereport(PANIC,
10005  errmsg("could not fsync log file %s: %m",
10006  XLogFileNameP(ThisTimeLineID, segno))));
10007  break;
10008 #ifdef HAVE_FSYNC_WRITETHROUGH
10010  if (pg_fsync_writethrough(fd) != 0)
10011  ereport(PANIC,
10013  errmsg("could not fsync write-through log file %s: %m",
10014  XLogFileNameP(ThisTimeLineID, segno))));
10015  break;
10016 #endif
10017 #ifdef HAVE_FDATASYNC
10018  case SYNC_METHOD_FDATASYNC:
10019  if (pg_fdatasync(fd) != 0)
10020  ereport(PANIC,
10022  errmsg("could not fdatasync log file %s: %m",
10023  XLogFileNameP(ThisTimeLineID, segno))));
10024  break;
10025 #endif
10026  case SYNC_METHOD_OPEN:
10028  /* write synced it already */
10029  break;
10030  default:
10031  elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
10032  break;
10033  }
10034 }
int pg_fdatasync(int fd)
Definition: fd.c:385
#define SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:28
int pg_fsync_writethrough(int fd)
Definition: fd.c:362
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:350
#define PANIC
Definition: elog.h:53
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:29
char * XLogFileNameP(TimeLineID tli, XLogSegNo segno)
Definition: xlog.c:10040
int errcode_for_file_access(void)
Definition: elog.c:598
#define SYNC_METHOD_FSYNC
Definition: xlog.h:25
#define ereport(elevel, rest)
Definition: elog.h:122
#define SYNC_METHOD_OPEN
Definition: xlog.h:27
TimeLineID ThisTimeLineID
Definition: xlog.c:178
int sync_method
Definition: xlog.c:102
#define SYNC_METHOD_FDATASYNC
Definition: xlog.h:26
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
bool RecoveryInProgress ( void  )

Definition at line 7805 of file xlog.c.

References InitXLOGAccess(), LocalRecoveryInProgress, pg_memory_barrier, XLogCtlData::SharedRecoveryInProgress, and XLogCtl.

Referenced by BackgroundWriterMain(), check_transaction_read_only(), check_XactIsoLevel(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_start_backup(), do_pg_stop_backup(), error_commit_ts_disabled(), get_relation_info(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetOldestXmin(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), gin_clean_pending_list(), heap_page_prune_opt(), IdentifySystem(), InitPostgres(), InitTempTableNamespace(), IsCheckpointOnSchedule(), LockAcquireExtended(), MarkBufferDirtyHint(), OldSerXidSetActiveSerXmin(), perform_base_backup(), pg_create_restore_point(), pg_current_wal_flush_location(), pg_current_wal_insert_location(), pg_current_wal_location(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_logical_slot_get_changes_guts(), pg_switch_wal(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PreventCommandDuringRecovery(), ProcSendSignal(), ProcSleep(), read_local_xlog_page(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), sendDir(), ShutdownXLOG(), standard_ProcessUtility(), StartLogicalReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), WalReceiverMain(), WalSndShutdown(), WalSndWaitForWal(), XLogBackgroundFlush(), XLogInsertAllowed(), XLogNeedsFlush(), XlogReadTwoPhaseData(), and XLogSendPhysical().

7806 {
7807  /*
7808  * We check shared state each time only until we leave recovery mode. We
7809  * can't re-enter recovery, so there's no need to keep checking after the
7810  * shared variable has once been seen false.
7811  */
7813  return false;
7814  else
7815  {
7816  /*
7817  * use volatile pointer to make sure we make a fresh read of the
7818  * shared variable.
7819  */
7820  volatile XLogCtlData *xlogctl = XLogCtl;
7821 
7823 
7824  /*
7825  * Initialize TimeLineID and RedoRecPtr when we discover that recovery
7826  * is finished. InitPostgres() relies upon this behaviour to ensure
7827  * that InitXLOGAccess() is called at backend startup. (If you change
7828  * this, see also LocalSetXLogInsertAllowed.)
7829  */
7831  {
7832  /*
7833  * If we just exited recovery, make sure we read TimeLineID and
7834  * RedoRecPtr after SharedRecoveryInProgress (for machines with
7835  * weak memory ordering).
7836  */
7838  InitXLOGAccess();
7839  }
7840 
7841  /*
7842  * Note: We don't need a memory barrier when we're still in recovery.
7843  * We might exit recovery immediately after return, so the caller
7844  * can't rely on 'true' meaning that we're still in recovery anyway.
7845  */
7846 
7847  return LocalRecoveryInProgress;
7848  }
7849 }
void InitXLOGAccess(void)
Definition: xlog.c:8072
bool SharedRecoveryInProgress
Definition: xlog.c:636
#define pg_memory_barrier()
Definition: atomics.h:147
static XLogCtlData * XLogCtl
Definition: xlog.c:700
static bool LocalRecoveryInProgress
Definition: xlog.c:214
bool RecoveryIsPaused ( void  )

Definition at line 5873 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryPause, SpinLockAcquire, and SpinLockRelease.

Referenced by pg_is_wal_replay_paused(), and recoveryPausesHere().

5874 {
5875  bool recoveryPause;
5876 
5878  recoveryPause = XLogCtl->recoveryPause;
5880 
5881  return recoveryPause;
5882 }
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool recoveryPause
Definition: xlog.c:689
static XLogCtlData * XLogCtl
Definition: xlog.c:700
void RemovePromoteSignalFiles ( void  )

Definition at line 11948 of file xlog.c.

References FALLBACK_PROMOTE_SIGNAL_FILE, PROMOTE_SIGNAL_FILE, and unlink().

Referenced by PostmasterMain().

11949 {
11952 }
#define FALLBACK_PROMOTE_SIGNAL_FILE
Definition: xlog.c:84
#define PROMOTE_SIGNAL_FILE
Definition: xlog.c:83
int unlink(const char *filename)
void SetRecoveryPause ( bool  recoveryPause)

Definition at line 5885 of file xlog.c.

References XLogCtlData::info_lck, XLogCtlData::recoveryPause, SpinLockAcquire, and SpinLockRelease.

Referenced by pg_wal_replay_pause(), pg_wal_replay_resume(), and StartupXLOG().

5886 {
5888  XLogCtl->recoveryPause = recoveryPause;
5890 }
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool recoveryPause
Definition: xlog.c:689
static XLogCtlData * XLogCtl
Definition: xlog.c:700
void SetWalWriterSleeping ( bool  sleeping)

Definition at line 11984 of file xlog.c.

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, and XLogCtlData::WalWriterSleeping.

Referenced by WalWriterMain().

11985 {
11987  XLogCtl->WalWriterSleeping = sleeping;
11989 }
slock_t info_lck
Definition: xlog.c:697
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define SpinLockRelease(lock)
Definition: spin.h:64
bool WalWriterSleeping
Definition: xlog.c:649
static XLogCtlData * XLogCtl
Definition: xlog.c:700
void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 8253 of file xlog.c.

References CHECKPOINT_IMMEDIATE, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), ShutdownCLOG(), ShutdownCommitTs(), ShutdownMultiXact(), ShutdownSUBTRANS(), XLogArchiveCommandSet, and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

8254 {
8255  /* Don't be chatty in standalone mode */
8257  (errmsg("shutting down")));
8258 
8259  if (RecoveryInProgress())
8261  else
8262  {
8263  /*
8264  * If archiving is enabled, rotate the last XLOG file so that all the
8265  * remaining records are archived (postmaster wakes up the archiver
8266  * process one more time at the end of shutdown). The checkpoint
8267  * record will go to the next XLOG file and won't be archived (yet).
8268  */
8270  RequestXLogSwitch(false);
8271 
8273  }
8274  ShutdownCLOG();
8275  ShutdownCommitTs();
8276  ShutdownSUBTRANS();
8278 }
bool IsPostmasterEnvironment
Definition: globals.c:99
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:9322
void ShutdownSUBTRANS(void)
Definition: subtrans.c:271
void CreateCheckPoint(int flags)
Definition: xlog.c:8449
void ShutdownCLOG(void)
Definition: clog.c:574
bool CreateRestartPoint(int flags)
Definition: xlog.c:8991
#define XLogArchiveCommandSet()
Definition: xlog.h:139
#define LOG
Definition: elog.h:26
bool RecoveryInProgress(void)
Definition: xlog.c:7805
void ShutdownMultiXact(void)
Definition: multixact.c:2105
#define ereport(elevel, rest)
Definition: elog.h:122
#define NOTICE
Definition: elog.h:37
void ShutdownCommitTs(void)
Definition: commit_ts.c:745
#define XLogArchivingActive()
Definition: xlog.h:134
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define CHECKPOINT_IMMEDIATE
Definition: xlog.h:179
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:175
void StartupXLOG ( void  )

Definition at line 6132 of file xlog.c.

References AllowCascadeReplication, appendStringInfo(), appendStringInfoString(), archiveCleanupCommand, XLogCtlData::archiveCleanupCommand, ArchiveRecoveryRequested, ErrorContextCallback::arg, Assert, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, bgwriterLaunched, buf, ErrorContextCallback::callback, ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IMMEDIATE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, CheckRecoveryConsistency(), CheckRequiredParameterValues(), checkTimeLineSwitch(), checkXLogConsistency(), XLogCtlData::ckptXid, XLogCtlData::ckptXidEpoch, close, CompleteCommitTsInitialization(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), XLogCtlInsert::CurrBytePos, XLogCtlData::currentChunkStartTime, StringInfoData::data, DataDir, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DEBUG2, DEBUG3, DeleteAllExportedSnapshotFiles(), DisownLatch(), doPageWrites, doRequestWalReceiverReply, durable_rename(), elog, EnableHotStandby, EndRecPtr, ereport, errcode(), errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_internal(), ERROR, error_context_stack, ExecuteRecoveryCommand(), exitArchiveRecovery(), fast_promote, FATAL, findNewestTimeLine(), XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, XLogCtlInsert::fullPageWrites, GetCurrentTimestamp(), GetLatestXTime(), HandleStartupProcInterrupts(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), initStringInfo(), InRecovery, InRedo, Insert(), XLogCtlData::Insert, InvalidXLogRecPtr, IsPostmasterEnvironment, IsUnderPostmaster, lastFullPageWrites, LastRec, XLogCtlData::lastReplayedEndRecPtr, XLogCtlData::lastReplayedTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, 
RunningTransactionsData::latestCompletedXid, VariableCacheData::latestCompletedXid, lfirst, LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, LOG, XLogCtlData::LogwrtResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), master_image_masked, MAXFNAMELEN, MAXPGPATH, MemSet, ControlFileData::minRecoveryPoint, minRecoveryPoint, ControlFileData::minRecoveryPointTLI, minRecoveryPointTLI, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, CheckPoint::nextOid, VariableCacheData::nextOid, CheckPoint::nextXid, RunningTransactionsData::nextXid, VariableCacheData::nextXid, CheckPoint::nextXidEpoch, NIL, NOTICE, NULL, tablespaceinfo::oid, VariableCacheData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, OwnLatch(), XLogCtlData::pages, palloc(), PANIC, tablespaceinfo::path, pfree(), pg_usleep(), pgstat_reset_all(), PMSIGNAL_RECOVERY_STARTED, PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlInsert::PrevBytePos, ControlFileData::prevCheckPoint, ErrorContextCallback::previous, CheckPoint::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, XLogCtlData::PrevTimeLineID, proc_exit(), ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), psprintf(), PublishStartupProcessInformation(), reachedConsistency, read_backup_label(), read_tablespace_map(), XLogReaderState::readBuf, ReadCheckpointRecord(), ReadControlFile(), readFile, readOff, XLogReaderState::readPageTLI, ReadRecord(), readRecoveryCommandFile(), ReadRecPtr, RecordKnownAssignedTransactionIds(), RecoverPreparedTransactions(), RECOVERY_TARGET_ACTION_PAUSE, RECOVERY_TARGET_ACTION_PROMOTE, RECOVERY_TARGET_ACTION_SHUTDOWN, RECOVERY_TARGET_IMMEDIATE, RECOVERY_TARGET_LSN, RECOVERY_TARGET_NAME, RECOVERY_TARGET_TIME, RECOVERY_TARGET_XID, recoveryApplyDelay(), 
recoveryEndCommand, XLogCtlData::recoveryLastXTime, XLogCtlData::recoveryPause, recoveryPausesHere(), recoveryStopAfter, recoveryStopLSN, recoveryStopName, recoveryStopsAfter(), recoveryStopsBefore(), recoveryStopTime, recoveryStopXid, recoveryTarget, recoveryTargetAction, recoveryTargetLSN, recoveryTargetName, recoveryTargetTime, recoveryTargetTLI, recoveryTargetXid, XLogCtlData::recoveryWakeupLatch, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RedoStartLSN, RelationCacheInitFileRemove(), remove_tablespace_symlink(), RemoveNonParentXlogFiles(), replay_image_masked, XLogCtlData::replayEndRecPtr, XLogCtlData::replayEndTLI, RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), RmgrData::rm_cleanup, RM_MAX_ID, RmgrData::rm_redo, rm_redo_error_callback(), RmgrData::rm_startup, RmgrTable, SendPostmasterSignal(), SetCommitTsLimit(), SetForwardFsyncRequests(), SetMultiXactIdLimit(), SetRecoveryPause(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryInProgress, ShmemVariableCache, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRcv(), snprintf(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_INITIALIZED, StandbyMode, StandbyModeRequested, StandbyRecoverPreparedTransactions(), standbyState, StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), strlcpy(), RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_overflow, SyncDataDirectory(), XLogReaderState::system_identifier, ControlFileData::system_identifier, TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, ThisTimeLineID, xl_end_of_recovery::ThisTimeLineID, XLogCtlData::ThisTimeLineID, CheckPoint::time, ControlFileData::time, timestamptz_to_str(), tliOfPointInHistory(), tliSwitchPoint(), trace_recovery_messages, track_commit_timestamp, ControlFileData::track_commit_timestamp, 
TransactionIdAdvance, TransactionIdFollowsOrEquals(), TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TrimCLOG(), TrimMultiXact(), unlink(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, ControlFileData::unloggedLSN, XLogCtlData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WalRcvForceReply(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, RunningTransactionsData::xids, XLogRecord::xl_info, XLogRecord::xl_rmid, XLogRecord::xl_xid, XLogCtlData::xlblocks, XLByteToPrevSeg, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, xlog_outdesc(), XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName, XLogFilePath, XLogPageRead(), XLogReaderAllocate(), XLogReaderFree(), XLogReceiptTime, XLogRecGetData, XLogRecPtrIsInvalid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), XLogSegSize, XLR_CHECK_CONSISTENCY, XLR_INFO_MASK, and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

6133 {
6135  CheckPoint checkPoint;
6136  bool wasShutdown;
6137  bool reachedStopPoint = false;
6138  bool haveBackupLabel = false;
6139  bool haveTblspcMap = false;
6140  XLogRecPtr RecPtr,
6141  checkPointLoc,
6142  EndOfLog;
6143  TimeLineID EndOfLogTLI;
6144  TimeLineID PrevTimeLineID;
6145  XLogRecord *record;
6146  TransactionId oldestActiveXID;
6147  bool backupEndRequired = false;
6148  bool backupFromStandby = false;
6149  DBState dbstate_at_startup;
6150  XLogReaderState *xlogreader;
6151  XLogPageReadPrivate private;
6152  bool fast_promoted = false;
6153  struct stat st;
6154 
6155  /*
6156  * Read control file and check XLOG status looks valid.
6157  *
6158  * Note: in most control paths, *ControlFile is already valid and we need
6159  * not do ReadControlFile() here, but might as well do it to be sure.
6160  */
6161  ReadControlFile();
6162 
6163  if (ControlFile->state < DB_SHUTDOWNED ||
6166  ereport(FATAL,
6167  (errmsg("control file contains invalid data")));
6168 
6170  {
6171  /* This is the expected case, so don't be chatty in standalone mode */
6173  (errmsg("database system was shut down at %s",
6174  str_time(ControlFile->time))));
6175  }
6177  ereport(LOG,
6178  (errmsg("database system was shut down in recovery at %s",
6179  str_time(ControlFile->time))));
6180  else if (ControlFile->state == DB_SHUTDOWNING)
6181  ereport(LOG,
6182  (errmsg("database system shutdown was interrupted; last known up at %s",
6183  str_time(ControlFile->time))));
6184  else if (ControlFile->state == DB_IN_CRASH_RECOVERY)
6185  ereport(LOG,
6186  (errmsg("database system was interrupted while in recovery at %s",
6188  errhint("This probably means that some data is corrupted and"
6189  " you will have to use the last backup for recovery.")));
6191  ereport(LOG,
6192  (errmsg("database system was interrupted while in recovery at log time %s",
6194  errhint("If this has occurred more than once some data might be corrupted"
6195  " and you might need to choose an earlier recovery target.")));
6196  else if (ControlFile->state == DB_IN_PRODUCTION)
6197  ereport(LOG,
6198  (errmsg("database system was interrupted; last known up at %s",
6199  str_time(ControlFile->time))));
6200 
6201  /* This is just to allow attaching to startup process with a debugger */
6202 #ifdef XLOG_REPLAY_DELAY
6204  pg_usleep(60000000L);
6205 #endif
6206 
6207  /*
6208  * Verify that pg_wal and pg_wal/archive_status exist. In cases where
6209  * someone has performed a copy for PITR, these directories may have been
6210  * excluded and need to be re-created.
6211  */
6213 
6214  /*
6215  * If we previously crashed, there might be data which we had written,
6216  * intending to fsync it, but which we had not actually fsync'd yet.
6217  * Therefore, a power failure in the near future might cause earlier
6218  * unflushed writes to be lost, even though more recent data written to
6219  * disk from here on would be persisted. To avoid that, fsync the entire
6220  * data directory.
6221  */
6222  if (ControlFile->state != DB_SHUTDOWNED &&
6225 
6226  /*
6227  * Initialize on the assumption we want to recover to the latest timeline
6228  * that's active according to pg_control.
6229  */
6233  else
6235 
6236  /*
6237  * Check for recovery control file, and if so set up state for offline
6238  * recovery
6239  */
6241 
6242  /*
6243  * Save archive_cleanup_command in shared memory so that other processes
6244  * can see it.
6245  */
6248  sizeof(XLogCtl->archiveCleanupCommand));
6249 
6251  {
6253  ereport(LOG,
6254  (errmsg("entering standby mode")));
6255  else if (recoveryTarget == RECOVERY_TARGET_XID)
6256  ereport(LOG,
6257  (errmsg("starting point-in-time recovery to XID %u",
6258  recoveryTargetXid)));
6260  ereport(LOG,
6261  (errmsg("starting point-in-time recovery to %s",
6264  ereport(LOG,
6265  (errmsg("starting point-in-time recovery to \"%s\"",
6266  recoveryTargetName)));
6267  else if (recoveryTarget == RECOVERY_TARGET_LSN)
6268  ereport(LOG,
6269  (errmsg("starting point-in-time recovery to WAL position (LSN) \"%X/%X\"",
6270  (uint32) (recoveryTargetLSN >> 32),
6273  ereport(LOG,
6274  (errmsg("starting point-in-time recovery to earliest consistent point")));
6275  else
6276  ereport(LOG,
6277  (errmsg("starting archive recovery")));
6278  }
6279 
6280  /*
6281  * Take ownership of the wakeup latch if we're going to sleep during
6282  * recovery.
6283  */
6286 
6287  /* Set up XLOG reader facility */
6288  MemSet(&private, 0, sizeof(XLogPageReadPrivate));
6289  xlogreader = XLogReaderAllocate(&XLogPageRead, &private);
6290  if (!xlogreader)
6291  ereport(ERROR,
6292  (errcode(ERRCODE_OUT_OF_MEMORY),
6293  errmsg("out of memory"),
6294  errdetail("Failed while allocating an XLog reading processor.")));
6296 
6297  /*
6298  * Allocate pages dedicated to WAL consistency checks, those had better
6299  * be aligned.
6300  */
6301  replay_image_masked = (char *) palloc(BLCKSZ);
6302  master_image_masked = (char *) palloc(BLCKSZ);
6303 
6304  if (read_backup_label(&checkPointLoc, &backupEndRequired,
6305  &backupFromStandby))
6306  {
6307  List *tablespaces = NIL;
6308 
6309  /*
6310  * Archive recovery was requested, and thanks to the backup label
6311  * file, we know how far we need to replay to reach consistency. Enter
6312  * archive recovery directly.
6313  */
6314  InArchiveRecovery = true;
6316  StandbyMode = true;
6317 
6318  /*
6319  * When a backup_label file is present, we want to roll forward from
6320  * the checkpoint it identifies, rather than using pg_control.
6321  */
6322  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
6323  if (record != NULL)
6324  {
6325  memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
6326  wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
6327  ereport(DEBUG1,
6328  (errmsg("checkpoint record is at %X/%X",
6329  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6330  InRecovery = true; /* force recovery even if SHUTDOWNED */
6331 
6332  /*
6333  * Make sure that REDO location exists. This may not be the case
6334  * if there was a crash during an online backup, which left a
6335  * backup_label around that references a WAL segment that's
6336  * already been archived.
6337  */
6338  if (checkPoint.redo < checkPointLoc)
6339  {
6340  if (!ReadRecord(xlogreader, checkPoint.redo, LOG, false))
6341  ereport(FATAL,
6342  (errmsg("could not find redo location referenced by checkpoint record"),
6343  errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
6344  }
6345  }
6346  else
6347  {
6348  ereport(FATAL,
6349  (errmsg("could not locate required checkpoint record"),
6350  errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));
6351  wasShutdown = false; /* keep compiler quiet */
6352  }
6353 
6354  /* read the tablespace_map file if present and create symlinks. */
6355  if (read_tablespace_map(&tablespaces))
6356  {
6357  ListCell *lc;
6358 
6359  foreach(lc, tablespaces)
6360  {
6361  tablespaceinfo *ti = lfirst(lc);
6362  char *linkloc;
6363 
6364  linkloc = psprintf("pg_tblspc/%s", ti->oid);
6365 
6366  /*
6367  * Remove the existing symlink, if any, and create the symlink
6368  * under PGDATA.
6369  */
6370  remove_tablespace_symlink(linkloc);
6371 
6372  if (symlink(ti->path, linkloc) < 0)
6373  ereport(ERROR,
6375  errmsg("could not create symbolic link \"%s\": %m",
6376  linkloc)));
6377 
6378  pfree(ti->oid);
6379  pfree(ti->path);
6380  pfree(ti);
6381  }
6382 
6383  /* set flag to delete it later */
6384  haveTblspcMap = true;
6385  }
6386 
6387  /* set flag to delete it later */
6388  haveBackupLabel = true;
6389  }
6390  else
6391  {
6392  /*
6393  * If a tablespace_map file is present without a backup_label file, the
6394  * map file is of no use. There is no harm in retaining it, but it is
6395  * better to get rid of it so that we don't leave a redundant file in
6396  * the data directory, which avoids any confusion. It seems prudent,
6397  * though, to just rename the file out of the way rather than delete it
6398  * completely. We also ignore any error that occurs in the rename
6399  * operation, because a map file that is present without a backup_label
6400  * file is harmless.
6401  */
6402  if (stat(TABLESPACE_MAP, &st) == 0)
6403  {
6406  ereport(LOG,
6407  (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
6409  errdetail("File \"%s\" was renamed to \"%s\".",
6411  else
6412  ereport(LOG,
6413  (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
6415  errdetail("Could not rename file \"%s\" to \"%s\": %m.",
6417  }
6418 
6419  /*
6420  * It's possible that archive recovery was requested, but we don't
6421  * know how far we need to replay the WAL before we reach consistency.
6422  * This can happen for example if a base backup is taken from a
6423  * running server using an atomic filesystem snapshot, without calling
6424  * pg_start/stop_backup. Or if you just kill a running master server
6425  * and put it into archive recovery by creating a recovery.conf file.
6426  *
6427  * Our strategy in that case is to perform crash recovery first,
6428  * replaying all the WAL present in pg_wal, and only enter archive
6429  * recovery after that.
6430  *
6431  * But usually we already know how far we need to replay the WAL (up
6432  * to minRecoveryPoint, up to backupEndPoint, or until we see an
6433  * end-of-backup record), and we can enter archive recovery directly.
6434  */
6440  {
6441  InArchiveRecovery = true;
6443  StandbyMode = true;
6444  }
6445 
6446  /*
6447  * Get the last valid checkpoint record. If the latest one according
6448  * to pg_control is broken, try the next-to-last one.
6449  */
6450  checkPointLoc = ControlFile->checkPoint;
6452  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true);
6453  if (record != NULL)
6454  {
6455  ereport(DEBUG1,
6456  (errmsg("checkpoint record is at %X/%X",
6457  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6458  }
6459  else if (StandbyMode)
6460  {
6461  /*
6462  * The last valid checkpoint record required for a streaming
6463  * recovery exists in neither the standby nor the primary.
6464  */
6465  ereport(PANIC,
6466  (errmsg("could not locate a valid checkpoint record")));
6467  }
6468  else
6469  {
6470  checkPointLoc = ControlFile->prevCheckPoint;
6471  record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
6472  if (record != NULL)
6473  {
6474  ereport(LOG,
6475  (errmsg("using previous checkpoint record at %X/%X",
6476  (uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6477  InRecovery = true; /* force recovery even if SHUTDOWNED */
6478  }
6479  else
6480  ereport(PANIC,
6481  (errmsg("could not locate a valid checkpoint record")));
6482  }
6483  memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
6484  wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
6485  }
6486 
6487  /*
6488  * Clear out any old relcache cache files. This is *necessary* if we do
6489  * any WAL replay, since that would probably result in the cache files
6490  * being out of sync with database reality. In theory we could leave them
6491  * in place if the database had been cleanly shut down, but it seems
6492  * safest to just remove them always and let them be rebuilt during the
6493  * first backend startup. These files need to be removed from all
6494  * directories including pg_tblspc; however, the symlinks are created only
6495  * after reading the tablespace_map file in case of archive recovery from
6496  * backup, so we need to clear old relcache files here, after creating
6497  * the symlinks.
6498  */
6500 
6501  /*
6502  * If the location of the checkpoint record is not on the expected
6503  * timeline in the history of the requested timeline, we cannot proceed:
6504  * the backup is not part of the history of the requested timeline.
6505  */
6506  Assert(expectedTLEs); /* was initialized by reading checkpoint
6507  * record */
6508  if (tliOfPointInHistory(checkPointLoc, expectedTLEs) !=
6509  checkPoint.ThisTimeLineID)
6510  {
6511  XLogRecPtr switchpoint;
6512 
6513  /*
6514  * tliSwitchPoint will throw an error if the checkpoint's timeline is
6515  * not in expectedTLEs at all.
6516  */
6518  ereport(FATAL,
6519  (errmsg("requested timeline %u is not a child of this server's history",
6521  errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
6522  (uint32) (ControlFile->checkPoint >> 32),
6525  (uint32) (switchpoint >> 32),
6526  (uint32) switchpoint)));
6527  }
6528 
6529  /*
6530  * The min recovery point should be part of the requested timeline's
6531  * history, too.
6532  */
6536  ereport(FATAL,
6537  (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
6539  (uint32) (ControlFile->minRecoveryPoint >> 32),
6542 
6543  LastRec = RecPtr = checkPointLoc;
6544 
6545  ereport(DEBUG1,
6546  (errmsg_internal("redo record is at %X/%X; shutdown %s",
6547  (uint32) (checkPoint.redo >> 32), (uint32) checkPoint.redo,
6548  wasShutdown ? "TRUE" : "FALSE")));
6549  ereport(DEBUG1,
6550  (errmsg_internal("next transaction ID: %u:%u; next OID: %u",
6551  checkPoint.nextXidEpoch, checkPoint.nextXid,
6552  checkPoint.nextOid)));
6553  ereport(DEBUG1,
6554  (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
6555  checkPoint.nextMulti, checkPoint.nextMultiOffset)));
6556  ereport(DEBUG1,
6557  (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
6558  checkPoint.oldestXid, checkPoint.oldestXidDB)));
6559  ereport(DEBUG1,
6560  (errmsg_internal("oldest MultiXactId: %u, in database %u",
6561  checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
6562  ereport(DEBUG1,
6563  (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
6564  checkPoint.oldestCommitTsXid,
6565  checkPoint.newestCommitTsXid)));
6566  if (!TransactionIdIsNormal(checkPoint.nextXid))
6567  ereport(PANIC,
6568  (errmsg("invalid next transaction ID")));
6569 
6570  /* initialize shared memory variables from the checkpoint record */
6571  ShmemVariableCache->nextXid = checkPoint.nextXid;
6572  ShmemVariableCache->nextOid = checkPoint.nextOid;
6574  MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
6575  SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
6576  SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
6578  checkPoint.newestCommitTsXid);
6579  XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
6580  XLogCtl->ckptXid = checkPoint.nextXid;
6581 
6582  /*
6583  * Initialize replication slots, before there's a chance to remove
6584  * required resources.
6585  */
6587 
6588  /*
6589  * Start up logical state; this needs to be set up now so we have proper data
6590  * during crash recovery.
6591  */
6593 
6594  /*
6595  * Startup MultiXact. We need to do this early to be able to replay
6596  * truncations.
6597  */
6598  StartupMultiXact();
6599 
6600  /*
6601  * Ditto commit timestamps. In a standby, we do it if setting is enabled
6602  * in ControlFile; in a master we base the decision on the GUC itself.
6603  */
6606  StartupCommitTs();
6607 
6608  /*
6609  * Recover knowledge about replay progress of known replication partners.
6610  */
6612 
6613  /*
6614  * Initialize unlogged LSN. On a clean shutdown, it's restored from the
6615  * control file. On recovery, all unlogged relations are blown away, so
6616  * the unlogged LSN counter can be reset too.
6617  */
6620  else
6621  XLogCtl->unloggedLSN = 1;
6622 
6623  /*
6624  * We must replay WAL entries using the same TimeLineID they were created
6625  * under, so temporarily adopt the TLI indicated by the checkpoint (see
6626  * also xlog_redo()).
6627  */
6628  ThisTimeLineID = checkPoint.ThisTimeLineID;
6629 
6630  /*
6631  * Copy any missing timeline history files between 'now' and the recovery
6632  * target timeline from archive to pg_wal. While we don't need those
6633  * files ourselves - the history file of the recovery target timeline
6634  * covers all the previous timelines in the history too - a cascading
6635  * standby server might be interested in them. Or, if you archive the WAL
6636  * from this server to a different archive than the master, it'd be good
6637  * for all the history files to get archived there after failover, so that
6638  * you can use one of the old timelines as a PITR target. Timeline history
6639  * files are small, so it's better to copy them unnecessarily than not
6640  * copy them and regret it later.
6641  */
6643 
6644  lastFullPageWrites = checkPoint.fullPageWrites;
6645 
6648 
6649  if (RecPtr < checkPoint.redo)
6650  ereport(PANIC,
6651  (errmsg("invalid redo in checkpoint record")));
6652 
6653  /*
6654  * Check whether we need to force recovery from WAL. If it appears to
6655  * have been a clean shutdown and we did not have a recovery.conf file,
6656  * then assume no recovery needed.
6657  */
6658  if (checkPoint.redo < RecPtr)
6659  {
6660  if (wasShutdown)
6661  ereport(PANIC,
6662  (errmsg("invalid redo record in shutdown checkpoint")));
6663  InRecovery = true;
6664  }
6665  else if (ControlFile->state != DB_SHUTDOWNED)
6666  InRecovery = true;
6667  else if (ArchiveRecoveryRequested)
6668  {
6669  /* force recovery due to presence of recovery.conf */
6670  InRecovery = true;
6671  }
6672 
6673  /* REDO */
6674  if (InRecovery)
6675  {
6676  int rmid;
6677 
6678  /*
6679  * Update pg_control to show that we are recovering and to show the
6680  * selected checkpoint as the place we are starting from. We also mark
6681  * pg_control with any minimum recovery stop point obtained from a
6682  * backup history file.
6683  */
6684  dbstate_at_startup = ControlFile->state;
6685  if (InArchiveRecovery)
6687  else
6688  {
6689  ereport(LOG,
6690  (errmsg("database system was not properly shut down; "
6691  "automatic recovery in progress")));
6693  ereport(LOG,
6694  (errmsg("crash recovery starts in timeline %u "
6695  "and has target timeline %u",
6697  recoveryTargetTLI)));
6699  }
6701  ControlFile->checkPoint = checkPointLoc;
6702  ControlFile->checkPointCopy = checkPoint;
6703  if (InArchiveRecovery)
6704  {
6705  /* initialize minRecoveryPoint if not set yet */
6706  if (ControlFile->minRecoveryPoint < checkPoint.redo)
6707  {
6708  ControlFile->minRecoveryPoint = checkPoint.redo;
6710  }
6711  }
6712 
6713  /*
6714  * Set backupStartPoint if we're starting recovery from a base backup.
6715  *
6716  * Also set backupEndPoint and use minRecoveryPoint as the backup end
6717  * location if we're starting recovery from a base backup which was
6718  * taken from a standby. In this case, the database system status in
6719  * pg_control must indicate that the database was already in recovery.
6720  * Usually that will be DB_IN_ARCHIVE_RECOVERY but also can be
6721  * DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
6722  * before reaching this point; e.g. because restore_command or
6723  * primary_conninfo were faulty.
6724  *
6725  * Any other state indicates that the backup somehow became corrupted
6726  * and we can't sensibly continue with recovery.
6727  */
6728  if (haveBackupLabel)
6729  {
6730  ControlFile->backupStartPoint = checkPoint.redo;
6731  ControlFile->backupEndRequired = backupEndRequired;
6732 
6733  if (backupFromStandby)
6734  {
6735  if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
6736  dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
6737  ereport(FATAL,
6738  (errmsg("backup_label contains data inconsistent with control file"),
6739  errhint("This means that the backup is corrupted and you will "
6740  "have to use another backup for recovery.")));
6742  }
6743  }
6744  ControlFile->time = (pg_time_t) time(NULL);
6745  /* No need to hold ControlFileLock yet, we aren't up far enough */
6747 
6748  /* initialize our local copy of minRecoveryPoint */
6751 
6752  /*
6753  * Reset pgstat data, because it may be invalid after recovery.
6754  */
6755  pgstat_reset_all();
6756 
6757  /*
6758  * If there was a backup label file, it's done its job and the info
6759  * has now been propagated into pg_control. We must get rid of the
6760  * label file so that if we crash during recovery, we'll pick up at
6761  * the latest recovery restartpoint instead of going all the way back
6762  * to the backup start point. It seems prudent though to just rename
6763  * the file out of the way rather than delete it completely.
6764  */
6765  if (haveBackupLabel)
6766  {
6769  }
6770 
6771  /*
6772  * If there was a tablespace_map file, it's done its job and the
6773  * symlinks have been created. We must get rid of the map file so
6774  * that if we crash during recovery, we don't create symlinks again.
6775  * It seems prudent though to just rename the file out of the way
6776  * rather than delete it completely.
6777  */
6778  if (haveTblspcMap)
6779  {
6782  }
6783 
6784  /* Check that the GUCs used to generate the WAL allow recovery */
6786 
6787  /*
6788  * We're in recovery, so unlogged relations may be trashed and must be
6789  * reset. This should be done BEFORE allowing Hot Standby
6790  * connections, so that read-only backends don't try to read whatever
6791  * garbage is left over from before.
6792  */
6794 
6795  /*
6796  * Likewise, delete any saved transaction snapshot files that got left
6797  * behind by crashed backends.
6798  */
6800 
6801  /*
6802  * Initialize for Hot Standby, if enabled. We won't let backends in
6803  * yet, not until we've reached the min recovery point specified in
6804  * control file and we've established a recovery snapshot from a
6805  * running-xacts WAL record.
6806  */
6808  {
6809  TransactionId *xids;
6810  int nxids;
6811 
6812  ereport(DEBUG1,
6813  (errmsg("initializing for hot standby")));
6814 
6816 
6817  if (wasShutdown)
6818  oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
6819  else
6820  oldestActiveXID = checkPoint.oldestActiveXid;
6821  Assert(TransactionIdIsValid(oldestActiveXID));
6822 
6823  /* Tell procarray about the range of xids it has to deal with */
6825 
6826  /*
6827  * Startup commit log and subtrans only. MultiXact and commit
6828  * timestamp have already been started up and other SLRUs are not
6829  * maintained during recovery and need not be started yet.
6830  */
6831  StartupCLOG();
6832  StartupSUBTRANS(oldestActiveXID);
6833 
6834  /*
6835  * If we're beginning at a shutdown checkpoint, we know that
6836  * nothing was running on the master at this point. So fake-up an
6837  * empty running-xacts record and use that here and now. Recover
6838  * additional standby state for prepared transactions.
6839  */
6840  if (wasShutdown)
6841  {
6842  RunningTransactionsData running;
6843  TransactionId latestCompletedXid;
6844 
6845  /*
6846  * Construct a RunningTransactions snapshot representing a
6847  * shut down server, with only prepared transactions still
6848  * alive. We're never overflowed at this point because all
6849  * subxids are listed with their parent prepared transactions.
6850  */
6851  running.xcnt = nxids;
6852  running.subxcnt = 0;
6853  running.subxid_overflow = false;
6854  running.nextXid = checkPoint.nextXid;
6855  running.oldestRunningXid = oldestActiveXID;
6856  latestCompletedXid = checkPoint.nextXid;
6857  TransactionIdRetreat(latestCompletedXid);
6858  Assert(TransactionIdIsNormal(latestCompletedXid));
6859  running.latestCompletedXid = latestCompletedXid;
6860  running.xids = xids;
6861 
6862  ProcArrayApplyRecoveryInfo(&running);
6863 
6865  }
6866  }
6867 
6868  /* Initialize resource managers */
6869  for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
6870  {
6871  if (RmgrTable[rmid].rm_startup != NULL)
6872  RmgrTable[rmid].rm_startup();
6873  }
6874 
6875  /*
6876  * Initialize shared variables for tracking progress of WAL replay, as
6877  * if we had just replayed the record before the REDO location (or the
6878  * checkpoint record itself, if it's a shutdown checkpoint).
6879  */
6881  if (checkPoint.redo < RecPtr)
6882  XLogCtl->replayEndRecPtr = checkPoint.redo;
6883  else
6890  XLogCtl->recoveryPause = false;
6892 
6893  /* Also ensure XLogReceiptTime has a sane value */
6895 
6896  /*
6897  * Let postmaster know we've started redo now, so that it can launch
6898  * checkpointer to perform restartpoints. We don't bother during
6899  * crash recovery as restartpoints can only be performed during
6900  * archive recovery. And we'd like to keep crash recovery simple, to
6901  * avoid introducing bugs that could affect you when recovering after
6902  * a crash.
6903  *
6904  * After this point, we can no longer assume that we're the only
6905  * process in addition to postmaster! Also, fsync requests are
6906