PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
xlog.h
Go to the documentation of this file.
1 /*
2  * xlog.h
3  *
4  * PostgreSQL transaction log manager
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/include/access/xlog.h
10  */
11 #ifndef XLOG_H
12 #define XLOG_H
13 
14 #include "access/rmgr.h"
15 #include "access/xlogdefs.h"
16 #include "datatype/timestamp.h"
17 #include "lib/stringinfo.h"
18 #include "storage/block.h"
19 #include "storage/buf.h"
20 #include "storage/relfilenode.h"
21 #include "utils/pg_crc.h"
22 
23 /*
24  * The overall layout of an XLOG record is:
25  * Fixed-size header (XLogRecord struct)
26  * rmgr-specific data
27  * BkpBlock
28  * backup block data
29  * BkpBlock
30  * backup block data
31  * ...
32  *
33  * where there can be zero to four backup blocks (as signaled by xl_info flag
34  * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL
35  * files, and we round up SizeOfXLogRecord so that the rmgr data is also
36  * guaranteed to begin on a MAXALIGN boundary. However, no padding is added
37  * to align BkpBlock structs or backup block data.
38  *
39  * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
40  * and also not any backup blocks. xl_tot_len counts everything. Neither
41  * length field is rounded up to an alignment boundary.
42  */
/*
 * Fixed-size header that starts every XLOG record.  The rmgr data follows
 * the header at offset SizeOfXLogRecord, i.e. sizeof(XLogRecord) rounded up
 * with MAXALIGN.
 */
43 typedef struct XLogRecord
44 {
45  uint32 xl_tot_len; /* total len of entire record: header, rmgr data and
                        * any backup blocks; not rounded up for alignment */
46  TransactionId xl_xid; /* xact id */
47  uint32 xl_len; /* len of rmgr data only (excludes this header and any
                    * backup blocks); not rounded up for alignment */
48  uint8 xl_info; /* flag bits: low 4 bits are used by XLOG
                    * (XLR_INFO_MASK), high 4 bits may be used by rmgr */
49  RmgrId xl_rmid; /* resource manager for this record */
50  /* 2 bytes of padding here, initialize to zero */
51  XLogRecPtr xl_prev; /* ptr to previous record in log */
52  pg_crc32 xl_crc; /* CRC for this record */
53 
54  /* If MAXALIGN==8, there are 4 wasted bytes here */
55 
56  /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
57 
58 } XLogRecord;
59 
/* Size of the XLogRecord header after rounding up with MAXALIGN. */
60 #define SizeOfXLogRecord MAXALIGN(sizeof(XLogRecord))
61 
/*
 * Address, as a char pointer, of the first byte after the rounded-up header
 * of "record".  The argument is parenthesized and evaluated exactly once.
 */
62 #define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
63 
64 /*
65  * XLOG uses only low 4 bits of xl_info. High 4 bits may be used by rmgr.
66  */
67 #define XLR_INFO_MASK 0x0F
68 
69 /*
70  * If we backed up any disk blocks with the XLOG record, we use flag bits in
71  * xl_info to signal it. We support backup of up to 4 disk blocks per XLOG
72  * record.
 *
 * XLR_BKP_BLOCK_MASK has the same value as XLR_INFO_MASK (0x0F), so every
 * XLOG-owned bit of xl_info is a backup-block flag.  XLR_BKP_BLOCK(iblk)
 * yields the flag for block number iblk, counting down from the highest of
 * those bits: block 0 is 0x08, block 1 is 0x04, block 2 is 0x02 and block 3
 * is 0x01.  The macro does no range check on iblk.
73  */
74 #define XLR_BKP_BLOCK_MASK 0x0F /* all info bits used for bkp blocks */
75 #define XLR_MAX_BKP_BLOCKS 4
76 #define XLR_BKP_BLOCK(iblk) (0x08 >> (iblk)) /* iblk in 0..3 */
77 
78 /*
 * Sync methods.  NOTE(review): these constants are presumably the values
 * stored in sync_method, declared right after them; confirm against the
 * definition of sync_method.
 */
79 #define SYNC_METHOD_FSYNC 0
80 #define SYNC_METHOD_FDATASYNC 1
81 #define SYNC_METHOD_OPEN 2 /* for O_SYNC */
82 #define SYNC_METHOD_FSYNC_WRITETHROUGH 3
83 #define SYNC_METHOD_OPEN_DSYNC 4 /* for O_DSYNC */
84 extern int sync_method;
85 
86 /*
87  * The rmgr data to be written by XLogInsert() is defined by a chain of
88  * one or more XLogRecData structs. (Multiple structs would be used when
89  * parts of the source data aren't physically adjacent in memory, or when
90  * multiple associated buffers need to be specified.)
91  *
92  * If buffer is valid then XLOG will check whether the buffer must be backed up
93  * (i.e., whether this is the first change to that page since the last checkpoint).
94  * If so, the whole page contents are attached to the XLOG record, and XLOG
95  * sets XLR_BKP_BLOCK(N) bit in xl_info. Note that the buffer must be pinned
96  * and exclusive-locked by the caller, so that it won't change under us.
97  * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
98  * this XLogRecData struct into the XLOG record, since we assume it's present
99  * in the buffer. Therefore, rmgr redo routines MUST pay attention to
100  * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
101  * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
102  * value (ignoring InvalidBuffer) appearing in the rdata chain.
103  *
104  * When buffer is valid, caller must set buffer_std to indicate whether the
105  * page uses standard pd_lower/pd_upper header fields. If this is true, then
106  * XLOG is allowed to omit the free space between pd_lower and pd_upper from
107  * the backed-up page image. Note that even when buffer_std is false, the
108  * page MUST have an LSN field as its first eight bytes!
109  *
110  * Note: data can be NULL to indicate no rmgr data associated with this chain
111  * entry. This can be sensible (i.e., not a wasted entry) if buffer is valid.
112  * The implication is that the buffer has been changed by the operation being
113  * logged, and so may need to be backed up, but the change can be redone using
114  * only information already present elsewhere in the XLOG entry.
115  */
/*
 * One link in the chain of rmgr data handed to XLogInsert().  Entries are
 * linked through "next"; the last entry has next == NULL.
 */
116 typedef struct XLogRecData
117 {
118  char *data; /* start of rmgr data to include; may be NULL when this
                 * entry carries no rmgr data */
119  uint32 len; /* length of rmgr data to include */
120  Buffer buffer; /* buffer associated with data, if any */
121  bool buffer_std; /* buffer has standard pd_lower/pd_upper; caller must
                      * set this whenever buffer is valid */
122  struct XLogRecData *next; /* next struct in chain, or NULL */
123 } XLogRecData;
124 
125 extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current timeline ID (TLI) */
126 
127 /*
128  * Prior to 8.4, all activity during recovery was carried out by the startup
129  * process. This local variable continues to be used in many parts of the
130  * code to indicate actions taken by RecoveryManagers. Other processes that
131  * potentially perform work during recovery should check RecoveryInProgress().
132  * See XLogCtl notes in xlog.c.
 *
 * As stated in the standbyState comment in this file, InRecovery is only
 * valid in the startup process.
133  */
134 extern bool InRecovery;
135 
136 /*
137  * Like InRecovery, standbyState is only valid in the startup process.
138  * In all other processes it will have the value STANDBY_DISABLED (so
139  * InHotStandby will read as FALSE).
140  *
141  * In DISABLED state, we're performing crash recovery or hot standby was
142  * disabled in postgresql.conf.
143  *
144  * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
145  * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
146  * to initialize our master-transaction tracking system.
147  *
148  * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
149  * state. The tracked information might still be incomplete, so we can't allow
150  * connections yet, but redo functions must update the in-memory state when
151  * appropriate.
152  *
153  * In SNAPSHOT_READY mode, we have full knowledge of transactions that are
154  * (or were) running in the master at the current WAL location. Snapshots
155  * can be taken, and read-only queries can be run.
156  */
157 typedef enum
158 {
164 
166 
/*
 * True when standbyState compares greater than or equal to
 * STANDBY_SNAPSHOT_PENDING.  The test is an ordered comparison, so it
 * depends on the declaration order of the standby-state enumerators.
 */
167 #define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING)
168 
169 /*
170  * Recovery target type.
171  * Only set during a Point in Time recovery, not when standby_mode = on
172  */
173 typedef enum
174 {
181 
183 
/* NOTE(review): no description is visible in this header; see the definition. */
184 extern bool reachedConsistency;
185 
186 /*
 * These variables are GUC parameters related to XLOG.  Within this header,
 * XLogArchiveMode and XLogArchiveCommand are read by XLogArchivingActive()
 * and XLogArchiveCommandSet(), and wal_log_hints by XLogHintBitIsNeeded().
 */
187 extern int CheckPointSegments;
188 extern int wal_keep_segments;
189 extern int XLOGbuffers;
190 extern int XLogArchiveTimeout;
191 extern bool XLogArchiveMode;
192 extern char *XLogArchiveCommand;
193 extern bool EnableHotStandby;
194 extern bool fullPageWrites;
195 extern bool wal_log_hints;
196 extern bool log_checkpoints;
197 
198 /* WAL levels */
199 typedef enum WalLevel
200 {
205 } WalLevel;
/*
 * Declared as int, not WalLevel.  The macros below compare it against
 * WAL_LEVEL_* values with >=, so they depend on the numeric ordering of
 * those values.
 */
206 extern int wal_level;
207 
/*
 * XLogArchivingActive: XLogArchiveMode is set and wal_level is at least
 * WAL_LEVEL_ARCHIVE.
 * XLogArchiveCommandSet: XLogArchiveCommand is a non-empty string.  The
 * pointer is dereferenced without a NULL check.
 */
208 #define XLogArchivingActive() (XLogArchiveMode && wal_level >= WAL_LEVEL_ARCHIVE)
209 #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
210 
211 /*
212  * Is WAL-logging necessary for archival or log-shipping, or can we skip
213  * WAL-logging if we fsync() the data before committing instead?
214  */
215 #define XLogIsNeeded() (wal_level >= WAL_LEVEL_ARCHIVE)
216 
217 /*
218  * Is a full-page image needed for hint bit updates?
219  *
220  * Normally, we don't WAL-log hint bit updates, but if checksums are enabled,
221  * we have to protect them against torn page writes. When you only set
222  * individual bits on a page, it's still consistent no matter what combination
223  * of the bits make it to disk, but the checksum wouldn't match. Also WAL-log
224  * them if forced by wal_log_hints=on.
 *
 * Note that this expands to a call of DataChecksumsEnabled(), so it is not
 * a pure variable test.
225  */
226 #define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)
227 
228 /* Do we need to WAL-log information required only for Hot Standby and logical replication? */
229 #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_HOT_STANDBY)
230 
231 /* Do we need to WAL-log information required only for logical replication? */
232 #define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL)
233 
/* XLOG_DEBUG is declared only in builds that define WAL_DEBUG. */
234 #ifdef WAL_DEBUG
235 extern bool XLOG_DEBUG;
236 #endif
237 
238 /*
239  * OR-able request flag bits for checkpoints. The "cause" bits are used only
240  * for logging purposes. Note: the flags must be defined so that it's
241  * sensible to OR together request flags arising from different requestors.
242  */
243 
/*
 * Each flag below is a distinct single bit (0x0001 through 0x0040), which is
 * what makes OR-ing requests together well defined.
 */
244 /* These directly affect the behavior of CreateCheckPoint and subsidiaries */
245 #define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */
246 #define CHECKPOINT_END_OF_RECOVERY 0x0002 /* Like shutdown checkpoint,
247  * but issued at end of WAL
248  * recovery */
249 #define CHECKPOINT_IMMEDIATE 0x0004 /* Do it without delays */
250 #define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */
251 /* These are important to RequestCheckpoint */
252 #define CHECKPOINT_WAIT 0x0010 /* Wait for completion */
253 /* These indicate the cause of a checkpoint request */
254 #define CHECKPOINT_CAUSE_XLOG 0x0020 /* XLOG consumption */
255 #define CHECKPOINT_CAUSE_TIME 0x0040 /* Elapsed time */
257 /* Checkpoint statistics */
258 typedef struct CheckpointStatsData
259 {
260  TimestampTz ckpt_start_t; /* start of checkpoint */
261  TimestampTz ckpt_write_t; /* start of flushing buffers */
262  TimestampTz ckpt_sync_t; /* start of fsyncs */
263  TimestampTz ckpt_sync_end_t; /* end of fsyncs */
264  TimestampTz ckpt_end_t; /* end of checkpoint */
265 
266  int ckpt_bufs_written; /* # of buffers written */
268  int ckpt_segs_added; /* # of new xlog segments created */
269  int ckpt_segs_removed; /* # of xlog segments deleted */
270  int ckpt_segs_recycled; /* # of xlog segments recycled */
272  int ckpt_sync_rels; /* # of relations synced */
273  uint64 ckpt_longest_sync; /* Longest sync for one relation */
274  uint64 ckpt_agg_sync_time; /* The sum of all the individual sync
275  * times, which is not necessarily the
276  * same as the total elapsed time for
277  * the entire sync phase. */
279 
281 
/*
 * Function prototypes.  Only declarations appear in this header, so the
 * notes below are limited to what this file itself states.
 *
 * XLogInsert takes the rmgr data as a chain of XLogRecData structs (see the
 * comment on that struct) and returns an XLogRecPtr.
 */
282 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
283 extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
284 extern void XLogFlush(XLogRecPtr RecPtr);
285 extern bool XLogBackgroundFlush(void);
286 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
287 extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
288 extern int XLogFileOpen(XLogSegNo segno);
289 
290 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
291  BlockNumber blk, char *page, bool page_std);
292 extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
293 extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
294 
295 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
297 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
299 
/*
 * NOTE(review): block_index presumably selects one of the up to
 * XLR_MAX_BKP_BLOCKS backup blocks of "record"; confirm at the definition.
 */
300 extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
301  int block_index,
302  bool get_cleanup_lock, bool keep_buffer);
303 
304 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
305 extern void xlog_desc(StringInfo buf, XLogRecord *record);
306 extern const char *xlog_identify(uint8 info);
307 
308 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
309 
/*
 * RecoveryInProgress is the check this header tells processes other than
 * the startup process to use, in place of reading InRecovery.
 */
310 extern bool RecoveryInProgress(void);
311 extern bool HotStandbyActive(void);
312 extern bool HotStandbyActiveInReplay(void);
313 extern bool XLogInsertAllowed(void);
/* The next two return additional results through their pointer arguments. */
314 extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
315 extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
316 extern XLogRecPtr GetXLogInsertRecPtr(void);
317 extern XLogRecPtr GetXLogWriteRecPtr(void);
318 extern bool RecoveryIsPaused(void);
319 extern void SetRecoveryPause(bool recoveryPause);
320 extern TimestampTz GetLatestXTime(void);
322 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
323 
324 extern void UpdateControlFile(void);
325 extern uint64 GetSystemIdentifier(void);
/* DataChecksumsEnabled is called by the XLogHintBitIsNeeded() macro. */
326 extern bool DataChecksumsEnabled(void);
328 extern Size XLOGShmemSize(void);
329 extern void XLOGShmemInit(void);
330 extern void BootStrapXLOG(void);
331 extern void StartupXLOG(void);
332 extern void ShutdownXLOG(int code, Datum arg);
333 extern void InitXLOGAccess(void);
/* "flags" is presumably an OR of the CHECKPOINT_* request bits; confirm at the definitions. */
334 extern void CreateCheckPoint(int flags);
335 extern bool CreateRestartPoint(int flags);
336 extern void XLogPutNextOid(Oid nextOid);
337 extern XLogRecPtr XLogRestorePoint(const char *rpName);
338 extern void UpdateFullPageWrites(void);
339 extern XLogRecPtr GetRedoRecPtr(void);
340 extern XLogRecPtr GetInsertRecPtr(void);
341 extern XLogRecPtr GetFlushRecPtr(void);
342 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
343 
344 extern bool CheckPromoteSignal(void);
345 extern void WakeupRecovery(void);
346 extern void SetWalWriterSleeping(bool sleeping);
347 
348 /*
349  * Starting/stopping a base backup
350  */
/*
 * Both the start and the stop function return an XLogRecPtr and take a
 * TimeLineID pointer (starttli_p / stoptli_p) as an output argument.
 * NOTE(review): ownership of *labelfile is not visible here; check the
 * definitions before freeing or reusing it.
 */
351 extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
352  TimeLineID *starttli_p, char **labelfile);
353 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
354  TimeLineID *stoptli_p);
355 extern void do_pg_abort_backup(void);
357 /* File path names (all relative to $PGDATA) */
358 #define BACKUP_LABEL_FILE "backup_label"
359 #define BACKUP_LABEL_OLD "backup_label.old"
360 
361 #endif /* XLOG_H */