PostgreSQL Source Code  git master
xlogutils.c File Reference
#include "postgres.h"
#include <unistd.h>
#include "access/timeline.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
#include "utils/rel.h"
Include dependency graph for xlogutils.c:

Go to the source code of this file.

Data Structures

struct  xl_invalid_page_key
 
struct  xl_invalid_page
 
struct  FakeRelCacheEntryData
 

Typedefs

typedef struct xl_invalid_page_key xl_invalid_page_key
 
typedef struct xl_invalid_page xl_invalid_page
 
typedef FakeRelCacheEntryData * FakeRelCacheEntry
 

Functions

static void report_invalid_page (int elevel, RelFileNode node, ForkNumber forkno, BlockNumber blkno, bool present)
 
static void log_invalid_page (RelFileNode node, ForkNumber forkno, BlockNumber blkno, bool present)
 
static void forget_invalid_pages (RelFileNode node, ForkNumber forkno, BlockNumber minblkno)
 
static void forget_invalid_pages_db (Oid dbid)
 
bool XLogHaveInvalidPages (void)
 
void XLogCheckInvalidPages (void)
 
XLogRedoAction XLogReadBufferForRedo (XLogReaderState *record, uint8 block_id, Buffer *buf)
 
Buffer XLogInitBufferForRedo (XLogReaderState *record, uint8 block_id)
 
XLogRedoAction XLogReadBufferForRedoExtended (XLogReaderState *record, uint8 block_id, ReadBufferMode mode, bool get_cleanup_lock, Buffer *buf)
 
Buffer XLogReadBufferExtended (RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
 
Relation CreateFakeRelcacheEntry (RelFileNode rnode)
 
void FreeFakeRelcacheEntry (Relation fakerel)
 
void XLogDropRelation (RelFileNode rnode, ForkNumber forknum)
 
void XLogDropDatabase (Oid dbid)
 
void XLogTruncateRelation (RelFileNode rnode, ForkNumber forkNum, BlockNumber nblocks)
 
void XLogReadDetermineTimeline (XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength, TimeLineID currTLI)
 
void wal_segment_open (XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
 
void wal_segment_close (XLogReaderState *state)
 
int read_local_xlog_page (XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
 
void WALReadRaiseError (WALReadError *errinfo)
 

Variables

bool ignore_invalid_pages = false
 
bool InRecovery = false
 
HotStandbyState standbyState = STANDBY_DISABLED
 
static HTAB * invalid_page_tab = NULL
 

Typedef Documentation

◆ FakeRelCacheEntry

Definition at line 557 of file xlogutils.c.

◆ xl_invalid_page

◆ xl_invalid_page_key

Function Documentation

◆ CreateFakeRelcacheEntry()

Relation CreateFakeRelcacheEntry ( RelFileNode  rnode)

Definition at line 574 of file xlogutils.c.

575 {
576  FakeRelCacheEntry fakeentry;
577  Relation rel;
578 
579  /* Allocate the Relation struct and all related space in one block. */
580  fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
581  rel = (Relation) fakeentry;
582 
583  rel->rd_rel = &fakeentry->pgc;
584  rel->rd_node = rnode;
585 
586  /*
587  * We will never be working with temp rels during recovery or while
588  * syncing WAL-skipped files.
589  */
590  rel->rd_backend = InvalidBackendId;
591 
592  /* It must be a permanent table here */
593  rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
594 
595  /* We don't know the name of the relation; use relfilenode instead */
596  sprintf(RelationGetRelationName(rel), "%u", rnode.relNode);
597 
598  /*
599  * We set up the lockRelId in case anything tries to lock the dummy
600  * relation. Note that this is fairly bogus since relNode may be
601  * different from the relation's OID. It shouldn't really matter though.
602  * In recovery, we are running by ourselves and can't have any lock
603  * conflicts. While syncing, we already hold AccessExclusiveLock.
604  */
605  rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode;
606  rel->rd_lockInfo.lockRelId.relId = rnode.relNode;
607 
608  rel->rd_smgr = NULL;
609 
610  return rel;
611 }
#define InvalidBackendId
Definition: backendid.h:23
void * palloc0(Size size)
Definition: mcxt.c:1093
#define sprintf
Definition: port.h:227
#define RelationGetRelationName(relation)
Definition: rel.h:512
struct RelationData * Relation
Definition: relcache.h:26
FormData_pg_class pgc
Definition: xlogutils.c:554
LockRelId lockRelId
Definition: rel.h:45
Oid relId
Definition: rel.h:39
Oid dbId
Definition: rel.h:40
LockInfoData rd_lockInfo
Definition: rel.h:112
RelFileNode rd_node
Definition: rel.h:56
BackendId rd_backend
Definition: rel.h:59
SMgrRelation rd_smgr
Definition: rel.h:57
Form_pg_class rd_rel
Definition: rel.h:109

References LockRelId::dbId, RelFileNode::dbNode, InvalidBackendId, LockInfoData::lockRelId, palloc0(), FakeRelCacheEntryData::pgc, RelationData::rd_backend, RelationData::rd_lockInfo, RelationData::rd_node, RelationData::rd_rel, RelationData::rd_smgr, RelationGetRelationName, LockRelId::relId, RelFileNode::relNode, and sprintf.

Referenced by heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), smgr_redo(), and smgrDoPendingSyncs().

◆ forget_invalid_pages()

static void forget_invalid_pages ( RelFileNode  node,
ForkNumber  forkno,
BlockNumber  minblkno 
)
static

Definition at line 165 of file xlogutils.c.

166 {
167  HASH_SEQ_STATUS status;
168  xl_invalid_page *hentry;
169 
170  if (invalid_page_tab == NULL)
171  return; /* nothing to do */
172 
173  hash_seq_init(&status, invalid_page_tab);
174 
175  while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
176  {
177  if (RelFileNodeEquals(hentry->key.node, node) &&
178  hentry->key.forkno == forkno &&
179  hentry->key.blkno >= minblkno)
180  {
181  if (message_level_is_interesting(DEBUG2))
182  {
183  char *path = relpathperm(hentry->key.node, forkno);
184 
185  elog(DEBUG2, "page %u of relation %s has been dropped",
186  hentry->key.blkno, path);
187  pfree(path);
188  }
189 
190  if (hash_search(invalid_page_tab,
191  (void *) &hentry->key,
192  HASH_REMOVE, NULL) == NULL)
193  elog(ERROR, "hash table corrupted");
194  }
195  }
196 }
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:954
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1436
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1426
bool message_level_is_interesting(int elevel)
Definition: elog.c:265
#define DEBUG2
Definition: elog.h:23
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
@ HASH_REMOVE
Definition: hsearch.h:115
void pfree(void *pointer)
Definition: mcxt.c:1169
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:229
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
#define relpathperm(rnode, forknum)
Definition: relpath.h:83
RelFileNode node
Definition: xlogutils.c:69
BlockNumber blkno
Definition: xlogutils.c:71
ForkNumber forkno
Definition: xlogutils.c:70
xl_invalid_page_key key
Definition: xlogutils.c:76
static HTAB * invalid_page_tab
Definition: xlogutils.c:80

References xl_invalid_page_key::blkno, DEBUG2, elog, ERROR, xl_invalid_page_key::forkno, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), invalid_page_tab, xl_invalid_page::key, message_level_is_interesting(), xl_invalid_page_key::node, pfree(), RelFileNodeEquals, relpathperm, and status().

Referenced by XLogDropRelation(), and XLogTruncateRelation().

◆ forget_invalid_pages_db()

static void forget_invalid_pages_db ( Oid  dbid)
static

Definition at line 200 of file xlogutils.c.

201 {
202  HASH_SEQ_STATUS status;
203  xl_invalid_page *hentry;
204 
205  if (invalid_page_tab == NULL)
206  return; /* nothing to do */
207 
208  hash_seq_init(&status, invalid_page_tab);
209 
210  while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
211  {
212  if (hentry->key.node.dbNode == dbid)
213  {
214  if (message_level_is_interesting(DEBUG2))
215  {
216  char *path = relpathperm(hentry->key.node, hentry->key.forkno);
217 
218  elog(DEBUG2, "page %u of relation %s has been dropped",
219  hentry->key.blkno, path);
220  pfree(path);
221  }
222 
223  if (hash_search(invalid_page_tab,
224  (void *) &hentry->key,
225  HASH_REMOVE, NULL) == NULL)
226  elog(ERROR, "hash table corrupted");
227  }
228  }
229 }

References xl_invalid_page_key::blkno, RelFileNode::dbNode, DEBUG2, elog, ERROR, xl_invalid_page_key::forkno, HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), invalid_page_tab, xl_invalid_page::key, message_level_is_interesting(), xl_invalid_page_key::node, pfree(), relpathperm, and status().

Referenced by XLogDropDatabase().

◆ FreeFakeRelcacheEntry()

void FreeFakeRelcacheEntry ( Relation  fakerel)

Definition at line 617 of file xlogutils.c.

618 {
619  /* make sure the fakerel is not referenced by the SmgrRelation anymore */
620  if (fakerel->rd_smgr != NULL)
621  smgrclearowner(&fakerel->rd_smgr, fakerel->rd_smgr);
622  pfree(fakerel);
623 }
void smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
Definition: smgr.c:227

References pfree(), RelationData::rd_smgr, and smgrclearowner().

Referenced by heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), smgr_redo(), and smgrDoPendingSyncs().

◆ log_invalid_page()

static void log_invalid_page ( RelFileNode  node,
ForkNumber  forkno,
BlockNumber  blkno,
bool  present 
)
static

Definition at line 101 of file xlogutils.c.

103 {
104  xl_invalid_page_key key;
105  xl_invalid_page *hentry;
106  bool found;
107 
108  /*
109  * Once recovery has reached a consistent state, the invalid-page table
110  * should be empty and remain so. If a reference to an invalid page is
111  * found after consistency is reached, PANIC immediately. This might seem
112  * aggressive, but it's better than letting the invalid reference linger
113  * in the hash table until the end of recovery and PANIC there, which
114  * might come only much later if this is a standby server.
115  */
116  if (reachedConsistency)
117  {
118  report_invalid_page(WARNING, node, forkno, blkno, present);
119  elog(ignore_invalid_pages ? WARNING : PANIC,
120  "WAL contains references to invalid pages");
121  }
122 
123  /*
124  * Log references to invalid pages at DEBUG1 level. This allows some
125  * tracing of the cause (note the elog context mechanism will tell us
126  * something about the XLOG record that generated the reference).
127  */
128  if (message_level_is_interesting(DEBUG1))
129  report_invalid_page(DEBUG1, node, forkno, blkno, present);
130 
131  if (invalid_page_tab == NULL)
132  {
133  /* create hash table when first needed */
134  HASHCTL ctl;
135 
136  ctl.keysize = sizeof(xl_invalid_page_key);
137  ctl.entrysize = sizeof(xl_invalid_page);
138 
139  invalid_page_tab = hash_create("XLOG invalid-page table",
140  100,
141  &ctl,
142  HASH_ELEM | HASH_BLOBS);
143  }
144 
145  /* we currently assume xl_invalid_page_key contains no padding */
146  key.node = node;
147  key.forkno = forkno;
148  key.blkno = blkno;
149  hentry = (xl_invalid_page *)
150  hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found);
151 
152  if (!found)
153  {
154  /* hash_search already filled in the key */
155  hentry->present = present;
156  }
157  else
158  {
159  /* repeat reference ... leave "present" as it was */
160  }
161 }
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:349
#define WARNING
Definition: elog.h:30
#define PANIC
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:24
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
bool reachedConsistency
Definition: xlog.c:881
bool ignore_invalid_pages
Definition: xlogutils.c:36
struct xl_invalid_page xl_invalid_page
struct xl_invalid_page_key xl_invalid_page_key
static void report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno, BlockNumber blkno, bool present)
Definition: xlogutils.c:85

References DEBUG1, elog, HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), ignore_invalid_pages, invalid_page_tab, sort-test::key, HASHCTL::keysize, message_level_is_interesting(), PANIC, xl_invalid_page::present, reachedConsistency, report_invalid_page(), and WARNING.

Referenced by XLogReadBufferExtended().

◆ read_local_xlog_page()

int read_local_xlog_page ( XLogReaderState *  state,
XLogRecPtr  targetPagePtr,
int  reqLen,
XLogRecPtr  targetRecPtr,
char *  cur_page 
)

Definition at line 852 of file xlogutils.c.

854 {
855  XLogRecPtr read_upto,
856  loc;
857  TimeLineID tli;
858  int count;
859  WALReadError errinfo;
860  TimeLineID currTLI;
861 
862  loc = targetPagePtr + reqLen;
863 
864  /* Loop waiting for xlog to be available if necessary */
865  while (1)
866  {
867  /*
868  * Determine the limit of xlog we can currently read to, and what the
869  * most recent timeline is.
870  */
871  if (!RecoveryInProgress())
872  read_upto = GetFlushRecPtr(&currTLI);
873  else
874  read_upto = GetXLogReplayRecPtr(&currTLI);
875  tli = currTLI;
876 
877  /*
878  * Check which timeline to get the record from.
879  *
880  * We have to do it each time through the loop because if we're in
881  * recovery as a cascading standby, the current timeline might've
882  * become historical. We can't rely on RecoveryInProgress() because in
883  * a standby configuration like
884  *
885  * A => B => C
886  *
887  * if we're a logical decoding session on C, and B gets promoted, our
888  * timeline will change while we remain in recovery.
889  *
890  * We can't just keep reading from the old timeline as the last WAL
891  * archive in the timeline will get renamed to .partial by
892  * StartupXLOG().
893  *
894  * If that happens after our caller determined the TLI but before
895  * we actually read the xlog page, we might still try to read from the
896  * old (now renamed) segment and fail. There's not much we can do
897  * about this, but it can only happen when we're a leaf of a cascading
898  * standby whose primary gets promoted while we're decoding, so a
899  * one-off ERROR isn't too bad.
900  */
901  XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
902 
903  if (state->currTLI == currTLI)
904  {
905 
906  if (loc <= read_upto)
907  break;
908 
909  CHECK_FOR_INTERRUPTS();
910  pg_usleep(1000L);
911  }
912  else
913  {
914  /*
915  * We're on a historical timeline, so limit reading to the switch
916  * point where we moved to the next timeline.
917  *
918  * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
919  * about the new timeline, so we must've received past the end of
920  * it.
921  */
922  read_upto = state->currTLIValidUntil;
923 
924  /*
925  * Setting tli to our wanted record's TLI is slightly wrong; the
926  * page might begin on an older timeline if it contains a timeline
927  * switch, since its xlog segment will have been copied from the
928  * prior timeline. This is pretty harmless though, as nothing
929  * cares so long as the timeline doesn't go backwards. We should
930  * read the page header instead; FIXME someday.
931  */
932  tli = state->currTLI;
933 
934  /* No need to wait on a historical timeline */
935  break;
936  }
937  }
938 
939  if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
940  {
941  /*
942  * more than one block available; read only that block, have caller
943  * come back if they need more.
944  */
945  count = XLOG_BLCKSZ;
946  }
947  else if (targetPagePtr + reqLen > read_upto)
948  {
949  /* not enough data there */
950  return -1;
951  }
952  else
953  {
954  /* enough bytes available to satisfy the request */
955  count = read_upto - targetPagePtr;
956  }
957 
958  /*
959  * Even though we just determined how much of the page can be validly read
960  * as 'count', read the whole page anyway. It's guaranteed to be
961  * zero-padded up to the page boundary if it's incomplete.
962  */
963  if (!WALRead(state, cur_page, targetPagePtr, XLOG_BLCKSZ, tli,
964  &errinfo))
965  WALReadRaiseError(&errinfo);
966 
967  /* number of valid bytes in the buffer */
968  return count;
969 }
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
void pg_usleep(long microsec)
Definition: signal.c:53
Definition: regguts.h:318
bool RecoveryInProgress(void)
Definition: xlog.c:8404
XLogRecPtr GetFlushRecPtr(TimeLineID *insertTLI)
Definition: xlog.c:8713
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
Definition: xlog.c:11966
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1100
void XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength, TimeLineID currTLI)
Definition: xlogutils.c:709
void WALReadRaiseError(WALReadError *errinfo)
Definition: xlogutils.c:976

References CHECK_FOR_INTERRUPTS, GetFlushRecPtr(), GetXLogReplayRecPtr(), pg_usleep(), RecoveryInProgress(), WALRead(), WALReadRaiseError(), and XLogReadDetermineTimeline().

Referenced by create_logical_replication_slot(), pg_logical_replication_slot_advance(), pg_logical_slot_get_changes_guts(), and XlogReadTwoPhaseData().

◆ report_invalid_page()

static void report_invalid_page ( int  elevel,
RelFileNode  node,
ForkNumber  forkno,
BlockNumber  blkno,
bool  present 
)
static

Definition at line 85 of file xlogutils.c.

87 {
88  char *path = relpathperm(node, forkno);
89 
90  if (present)
91  elog(elevel, "page %u of relation %s is uninitialized",
92  blkno, path);
93  else
94  elog(elevel, "page %u of relation %s does not exist",
95  blkno, path);
96  pfree(path);
97 }

References elog, pfree(), and relpathperm.

Referenced by log_invalid_page(), and XLogCheckInvalidPages().

◆ wal_segment_close()

void wal_segment_close ( XLogReaderState *  state)

Definition at line 833 of file xlogutils.c.

834 {
835  close(state->seg.ws_file);
836  /* need to check errno? */
837  state->seg.ws_file = -1;
838 }
#define close(a)
Definition: win32.h:12

References close.

Referenced by create_logical_replication_slot(), CreateReplicationSlot(), pg_logical_replication_slot_advance(), pg_logical_slot_get_changes_guts(), StartLogicalReplication(), StartReplication(), StartupXLOG(), WalSndErrorCleanup(), XlogReadTwoPhaseData(), and XLogSendPhysical().

◆ wal_segment_open()

void wal_segment_open ( XLogReaderState *  state,
XLogSegNo  nextSegNo,
TimeLineID *  tli_p 
)

Definition at line 808 of file xlogutils.c.

810 {
811  TimeLineID tli = *tli_p;
812  char path[MAXPGPATH];
813 
814  XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
815  state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
816  if (state->seg.ws_file >= 0)
817  return;
818 
819  if (errno == ENOENT)
820  ereport(ERROR,
821  (errcode_for_file_access(),
822  errmsg("requested WAL segment %s has already been removed",
823  path)));
824  else
825  ereport(ERROR,
826  (errcode_for_file_access(),
827  errmsg("could not open file \"%s\": %m",
828  path)));
829 }
#define PG_BINARY
Definition: c.h:1268
int errcode_for_file_access(void)
Definition: elog.c:716
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ereport(elevel,...)
Definition: elog.h:143
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1071
#define MAXPGPATH
#define XLogFilePath(path, tli, logSegNo, wal_segsz_bytes)

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, MAXPGPATH, PG_BINARY, and XLogFilePath.

Referenced by create_logical_replication_slot(), pg_logical_replication_slot_advance(), pg_logical_slot_get_changes_guts(), and XlogReadTwoPhaseData().

◆ WALReadRaiseError()

void WALReadRaiseError ( WALReadError *  errinfo)

Definition at line 976 of file xlogutils.c.

977 {
978  WALOpenSegment *seg = &errinfo->wre_seg;
979  char fname[MAXFNAMELEN];
980 
981  XLogFileName(fname, seg->ws_tli, seg->ws_segno, wal_segment_size);
982 
983  if (errinfo->wre_read < 0)
984  {
985  errno = errinfo->wre_errno;
986  ereport(ERROR,
987  (errcode_for_file_access(),
988  errmsg("could not read from log segment %s, offset %d: %m",
989  fname, errinfo->wre_off)));
990  }
991  else if (errinfo->wre_read == 0)
992  {
993  ereport(ERROR,
994  (errcode(ERRCODE_DATA_CORRUPTED),
995  errmsg("could not read from log segment %s, offset %d: read %d of %d",
996  fname, errinfo->wre_off, errinfo->wre_read,
997  errinfo->wre_req)));
998  }
999 }
int errcode(int sqlerrcode)
Definition: elog.c:693
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
XLogSegNo ws_segno
Definition: xlogreader.h:47
TimeLineID ws_tli
Definition: xlogreader.h:48
WALOpenSegment wre_seg
Definition: xlogreader.h:301
int wal_segment_size
Definition: xlog.c:123
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
#define MAXFNAMELEN

References ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, MAXFNAMELEN, wal_segment_size, WALReadError::wre_errno, WALReadError::wre_off, WALReadError::wre_read, WALReadError::wre_req, WALReadError::wre_seg, WALOpenSegment::ws_segno, WALOpenSegment::ws_tli, and XLogFileName.

Referenced by logical_read_xlog_page(), read_local_xlog_page(), and XLogSendPhysical().

◆ XLogCheckInvalidPages()

void XLogCheckInvalidPages ( void  )

Definition at line 243 of file xlogutils.c.

244 {
245  HASH_SEQ_STATUS status;
246  xl_invalid_page *hentry;
247  bool foundone = false;
248 
249  if (invalid_page_tab == NULL)
250  return; /* nothing to do */
251 
252  hash_seq_init(&status, invalid_page_tab);
253 
254  /*
255  * Our strategy is to emit WARNING messages for all remaining entries and
256  * only PANIC after we've dumped all the available info.
257  */
258  while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
259  {
260  report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
261  hentry->key.blkno, hentry->present);
262  foundone = true;
263  }
264 
265  if (foundone)
266  elog(ignore_invalid_pages ? WARNING : PANIC,
267  "WAL contains references to invalid pages");
268 
269  hash_destroy(invalid_page_tab);
270  invalid_page_tab = NULL;
271 }
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:862

References xl_invalid_page_key::blkno, elog, xl_invalid_page_key::forkno, hash_destroy(), hash_seq_init(), hash_seq_search(), ignore_invalid_pages, invalid_page_tab, xl_invalid_page::key, xl_invalid_page_key::node, PANIC, xl_invalid_page::present, report_invalid_page(), status(), and WARNING.

Referenced by CheckRecoveryConsistency().

◆ XLogDropDatabase()

void XLogDropDatabase ( Oid  dbid)

Definition at line 643 of file xlogutils.c.

644 {
645  /*
646  * This is unnecessarily heavy-handed, as it will close SMgrRelation
647  * objects for other databases as well. DROP DATABASE occurs seldom enough
648  * that it's not worth introducing a variant of smgrclose for just this
649  * purpose. XXX: Or should we rather leave the smgr entries dangling?
650  */
651  smgrcloseall();
652 
653  forget_invalid_pages_db(dbid);
654 }
void smgrcloseall(void)
Definition: smgr.c:286
static void forget_invalid_pages_db(Oid dbid)
Definition: xlogutils.c:200

References forget_invalid_pages_db(), and smgrcloseall().

Referenced by dbase_redo().

◆ XLogDropRelation()

void XLogDropRelation ( RelFileNode  rnode,
ForkNumber  forknum 
)

Definition at line 632 of file xlogutils.c.

633 {
634  forget_invalid_pages(rnode, forknum, 0);
635 }
static void forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno)
Definition: xlogutils.c:165

References forget_invalid_pages().

Referenced by DropRelationFiles().

◆ XLogHaveInvalidPages()

bool XLogHaveInvalidPages ( void  )

Definition at line 233 of file xlogutils.c.

234 {
235  if (invalid_page_tab != NULL &&
236  hash_get_num_entries(invalid_page_tab) > 0)
237  return true;
238  return false;
239 }
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1382

References hash_get_num_entries(), and invalid_page_tab.

Referenced by RecoveryRestartPoint().

◆ XLogInitBufferForRedo()

◆ XLogReadBufferExtended()

Buffer XLogReadBufferExtended ( RelFileNode  rnode,
ForkNumber  forknum,
BlockNumber  blkno,
ReadBufferMode  mode 
)

Definition at line 459 of file xlogutils.c.

461 {
462  BlockNumber lastblock;
463  Buffer buffer;
464  SMgrRelation smgr;
465 
466  Assert(blkno != P_NEW);
467 
468  /* Open the relation at smgr level */
469  smgr = smgropen(rnode, InvalidBackendId);
470 
471  /*
472  * Create the target file if it doesn't already exist. This lets us cope
473  * if the replay sequence contains writes to a relation that is later
474  * deleted. (The original coding of this routine would instead suppress
475  * the writes, but that seems like it risks losing valuable data if the
476  * filesystem loses an inode during a crash. Better to write the data
477  * until we are actually told to delete the file.)
478  */
479  smgrcreate(smgr, forknum, true);
480 
481  lastblock = smgrnblocks(smgr, forknum);
482 
483  if (blkno < lastblock)
484  {
485  /* page exists in file */
486  buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
487  mode, NULL);
488  }
489  else
490  {
491  /* hm, page doesn't exist in file */
492  if (mode == RBM_NORMAL)
493  {
494  log_invalid_page(rnode, forknum, blkno, false);
495  return InvalidBuffer;
496  }
497  if (mode == RBM_NORMAL_NO_LOG)
498  return InvalidBuffer;
499  /* OK to extend the file */
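500  /* we do this in recovery only - no rel-extension lock needed */
501  Assert(InRecovery);
502  buffer = InvalidBuffer;
503  do
504  {
505  if (buffer != InvalidBuffer)
506  {
507  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
508  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
509  ReleaseBuffer(buffer);
510  }
511  buffer = ReadBufferWithoutRelcache(rnode, forknum,
512  P_NEW, mode, NULL);
513  }
514  while (BufferGetBlockNumber(buffer) < blkno);
515  /* Handle the corner case that P_NEW returns non-consecutive pages */
516  if (BufferGetBlockNumber(buffer) != blkno)
517  {
518  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
519  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
520  ReleaseBuffer(buffer);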
521  buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
522  mode, NULL);
523  }
524  }
525 
526  if (mode == RBM_NORMAL)
527  {
528  /* check that page has been initialized */
529  Page page = (Page) BufferGetPage(buffer);
530 
531  /*
532  * We assume that PageIsNew is safe without a lock. During recovery,
533  * there should be no other backends that could modify the buffer at
534  * the same time.
535  */
536  if (PageIsNew(page))
537  {
538  ReleaseBuffer(buffer);
539  log_invalid_page(rnode, forknum, blkno, true);
540  return InvalidBuffer;
541  }
542  }
543 
544  return buffer;
545 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2748
Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:780
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3757
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3996
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define P_NEW
Definition: bufmgr.h:91
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:42
@ RBM_NORMAL
Definition: bufmgr.h:39
@ RBM_NORMAL_NO_LOG
Definition: bufmgr.h:45
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
Pointer Page
Definition: bufpage.h:78
#define PageIsNew(page)
Definition: bufpage.h:228
Assert(fmt[strlen(fmt) - 1] !='\n')
static PgChecksumMode mode
Definition: pg_checksums.c:65
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:548
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:333
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:146
bool InRecovery
Definition: xlogutils.c:52
static void log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno, bool present)
Definition: xlogutils.c:101

References Assert(), BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, InRecovery, InvalidBackendId, InvalidBuffer, LockBuffer(), log_invalid_page(), mode, P_NEW, PageIsNew, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBufferWithoutRelcache(), ReleaseBuffer(), smgrcreate(), smgrnblocks(), and smgropen().

Referenced by checkXLogConsistency(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

◆ XLogReadBufferForRedo()

XLogRedoAction XLogReadBufferForRedo ( XLogReaderState *  record,
uint8  block_id,
Buffer *  buf 
)

Definition at line 312 of file xlogutils.c.

314 {
315  return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
316  false, buf);
317 }

References buf, RBM_NORMAL, and XLogReadBufferForRedoExtended().

Referenced by _bt_clear_incomplete_split(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_split(), btree_xlog_unlink_page(), generic_redo(), ginRedoClearIncompleteSplit(), ginRedoDeletePage(), ginRedoInsert(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageUpdateRecord(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), and xlog_redo().

◆ XLogReadBufferForRedoExtended()

XLogRedoAction XLogReadBufferForRedoExtended ( XLogReaderState *  record,
uint8  block_id,
ReadBufferMode  mode,
bool  get_cleanup_lock,
Buffer *  buf 
)

Definition at line 349 of file xlogutils.c.

353 {
354  XLogRecPtr lsn = record->EndRecPtr;
355  RelFileNode rnode;
356  ForkNumber forknum;
357  BlockNumber blkno;
358  Page page;
359  bool zeromode;
360  bool willinit;
361 
362  if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
363  {
364  /* Caller specified a bogus block_id */
365  elog(PANIC, "failed to locate backup block with ID %d", block_id);
366  }
367 
368  /*
369  * Make sure that if the block is marked with WILL_INIT, the caller is
370  * going to initialize it. And vice versa.
371  */
372  zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
373  willinit = (record->blocks[block_id].flags & BKPBLOCK_WILL_INIT) != 0;
374  if (willinit && !zeromode)
375  elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
376  if (!willinit && zeromode)
377  elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
378 
379  /* If it has a full-page image and it should be restored, do it. */
380  if (XLogRecBlockImageApply(record, block_id))
381  {
382  Assert(XLogRecHasBlockImage(record, block_id));
383  *buf = XLogReadBufferExtended(rnode, forknum, blkno,
384  get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
385  page = BufferGetPage(*buf);
386  if (!RestoreBlockImage(record, block_id, page))
387  elog(ERROR, "failed to restore block image");
388 
389  /*
390  * The page may be uninitialized. If so, we can't set the LSN because
391  * that would corrupt the page.
392  */
393  if (!PageIsNew(page))
394  {
395  PageSetLSN(page, lsn);
396  }
397 
398  MarkBufferDirty(*buf);
399 
400  /*
401  * At the end of crash recovery the init forks of unlogged relations
402  * are copied, without going through shared buffers. So we need to
403  * force the on-disk state of init forks to always be in sync with the
404  * state in shared buffers.
405  */
406  if (forknum == INIT_FORKNUM)
407  FlushOneBuffer(*buf);
408 
409  return BLK_RESTORED;
410  }
411  else
412  {
413  *buf = XLogReadBufferExtended(rnode, forknum, blkno, mode);
414  if (BufferIsValid(*buf))
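415  {
416  if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
417  {
418  if (get_cleanup_lock)
419  LockBufferForCleanup(*buf);
420  else
421  LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
422  }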
423  if (lsn <= PageGetLSN(BufferGetPage(*buf)))
424  return BLK_DONE;
425  else
426  return BLK_NEEDS_REDO;
427  }
428  else
429  return BLK_NOTFOUND;
430  }
431 }
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1565
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4053
void FlushOneBuffer(Buffer buffer)
Definition: bufmgr.c:3737
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
#define PageSetLSN(page, lsn)
Definition: bufpage.h:367
#define PageGetLSN(page)
Definition: bufpage.h:365
ForkNumber
Definition: relpath.h:41
@ INIT_FORKNUM
Definition: relpath.h:46
XLogRecPtr EndRecPtr
Definition: xlogreader.h:176
DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID+1]
Definition: xlogreader.h:207
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1531
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:1584
#define XLogRecBlockImageApply(decoder, block_id)
Definition: xlogreader.h:327
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:325
#define BKPBLOCK_WILL_INIT
Definition: xlogrecord.h:185
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:459
@ BLK_RESTORED
Definition: xlogutils.h:73
@ BLK_NEEDS_REDO
Definition: xlogutils.h:71
@ BLK_DONE
Definition: xlogutils.h:72
@ BLK_NOTFOUND
Definition: xlogutils.h:74

References Assert(), BKPBLOCK_WILL_INIT, BLK_DONE, BLK_NEEDS_REDO, BLK_NOTFOUND, BLK_RESTORED, XLogReaderState::blocks, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetPage, BufferIsValid, elog, XLogReaderState::EndRecPtr, ERROR, DecodedBkpBlock::flags, FlushOneBuffer(), INIT_FORKNUM, LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), mode, PageGetLSN, PageIsNew, PageSetLSN, PANIC, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RestoreBlockImage(), XLogReadBufferExtended(), XLogRecBlockImageApply, XLogRecGetBlockTag(), and XLogRecHasBlockImage.

Referenced by btree_xlog_vacuum(), hash_xlog_delete(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_squeeze_page(), hash_xlog_vacuum_one_page(), heap_xlog_prune(), heap_xlog_vacuum(), heap_xlog_visible(), XLogInitBufferForRedo(), and XLogReadBufferForRedo().

◆ XLogReadDetermineTimeline()

void XLogReadDetermineTimeline ( XLogReaderState * state,
XLogRecPtr  wantPage,
uint32  wantLength,
TimeLineID  currTLI 
)

Definition at line 709 of file xlogutils.c.

711 {
 /*
  * Work out which timeline the page starting at wantPage has to be read
  * from, and record the answer in state->currTLI,
  * state->currTLIValidUntil and state->nextTLI. The three early returns
  * below leave the values already cached in state untouched; only the
  * final block re-reads the timeline history and overwrites them.
  */
 /* Position of the last page read: segment number * segment size + offset. */
712  const XLogRecPtr lastReadPage = (state->seg.ws_segno *
713  state->segcxt.ws_segsize + state->segoff);
714 
715  Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
716  Assert(wantLength <= XLOG_BLCKSZ);
717  Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
718  Assert(currTLI != 0);
719 
720  /*
721  * If the desired page is currently read in and valid, we have nothing to
722  * do.
723  *
724  * The caller should've ensured that it didn't previously advance readOff
725  * past the valid limit of this timeline, so it doesn't matter if the
726  * current TLI has since become historical.
727  */
728  if (lastReadPage == wantPage &&
729  state->readLen != 0 &&
730  lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
731  return;
732 
733  /*
734  * If we're reading from the current timeline, it hasn't become historical
735  * and the page we're reading is after the last page read, we can again
736  * just carry on. (Seeking backwards requires a check to make sure the
737  * older page isn't on a prior timeline).
738  *
739  * currTLI might've become historical since the caller obtained the value,
740  * but the caller is required not to read past the flush limit it saw at
741  * the time it looked up the timeline. There's nothing we can do about it
742  * if StartupXLOG() renames it to .partial concurrently.
743  */
744  if (state->currTLI == currTLI && wantPage >= lastReadPage)
745  {
746  Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
747  return;
748  }
749 
750  /*
751  * If we're just reading pages from a previously validated historical
752  * timeline, and the timeline we're reading from is valid until the end
753  * of the current segment, we can just keep reading.
754  */
755  if (state->currTLIValidUntil != InvalidXLogRecPtr &&
756  state->currTLI != currTLI &&
757  state->currTLI != 0 &&
758  ((wantPage + wantLength) / state->segcxt.ws_segsize) <
759  (state->currTLIValidUntil / state->segcxt.ws_segsize))
760  return;
761 
762  /*
763  * If we reach this point we're either looking up a page for random
764  * access, the current timeline just became historical, or we're reading
765  * from a new segment containing a timeline switch. In all cases we need
766  * to determine the newest timeline on the segment.
767  *
768  * If it's the current timeline we can just keep reading from here unless
769  * we detect a timeline switch that makes the current timeline historical.
770  * If it's a historical timeline we can read the whole segment on the
771  * newest timeline because it contains all the old timelines' data too. So
772  * only one switch check is required.
773  */
774  {
775  /*
776  * We need to re-read the timeline history in case it's been changed
777  * by a promotion or replay from a cascaded replica.
778  */
779  List *timelineHistory = readTimeLineHistory(currTLI);
780  XLogRecPtr endOfSegment;
781 
 /*
  * Last byte position of the segment that holds wantPage: the start of
  * the following segment minus one. The Assert confirms both positions
  * fall in the same segment.
  */
782  endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
783  state->segcxt.ws_segsize - 1;
784  Assert(wantPage / state->segcxt.ws_segsize ==
785  endOfSegment / state->segcxt.ws_segsize);
786 
787  /*
788  * Find the timeline of the last LSN on the segment containing
789  * wantPage.
790  */
791  state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
792  state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
793  &state->nextTLI);
794 
 /*
  * Unless no validity limit was returned, the requested range must end
  * before the limit of the timeline just chosen.
  */
795  Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
796  wantPage + wantLength < state->currTLIValidUntil);
797 
 /* The history list is not used past this point, so release it. */
798  list_free_deep(timelineHistory);
799 
800  elog(DEBUG3, "switched to timeline %u valid until %X/%X",
801  state->currTLI,
802  LSN_FORMAT_ARGS(state->currTLIValidUntil));
803  }
804 }
TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history)
Definition: timeline.c:552
XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
Definition: timeline.c:580
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
#define Min(x, y)
Definition: c.h:986
#define DEBUG3
Definition: elog.h:22
void list_free_deep(List *list)
Definition: list.c:1519
List
Definition: pg_list.h:51
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), DEBUG3, elog, InvalidXLogRecPtr, list_free_deep(), LSN_FORMAT_ARGS, Min, readTimeLineHistory(), tliOfPointInHistory(), and tliSwitchPoint().

Referenced by logical_read_xlog_page(), and read_local_xlog_page().

◆ XLogTruncateRelation()

void XLogTruncateRelation ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  nblocks 
)

Definition at line 662 of file xlogutils.c.

664 {
 /*
  * nblocks is handed to forget_invalid_pages() as its minblkno argument.
  * Presumably that discards recorded invalid-page entries for blocks at
  * or beyond the truncation point -- confirm against
  * forget_invalid_pages().
  */
665  forget_invalid_pages(rnode, forkNum, nblocks);
666 }

References forget_invalid_pages().

Referenced by smgr_redo().

Variable Documentation

◆ ignore_invalid_pages

bool ignore_invalid_pages = false

Definition at line 36 of file xlogutils.c.

Referenced by log_invalid_page(), and XLogCheckInvalidPages().

◆ InRecovery

◆ invalid_page_tab

HTAB* invalid_page_tab = NULL
static

◆ standbyState