PostgreSQL Source Code  git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
storage.c File Reference
#include "postgres.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "miscadmin.h"
#include "storage/bulk_write.h"
#include "storage/freespace.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for storage.c:

Go to the source code of this file.

Data Structures

struct  PendingRelDelete
 
struct  PendingRelSync
 

Typedefs

typedef struct PendingRelDelete PendingRelDelete
 
typedef struct PendingRelSync PendingRelSync
 

Functions

static void AddPendingSync (const RelFileLocator *rlocator)
 
SMgrRelation RelationCreateStorage (RelFileLocator rlocator, char relpersistence, bool register_delete)
 
void log_smgrcreate (const RelFileLocator *rlocator, ForkNumber forkNum)
 
void RelationDropStorage (Relation rel)
 
void RelationPreserveStorage (RelFileLocator rlocator, bool atCommit)
 
void RelationTruncate (Relation rel, BlockNumber nblocks)
 
void RelationPreTruncate (Relation rel)
 
void RelationCopyStorage (SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
 
bool RelFileLocatorSkippingWAL (RelFileLocator rlocator)
 
Size EstimatePendingSyncsSpace (void)
 
void SerializePendingSyncs (Size maxSize, char *startAddress)
 
void RestorePendingSyncs (char *startAddress)
 
void smgrDoPendingDeletes (bool isCommit)
 
void smgrDoPendingSyncs (bool isCommit, bool isParallelWorker)
 
int smgrGetPendingDeletes (bool forCommit, RelFileLocator **ptr)
 
void PostPrepare_smgr (void)
 
void AtSubCommit_smgr (void)
 
void AtSubAbort_smgr (void)
 
void smgr_redo (XLogReaderState *record)
 

Variables

int wal_skip_threshold = 2048
 
static PendingRelDelete * pendingDeletes = NULL
 
static HTAB * pendingSyncHash = NULL
 

Typedef Documentation

◆ PendingRelDelete

◆ PendingRelSync

Function Documentation

◆ AddPendingSync()

static void AddPendingSync ( const RelFileLocator * rlocator)
static

Definition at line 85 of file storage.c.

/*
 * AddPendingSync
 *		Register rlocator in pendingSyncHash, creating the hash table on
 *		first use.  The table is allocated in TopTransactionContext.
 *		The locator must not already be present (asserted); the new
 *		entry starts out with is_truncated = false.
 */
86 {
87  PendingRelSync *pending;
88  bool found;
89 
90  /* create the hash if not yet */
91  if (!pendingSyncHash)
92  {
93  HASHCTL ctl;
94 
95  ctl.keysize = sizeof(RelFileLocator);
96  ctl.entrysize = sizeof(PendingRelSync);
97  ctl.hcxt = TopTransactionContext;
98  pendingSyncHash = hash_create("pending sync hash", 16, &ctl,
99  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
100  }
101 
102  pending = hash_search(pendingSyncHash, rlocator, HASH_ENTER, &found);
103  Assert(!found);
104  pending->is_truncated = false;
105 }
#define Assert(condition)
Definition: c.h:863
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
MemoryContext TopTransactionContext
Definition: mcxt.c:154
tree ctl
Definition: radixtree.h:1853
struct RelFileLocator RelFileLocator
static HTAB * pendingSyncHash
Definition: storage.c:77
struct PendingRelSync PendingRelSync
bool is_truncated
Definition: storage.c:73

References Assert, ctl, HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), PendingRelSync::is_truncated, pendingSyncHash, and TopTransactionContext.

Referenced by RelationCreateStorage(), and RestorePendingSyncs().

◆ AtSubAbort_smgr()

void AtSubAbort_smgr ( void  )

Definition at line 934 of file storage.c.

/*
 * AtSubAbort_smgr
 *		Subtransaction-abort hook: hands off to smgrDoPendingDeletes()
 *		with isCommit = false.
 */
935 {
936  smgrDoPendingDeletes(false);
937 }
void smgrDoPendingDeletes(bool isCommit)
Definition: storage.c:632

References smgrDoPendingDeletes().

Referenced by AbortSubTransaction().

◆ AtSubCommit_smgr()

void AtSubCommit_smgr ( void  )

Definition at line 914 of file storage.c.

/*
 * AtSubCommit_smgr
 *		Subtransaction-commit hook: every pending-delete entry recorded at
 *		the current nesting level (or deeper) is reassigned to the parent
 *		level, i.e. nestLevel - 1.
 */
915 {
916  int nestLevel = GetCurrentTransactionNestLevel();
917  PendingRelDelete *pending;
918 
919  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
920  {
921  if (pending->nestLevel >= nestLevel)
922  pending->nestLevel = nestLevel - 1;
923  }
924 }
static PendingRelDelete * pendingDeletes
Definition: storage.c:76
struct PendingRelDelete * next
Definition: storage.c:67
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:928

References GetCurrentTransactionNestLevel(), PendingRelDelete::nestLevel, PendingRelDelete::next, and pendingDeletes.

Referenced by CommitSubTransaction().

◆ EstimatePendingSyncsSpace()

Size EstimatePendingSyncsSpace ( void  )

Definition at line 546 of file storage.c.

/*
 * EstimatePendingSyncsSpace
 *		Return the number of bytes needed for one RelFileLocator per entry
 *		in pendingSyncHash plus one extra slot.  When the hash has not been
 *		created the entry count is taken as zero.  mul_size() performs the
 *		multiplication.
 */
547 {
548  long entries;
549 
550  entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
551  return mul_size(1 + entries, sizeof(RelFileLocator));
552 }
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References hash_get_num_entries(), mul_size(), and pendingSyncHash.

Referenced by InitializeParallelDSM().

◆ log_smgrcreate()

void log_smgrcreate ( const RelFileLocator * rlocator,
ForkNumber  forkNum 
)

Definition at line 186 of file storage.c.

/*
 * log_smgrcreate
 *		Emit an XLOG_SMGR_CREATE WAL record carrying the given relfile
 *		locator and fork number.
 */
187 {
188  xl_smgr_create xlrec;
189 
190  /*
191  * Make an XLOG entry reporting the file creation.
192  */
193  xlrec.rlocator = *rlocator;
194  xlrec.forkNum = forkNum;
195 
196  XLogBeginInsert();
197  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
198  XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
199 }
#define XLOG_SMGR_CREATE
Definition: storage_xlog.h:30
ForkNumber forkNum
Definition: storage_xlog.h:36
RelFileLocator rlocator
Definition: storage_xlog.h:35
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:82

References xl_smgr_create::forkNum, xl_smgr_create::rlocator, XLOG_SMGR_CREATE, XLogBeginInsert(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by CreateAndCopyRelationData(), fill_seq_with_data(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_build(), index_copy_data(), and RelationCreateStorage().

◆ PostPrepare_smgr()

void PostPrepare_smgr ( void  )

Definition at line 893 of file storage.c.

/*
 * PostPrepare_smgr
 *		Discard the whole pending-deletes list without performing any of
 *		the deletions.  Each entry is unlinked from the list head before
 *		being freed, so pendingDeletes never points at freed memory.
 */
894 {
895  PendingRelDelete *pending;
896  PendingRelDelete *next;
897 
898  for (pending = pendingDeletes; pending != NULL; pending = next)
899  {
900  next = pending->next;
901  pendingDeletes = next;
902  /* must explicitly free the list entry */
903  pfree(pending);
904  }
905 }
static int32 next
Definition: blutils.c:219
void pfree(void *pointer)
Definition: mcxt.c:1521

References next, PendingRelDelete::next, pendingDeletes, and pfree().

Referenced by PrepareTransaction().

◆ RelationCopyStorage()

void RelationCopyStorage ( SMgrRelation  src,
SMgrRelation  dst,
ForkNumber  forkNum,
char  relpersistence 
)

Definition at line 452 of file storage.c.

/*
 * RelationCopyStorage
 *		Copy every block of fork forkNum from src to dst through the
 *		bulk-write facility.  Each page is verified after reading; an
 *		invalid page raises ERROR with ERRCODE_DATA_CORRUPTED.  Pages are
 *		WAL-logged when use_wal (computed below) is true.
 */
454 {
455  bool use_wal;
456  bool copying_initfork;
457  BlockNumber nblocks;
458  BlockNumber blkno;
459  BulkWriteState *bulkstate;
460 
461  /*
462  * The init fork for an unlogged relation in many respects has to be
463  * treated the same as normal relation, changes need to be WAL logged and
464  * it needs to be synced to disk.
465  */
466  copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
467  forkNum == INIT_FORKNUM;
468 
469  /*
470  * We need to log the copied data in WAL iff WAL archiving/streaming is
471  * enabled AND it's a permanent relation. This gives the same answer as
472  * "RelationNeedsWAL(rel) || copying_initfork", because we know the
473  * current operation created new relation storage.
474  */
475  use_wal = XLogIsNeeded() &&
476  (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
477 
478  bulkstate = smgr_bulk_start_smgr(dst, forkNum, use_wal);
479 
480  nblocks = smgrnblocks(src, forkNum);
481 
482  for (blkno = 0; blkno < nblocks; blkno++)
483  {
484  BulkWriteBuffer buf;
485 
486  /* If we got a cancel signal during the copy of the data, quit */
487  CHECK_FOR_INTERRUPTS();
488 
489  buf = smgr_bulk_get_buf(bulkstate);
490  smgrread(src, forkNum, blkno, (Page) buf);
491 
492  if (!PageIsVerifiedExtended((Page) buf, blkno,
493  PIV_LOG_WARNING | PIV_REPORT_STAT))
494  {
495  /*
496  * For paranoia's sake, capture the file path before invoking the
497  * ereport machinery. This guards against the possibility of a
498  * relcache flush caused by, e.g., an errcontext callback.
499  * (errcontext callbacks shouldn't be risking any such thing, but
500  * people have been known to forget that rule.)
501  */
502  char *relpath = relpathbackend(src->smgr_rlocator.locator,
503  src->smgr_rlocator.backend,
504  forkNum);
505 
506  ereport(ERROR,
507  (errcode(ERRCODE_DATA_CORRUPTED),
508  errmsg("invalid page in block %u of relation %s",
509  blkno, relpath)));
510  }
511 
512  /*
513  * Queue the page for WAL-logging and writing out. Unfortunately we
514  * don't know what kind of a page this is, so we have to log the full
515  * page including any unused space.
516  */
517  smgr_bulk_write(bulkstate, blkno, buf, false);
518  }
519  smgr_bulk_finish(bulkstate);
520 }
uint32 BlockNumber
Definition: block.h:31
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
Pointer Page
Definition: bufpage.h:81
#define PIV_LOG_WARNING
Definition: bufpage.h:468
#define PIV_REPORT_STAT
Definition: bufpage.h:469
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition: bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition: bulk_write.c:347
BulkWriteState * smgr_bulk_start_smgr(SMgrRelation smgr, ForkNumber forknum, bool use_wal)
Definition: bulk_write.c:100
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition: bulk_write.c:130
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
static char * buf
Definition: pg_test_fsync.c:72
@ INIT_FORKNUM
Definition: relpath.h:61
#define relpath(rlocator, forknum)
Definition: relpath.h:102
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:93
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:677
static void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
Definition: smgr.h:116
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37
#define XLogIsNeeded()
Definition: xlog.h:109

References RelFileLocatorBackend::backend, buf, CHECK_FOR_INTERRUPTS, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, INIT_FORKNUM, RelFileLocatorBackend::locator, PageIsVerifiedExtended(), PIV_LOG_WARNING, PIV_REPORT_STAT, relpath, relpathbackend, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_smgr(), smgr_bulk_write(), SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrread(), and XLogIsNeeded.

Referenced by heapam_relation_copy_data(), and index_copy_data().

◆ RelationCreateStorage()

SMgrRelation RelationCreateStorage ( RelFileLocator  rlocator,
char  relpersistence,
bool  register_delete 
)

Definition at line 121 of file storage.c.

/*
 * RelationCreateStorage
 *		Create the MAIN fork for rlocator and return the opened
 *		SMgrRelation.
 *
 * relpersistence selects the owning ProcNumber and whether the creation is
 * WAL-logged (only RELPERSISTENCE_PERMANENT is).  An unrecognized value
 * raises ERROR.  If register_delete is true, a pending-delete entry with
 * atCommit = false is pushed so the file is removed on abort.  A permanent
 * relation created while !XLogIsNeeded() is also registered in the
 * pending-sync hash.  Must not be called in parallel mode (asserted).
 */
123 {
124  SMgrRelation srel;
125  ProcNumber procNumber;
126  bool needs_wal;
127 
128  Assert(!IsInParallelMode()); /* couldn't update pendingSyncHash */
129 
130  switch (relpersistence)
131  {
132  case RELPERSISTENCE_TEMP:
133  procNumber = ProcNumberForTempRelations();
134  needs_wal = false;
135  break;
136  case RELPERSISTENCE_UNLOGGED:
137  procNumber = INVALID_PROC_NUMBER;
138  needs_wal = false;
139  break;
140  case RELPERSISTENCE_PERMANENT:
141  procNumber = INVALID_PROC_NUMBER;
142  needs_wal = true;
143  break;
144  default:
145  elog(ERROR, "invalid relpersistence: %c", relpersistence);
146  return NULL; /* placate compiler */
147  }
148 
149  srel = smgropen(rlocator, procNumber);
150  smgrcreate(srel, MAIN_FORKNUM, false);
151 
152  if (needs_wal)
153  log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
154 
155  /*
156  * Add the relation to the list of stuff to delete at abort, if we are
157  * asked to do so.
158  */
159  if (register_delete)
160  {
161  PendingRelDelete *pending;
162 
163  pending = (PendingRelDelete *)
164  MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
165  pending->rlocator = rlocator;
166  pending->procNumber = procNumber;
167  pending->atCommit = false; /* delete if abort */
168  pending->nestLevel = GetCurrentTransactionNestLevel();
169  pending->next = pendingDeletes;
170  pendingDeletes = pending;
171  }
172 
173  if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
174  {
175  Assert(procNumber == INVALID_PROC_NUMBER);
176  AddPendingSync(&rlocator);
177  }
178 
179  return srel;
180 }
#define elog(elevel,...)
Definition: elog.h:225
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
#define ProcNumberForTempRelations()
Definition: proc.h:324
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
@ MAIN_FORKNUM
Definition: relpath.h:58
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:201
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:414
static void AddPendingSync(const RelFileLocator *rlocator)
Definition: storage.c:85
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186
ProcNumber procNumber
Definition: storage.c:64
RelFileLocator rlocator
Definition: storage.c:63
bool IsInParallelMode(void)
Definition: xact.c:1088

References AddPendingSync(), Assert, PendingRelDelete::atCommit, elog, ERROR, GetCurrentTransactionNestLevel(), INVALID_PROC_NUMBER, IsInParallelMode(), RelFileLocatorBackend::locator, log_smgrcreate(), MAIN_FORKNUM, MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, PendingRelDelete::procNumber, ProcNumberForTempRelations, PendingRelDelete::rlocator, SMgrRelationData::smgr_rlocator, smgrcreate(), smgropen(), TopMemoryContext, and XLogIsNeeded.

Referenced by CreateAndCopyRelationData(), heap_create(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_copy_data(), and RelationSetNewRelfilenumber().

◆ RelationDropStorage()

void RelationDropStorage ( Relation  rel)

Definition at line 206 of file storage.c.

/*
 * RelationDropStorage
 *		Schedule rel's physical storage for deletion at commit by pushing
 *		an atCommit = true entry, stamped with the current transaction
 *		nesting level, onto the pending-deletes list.  The relation's
 *		smgr reference is closed afterwards.
 */
207 {
208  PendingRelDelete *pending;
209 
210  /* Add the relation to the list of stuff to delete at commit */
211  pending = (PendingRelDelete *)
212  MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
213  pending->rlocator = rel->rd_locator;
214  pending->procNumber = rel->rd_backend;
215  pending->atCommit = true; /* delete if commit */
216  pending->nestLevel = GetCurrentTransactionNestLevel();
217  pending->next = pendingDeletes;
218  pendingDeletes = pending;
219 
220  /*
221  * NOTE: if the relation was created in this transaction, it will now be
222  * present in the pending-delete list twice, once with atCommit true and
223  * once with atCommit false. Hence, it will be physically deleted at end
224  * of xact in either case (and the other entry will be ignored by
225  * smgrDoPendingDeletes, so no error will occur). We could instead remove
226  * the existing list entry and delete the physical file immediately, but
227  * for now I'll keep the logic simple.
228  */
229 
230  RelationCloseSmgr(rel);
231 }
static void RelationCloseSmgr(Relation relation)
Definition: rel.h:582
ProcNumber rd_backend
Definition: rel.h:60
RelFileLocator rd_locator
Definition: rel.h:57

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, PendingRelDelete::procNumber, RelationData::rd_backend, RelationData::rd_locator, RelationCloseSmgr(), PendingRelDelete::rlocator, and TopMemoryContext.

Referenced by heap_drop_with_catalog(), heapam_relation_copy_data(), index_copy_data(), index_drop(), reindex_index(), and RelationSetNewRelfilenumber().

◆ RelationPreserveStorage()

void RelationPreserveStorage ( RelFileLocator  rlocator,
bool  atCommit 
)

Definition at line 251 of file storage.c.

/*
 * RelationPreserveStorage
 *		Remove from the pending-deletes list every entry whose locator
 *		equals rlocator and whose atCommit flag equals the atCommit
 *		argument.  Matching entries are unlinked (updating either the
 *		predecessor's next pointer or the list head) and freed; all other
 *		entries are left untouched.
 */
252 {
253  PendingRelDelete *pending;
254  PendingRelDelete *prev;
255  PendingRelDelete *next;
256 
257  prev = NULL;
258  for (pending = pendingDeletes; pending != NULL; pending = next)
259  {
260  next = pending->next;
261  if (RelFileLocatorEquals(rlocator, pending->rlocator)
262  && pending->atCommit == atCommit)
263  {
264  /* unlink and delete list entry */
265  if (prev)
266  prev->next = next;
267  else
268  pendingDeletes = next;
269  pfree(pending);
270  /* prev does not change */
271  }
272  else
273  {
274  /* unrelated entry, don't touch it */
275  prev = pending;
276  }
277  }
278 }
#define RelFileLocatorEquals(locator1, locator2)

References PendingRelDelete::atCommit, next, PendingRelDelete::next, pendingDeletes, pfree(), RelFileLocatorEquals, and PendingRelDelete::rlocator.

Referenced by ATExecAddIndex(), and write_relmap_file().

◆ RelationPreTruncate()

void RelationPreTruncate ( Relation  rel)

Definition at line 424 of file storage.c.

/*
 * RelationPreTruncate
 *		If rel has an entry in pendingSyncHash, flag that entry as
 *		truncated.  Does nothing when the hash has not been created or
 *		when rel's locator is not present in it.
 */
425 {
426  PendingRelSync *pending;
427 
428  if (!pendingSyncHash)
429  return;
430 
	/* Look up only (HASH_FIND); never creates an entry. */
431  pending = hash_search(pendingSyncHash,
432  &(RelationGetSmgr(rel)->smgr_rlocator.locator),
433  HASH_FIND, NULL);
434  if (pending)
435  pending->is_truncated = true;
436 }
@ HASH_FIND
Definition: hsearch.h:113
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567

References HASH_FIND, hash_search(), PendingRelSync::is_truncated, pendingSyncHash, and RelationGetSmgr().

Referenced by RelationTruncate().

◆ RelationTruncate()

void RelationTruncate ( Relation  rel,
BlockNumber  nblocks 
)

Definition at line 288 of file storage.c.

/*
 * RelationTruncate
 *		Physically truncate rel to nblocks blocks.
 *
 * The MAIN fork is always truncated; the FSM and visibility-map forks are
 * included when they exist and their prepare-truncate routines return a
 * valid block number.  Checkpoint completion is held off (via
 * DELAY_CHKPT_COMPLETE) from just before the WAL record is written until
 * smgrtruncate() has finished.  If the relation needs WAL, an
 * XLOG_SMGR_TRUNCATE record is inserted first, and flushed when an FSM or
 * VM fork exists.
 */
289 {
290  bool fsm;
291  bool vm;
292  bool need_fsm_vacuum = false;
293  ForkNumber forks[MAX_FORKNUM];
294  BlockNumber blocks[MAX_FORKNUM];
295  int nforks = 0;
296  SMgrRelation reln;
297 
298  /*
299  * Make sure smgr_targblock etc aren't pointing somewhere past new end.
300  * (Note: don't rely on this reln pointer below this loop.)
301  */
302  reln = RelationGetSmgr(rel);
303  reln->smgr_targblock = InvalidBlockNumber;
304  for (int i = 0; i <= MAX_FORKNUM; ++i)
305  reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
306 
307  /* Prepare for truncation of MAIN fork of the relation */
308  forks[nforks] = MAIN_FORKNUM;
309  blocks[nforks] = nblocks;
310  nforks++;
311 
312  /* Prepare for truncation of the FSM if it exists */
313  fsm = smgrexists(RelationGetSmgr(rel), FSM_FORKNUM);
314  if (fsm)
315  {
316  blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
317  if (BlockNumberIsValid(blocks[nforks]))
318  {
319  forks[nforks] = FSM_FORKNUM;
320  nforks++;
321  need_fsm_vacuum = true;
322  }
323  }
324 
325  /* Prepare for truncation of the visibility map too if it exists */
326  vm = smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM);
327  if (vm)
328  {
329  blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
330  if (BlockNumberIsValid(blocks[nforks]))
331  {
332  forks[nforks] = VISIBILITYMAP_FORKNUM;
333  nforks++;
334  }
335  }
336 
337  RelationPreTruncate(rel);
338 
339  /*
340  * Make sure that a concurrent checkpoint can't complete while truncation
341  * is in progress.
342  *
343  * The truncation operation might drop buffers that the checkpoint
344  * otherwise would have flushed. If it does, then it's essential that the
345  * files actually get truncated on disk before the checkpoint record is
346  * written. Otherwise, if replay begins from that checkpoint, the
347  * to-be-truncated blocks might still exist on disk but have older
348  * contents than expected, which can cause replay to fail. It's OK for the
349  * blocks to not exist on disk at all, but not for them to have the wrong
350  * contents.
351  */
352  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_COMPLETE) == 0);
353  MyProc->delayChkptFlags |= DELAY_CHKPT_COMPLETE;
354 
355  /*
356  * We WAL-log the truncation before actually truncating, which means
357  * trouble if the truncation fails. If we then crash, the WAL replay
358  * likely isn't going to succeed in the truncation either, and cause a
359  * PANIC. It's tempting to put a critical section here, but that cure
360  * would be worse than the disease. It would turn a usually harmless
361  * failure to truncate, that might spell trouble at WAL replay, into a
362  * certain PANIC.
363  */
364  if (RelationNeedsWAL(rel))
365  {
366  /*
367  * Make an XLOG entry reporting the file truncation.
368  */
369  XLogRecPtr lsn;
370  xl_smgr_truncate xlrec;
371 
372  xlrec.blkno = nblocks;
373  xlrec.rlocator = rel->rd_locator;
374  xlrec.flags = SMGR_TRUNCATE_ALL;
375 
376  XLogBeginInsert();
377  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
378 
379  lsn = XLogInsert(RM_SMGR_ID,
380  XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
381 
382  /*
383  * Flush, because otherwise the truncation of the main relation might
384  * hit the disk before the WAL record, and the truncation of the FSM
385  * or visibility map. If we crashed during that window, we'd be left
386  * with a truncated heap, but the FSM or visibility map would still
387  * contain entries for the non-existent heap pages.
388  */
389  if (fsm || vm)
390  XLogFlush(lsn);
391  }
392 
393  /*
394  * This will first remove any buffers from the buffer pool that should no
395  * longer exist after truncation is complete, and then truncate the
396  * corresponding files on disk.
397  */
398  smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
399 
400  /* We've done all the critical work, so checkpoints are OK now. */
401  MyProc->delayChkptFlags &= ~DELAY_CHKPT_COMPLETE;
402 
403  /*
404  * Update upper-level FSM pages to account for the truncation. This is
405  * important because the just-truncated pages were likely marked as
406  * all-free, and would be preferentially selected.
407  *
408  * NB: There's no point in delaying checkpoints until this is done.
409  * Because the FSM is not WAL-logged, we have to be prepared for the
410  * possibility of corruption after a crash anyway.
411  */
412  if (need_fsm_vacuum)
413  FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
414 }
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
Definition: freespace.c:275
int i
Definition: isn.c:72
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
#define RelationNeedsWAL(relation)
Definition: rel.h:628
ForkNumber
Definition: relpath.h:56
@ FSM_FORKNUM
Definition: relpath.h:59
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:60
#define MAX_FORKNUM
Definition: relpath.h:70
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
Definition: smgr.c:725
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:401
PGPROC * MyProc
Definition: proc.c:66
void RelationPreTruncate(Relation rel)
Definition: storage.c:424
#define SMGR_TRUNCATE_ALL
Definition: storage_xlog.h:43
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
int delayChkptFlags
Definition: proc.h:240
BlockNumber smgr_targblock
Definition: smgr.h:45
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2802
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert, xl_smgr_truncate::blkno, BlockNumberIsValid(), DELAY_CHKPT_COMPLETE, PGPROC::delayChkptFlags, xl_smgr_truncate::flags, FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, i, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, MyProc, RelationData::rd_locator, RelationGetSmgr(), RelationNeedsWAL, RelationPreTruncate(), xl_smgr_truncate::rlocator, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_targblock, SMGR_TRUNCATE_ALL, smgrexists(), smgrtruncate(), VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_TRUNCATE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by heapam_relation_nontransactional_truncate(), lazy_truncate_heap(), RelationTruncateIndexes(), and spgvacuumscan().

◆ RelFileLocatorSkippingWAL()

bool RelFileLocatorSkippingWAL ( RelFileLocator  rlocator)

Definition at line 532 of file storage.c.

/*
 * RelFileLocatorSkippingWAL
 *		Return true iff rlocator currently has an entry in pendingSyncHash.
 *		Returns false when the hash has not been created.
 */
533 {
534  if (!pendingSyncHash ||
535  hash_search(pendingSyncHash, &rlocator, HASH_FIND, NULL) == NULL)
536  return false;
537 
538  return true;
539 }

References HASH_FIND, hash_search(), and pendingSyncHash.

Referenced by MarkBufferDirtyHint(), and RelationInitPhysicalAddr().

◆ RestorePendingSyncs()

void RestorePendingSyncs ( char *  startAddress)

Definition at line 610 of file storage.c.

/*
 * RestorePendingSyncs
 *		Rebuild pendingSyncHash from the RelFileLocator array starting at
 *		startAddress.  The array is read up to, but not including, the
 *		first element whose relNumber is 0; each element before that is
 *		passed to AddPendingSync().  The hash must not exist yet (asserted).
 */
611 {
612  RelFileLocator *rlocator;
613 
614  Assert(pendingSyncHash == NULL);
615  for (rlocator = (RelFileLocator *) startAddress; rlocator->relNumber != 0;
616  rlocator++)
617  AddPendingSync(rlocator);
618 }
RelFileNumber relNumber

References AddPendingSync(), Assert, pendingSyncHash, and RelFileLocator::relNumber.

Referenced by ParallelWorkerMain().

◆ SerializePendingSyncs()

void SerializePendingSyncs ( Size  maxSize,
char *  startAddress 
)

Definition at line 559 of file storage.c.

/*
 * SerializePendingSyncs
 *		Write into startAddress an array of the RelFileLocators that are in
 *		pendingSyncHash but not scheduled for deletion at commit, followed
 *		by one all-zero RelFileLocator as terminator.  When the hash does
 *		not exist only the terminator is written.
 *
 * A temporary hash in CurrentMemoryContext is used to compute the set
 * difference and is destroyed before returning.  maxSize is not consulted
 * by this function.
 */
560 {
561  HTAB *tmphash;
562  HASHCTL ctl;
563  HASH_SEQ_STATUS scan;
564  PendingRelSync *sync;
565  PendingRelDelete *delete;
566  RelFileLocator *src;
567  RelFileLocator *dest = (RelFileLocator *) startAddress;
568 
569  if (!pendingSyncHash)
570  goto terminate;
571 
572  /* Create temporary hash to collect active relfilelocators */
573  ctl.keysize = sizeof(RelFileLocator);
574  ctl.entrysize = sizeof(RelFileLocator);
575  ctl.hcxt = CurrentMemoryContext;
576  tmphash = hash_create("tmp relfilelocators",
577  hash_get_num_entries(pendingSyncHash), &ctl,
578  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
579 
580  /* collect all rlocator from pending syncs */
581  hash_seq_init(&scan, pendingSyncHash);
582  while ((sync = (PendingRelSync *) hash_seq_search(&scan)))
583  (void) hash_search(tmphash, &sync->rlocator, HASH_ENTER, NULL);
584 
585  /* remove deleted rnodes */
586  for (delete = pendingDeletes; delete != NULL; delete = delete->next)
587  if (delete->atCommit)
588  (void) hash_search(tmphash, &delete->rlocator,
589  HASH_REMOVE, NULL);
590 
591  hash_seq_init(&scan, tmphash);
592  while ((src = (RelFileLocator *) hash_seq_search(&scan)))
593  *dest++ = *src;
594 
595  hash_destroy(tmphash);
596 
597 terminate:
598  MemSet(dest, 0, sizeof(RelFileLocator));
599 }
#define MemSet(start, val, len)
Definition: c.h:1025
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
@ HASH_REMOVE
Definition: hsearch.h:115
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
Definition: dynahash.c:220
RelFileLocator rlocator
Definition: storage.c:72

References ctl, CurrentMemoryContext, generate_unaccent_rules::dest, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), MemSet, PendingRelDelete::next, pendingDeletes, pendingSyncHash, and PendingRelSync::rlocator.

Referenced by InitializeParallelDSM().

◆ smgr_redo()

void smgr_redo ( XLogReaderState * record)

Definition at line 940 of file storage.c.

/*
 * smgr_redo
 *		WAL replay for smgr records.
 *
 * XLOG_SMGR_CREATE: open the relation and create the recorded fork.
 * XLOG_SMGR_TRUNCATE: create the MAIN fork if necessary, flush WAL up to
 * the end of this record, then truncate whichever of the MAIN, FSM and VM
 * forks the record's flags select (FSM and VM only if they exist).
 * Any other info code raises PANIC.
 */
941 {
942  XLogRecPtr lsn = record->EndRecPtr;
943  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
944 
945  /* Backup blocks are not used in smgr records */
946  Assert(!XLogRecHasAnyBlockRefs(record));
947 
948  if (info == XLOG_SMGR_CREATE)
949  {
950  xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
951  SMgrRelation reln;
952 
953  reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
954  smgrcreate(reln, xlrec->forkNum, true);
955  }
956  else if (info == XLOG_SMGR_TRUNCATE)
957  {
958  xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
959  SMgrRelation reln;
960  Relation rel;
961  ForkNumber forks[MAX_FORKNUM];
962  BlockNumber blocks[MAX_FORKNUM];
963  int nforks = 0;
964  bool need_fsm_vacuum = false;
965 
966  reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
967 
968  /*
969  * Forcibly create relation if it doesn't exist (which suggests that
970  * it was dropped somewhere later in the WAL sequence). As in
971  * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
972  * log as best we can until the drop is seen.
973  */
974  smgrcreate(reln, MAIN_FORKNUM, true);
975 
976  /*
977  * Before we perform the truncation, update minimum recovery point to
978  * cover this WAL record. Once the relation is truncated, there's no
979  * going back. The buffer manager enforces the WAL-first rule for
980  * normal updates to relation files, so that the minimum recovery
981  * point is always updated before the corresponding change in the data
982  * file is flushed to disk. We have to do the same manually here.
983  *
984  * Doing this before the truncation means that if the truncation fails
985  * for some reason, you cannot start up the system even after restart,
986  * until you fix the underlying situation so that the truncation will
987  * succeed. Alternatively, we could update the minimum recovery point
988  * after truncation, but that would leave a small window where the
989  * WAL-first rule could be violated.
990  */
991  XLogFlush(lsn);
992 
993  /* Prepare for truncation of MAIN fork */
994  if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
995  {
996  forks[nforks] = MAIN_FORKNUM;
997  blocks[nforks] = xlrec->blkno;
998  nforks++;
999 
1000  /* Also tell xlogutils.c about it */
1001  XLogTruncateRelation(xlrec->rlocator, MAIN_FORKNUM, xlrec->blkno);
1002  }
1003 
1004  /* Prepare for truncation of FSM and VM too */
1005  rel = CreateFakeRelcacheEntry(xlrec->rlocator);
1006 
1007  if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
1008  smgrexists(reln, FSM_FORKNUM))
1009  {
1010  blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
1011  if (BlockNumberIsValid(blocks[nforks]))
1012  {
1013  forks[nforks] = FSM_FORKNUM;
1014  nforks++;
1015  need_fsm_vacuum = true;
1016  }
1017  }
1018  if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
1019  smgrexists(reln, VISIBILITYMAP_FORKNUM))
1020  {
1021  blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
1022  if (BlockNumberIsValid(blocks[nforks]))
1023  {
1024  forks[nforks] = VISIBILITYMAP_FORKNUM;
1025  nforks++;
1026  }
1027  }
1028 
1029  /* Do the real work to truncate relation forks */
1030  if (nforks > 0)
1031  smgrtruncate(reln, forks, nforks, blocks);
1032 
1033  /*
1034  * Update upper-level FSM pages to account for the truncation. This is
1035  * important because the just-truncated pages were likely marked as
1036  * all-free, and would be preferentially selected.
1037  */
1038  if (need_fsm_vacuum)
1039  FreeSpaceMapVacuumRange(rel, xlrec->blkno,
1040  InvalidBlockNumber);
1041 
1042  FreeFakeRelcacheEntry(rel);
1043  }
1044  else
1045  elog(PANIC, "smgr_redo: unknown op code %u", info);
1046 }
unsigned char uint8
Definition: c.h:516
#define PANIC
Definition: elog.h:42
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define SMGR_TRUNCATE_HEAP
Definition: storage_xlog.h:40
#define SMGR_TRUNCATE_FSM
Definition: storage_xlog.h:42
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void FreeFakeRelcacheEntry(Relation fakerel)
Definition: xlogutils.c:629
void XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nblocks)
Definition: xlogutils.c:671
Relation CreateFakeRelcacheEntry(RelFileLocator rlocator)
Definition: xlogutils.c:582

References Assert, xl_smgr_truncate::blkno, BlockNumberIsValid(), CreateFakeRelcacheEntry(), elog, XLogReaderState::EndRecPtr, xl_smgr_truncate::flags, xl_smgr_create::forkNum, FreeFakeRelcacheEntry(), FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, INVALID_PROC_NUMBER, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, PANIC, xl_smgr_create::rlocator, xl_smgr_truncate::rlocator, SMGR_TRUNCATE_FSM, SMGR_TRUNCATE_HEAP, SMGR_TRUNCATE_VM, smgrcreate(), smgrexists(), smgropen(), smgrtruncate(), VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_CREATE, XLOG_SMGR_TRUNCATE, XLogFlush(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogTruncateRelation(), and XLR_INFO_MASK.

◆ smgrDoPendingDeletes()

void smgrDoPendingDeletes ( bool  isCommit)

Definition at line 632 of file storage.c.

633 {
634  int nestLevel = GetCurrentTransactionNestLevel();
635  PendingRelDelete *pending;
636  PendingRelDelete *prev;
638  int nrels = 0,
639  maxrels = 0;
640  SMgrRelation *srels = NULL;
641 
642  prev = NULL;
643  for (pending = pendingDeletes; pending != NULL; pending = next)
644  {
645  next = pending->next;
646  if (pending->nestLevel < nestLevel)
647  {
648  /* outer-level entries should not be processed yet */
649  prev = pending;
650  }
651  else
652  {
653  /* unlink list entry first, so we don't retry on failure */
654  if (prev)
655  prev->next = next;
656  else
658  /* do deletion if called for */
659  if (pending->atCommit == isCommit)
660  {
661  SMgrRelation srel;
662 
663  srel = smgropen(pending->rlocator, pending->procNumber);
664 
665  /* allocate the initial array, or extend it, if needed */
666  if (maxrels == 0)
667  {
668  maxrels = 8;
669  srels = palloc(sizeof(SMgrRelation) * maxrels);
670  }
671  else if (maxrels <= nrels)
672  {
673  maxrels *= 2;
674  srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
675  }
676 
677  srels[nrels++] = srel;
678  }
679  /* must explicitly free the list entry */
680  pfree(pending);
681  /* prev does not change */
682  }
683  }
684 
685  if (nrels > 0)
686  {
687  smgrdounlinkall(srels, nrels, false);
688 
689  for (int i = 0; i < nrels; i++)
690  smgrclose(srels[i]);
691 
692  pfree(srels);
693  }
694 }
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
void smgrclose(SMgrRelation reln)
Definition: smgr.c:323
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:465

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), i, PendingRelDelete::nestLevel, next, PendingRelDelete::next, palloc(), pendingDeletes, pfree(), PendingRelDelete::procNumber, repalloc(), PendingRelDelete::rlocator, smgrclose(), smgrdounlinkall(), and smgropen().

Referenced by AbortTransaction(), AtSubAbort_smgr(), and CommitTransaction().

◆ smgrDoPendingSyncs()

void smgrDoPendingSyncs ( bool  isCommit,
bool  isParallelWorker 
)

Definition at line 700 of file storage.c.

701 {
702  PendingRelDelete *pending;
703  int nrels = 0,
704  maxrels = 0;
705  SMgrRelation *srels = NULL;
706  HASH_SEQ_STATUS scan;
707  PendingRelSync *pendingsync;
708 
710 
711  if (!pendingSyncHash)
712  return; /* no relation needs sync */
713 
714  /* Abort -- just throw away all pending syncs */
715  if (!isCommit)
716  {
717  pendingSyncHash = NULL;
718  return;
719  }
720 
722 
723  /* Parallel worker -- just throw away all pending syncs */
724  if (isParallelWorker)
725  {
726  pendingSyncHash = NULL;
727  return;
728  }
729 
730  /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
731  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
732  if (pending->atCommit)
733  (void) hash_search(pendingSyncHash, &pending->rlocator,
734  HASH_REMOVE, NULL);
735 
737  while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
738  {
739  ForkNumber fork;
740  BlockNumber nblocks[MAX_FORKNUM + 1];
741  BlockNumber total_blocks = 0;
742  SMgrRelation srel;
743 
744  srel = smgropen(pendingsync->rlocator, INVALID_PROC_NUMBER);
745 
746  /*
747  * We emit newpage WAL records for smaller relations.
748  *
749  * Small WAL records have a chance to be flushed along with other
750  * backends' WAL records. We emit WAL records instead of syncing for
751  * files that are smaller than a certain threshold, expecting faster
752  * commit. The threshold is defined by the GUC wal_skip_threshold.
753  */
754  if (!pendingsync->is_truncated)
755  {
756  for (fork = 0; fork <= MAX_FORKNUM; fork++)
757  {
758  if (smgrexists(srel, fork))
759  {
760  BlockNumber n = smgrnblocks(srel, fork);
761 
762  /* we shouldn't come here for unlogged relations */
763  Assert(fork != INIT_FORKNUM);
764  nblocks[fork] = n;
765  total_blocks += n;
766  }
767  else
768  nblocks[fork] = InvalidBlockNumber;
769  }
770  }
771 
772  /*
773  * Sync file or emit WAL records for its contents.
774  *
775  * Although we emit WAL record if the file is small enough, do file
776  * sync regardless of the size if the file has experienced a
777  * truncation. It is because the file would be followed by trailing
778  * garbage blocks after a crash recovery if, while a past longer file
779  * had been flushed out, we omitted syncing-out of the file and
780  * emitted WAL instead. You might think that we could choose WAL if
781  * the current main fork is longer than ever, but there's a case where
782  * main fork is longer than ever but FSM fork gets shorter.
783  */
784  if (pendingsync->is_truncated ||
785  total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
786  {
787  /* allocate the initial array, or extend it, if needed */
788  if (maxrels == 0)
789  {
790  maxrels = 8;
791  srels = palloc(sizeof(SMgrRelation) * maxrels);
792  }
793  else if (maxrels <= nrels)
794  {
795  maxrels *= 2;
796  srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
797  }
798 
799  srels[nrels++] = srel;
800  }
801  else
802  {
803  /* Emit WAL records for all blocks. The file is small enough. */
804  for (fork = 0; fork <= MAX_FORKNUM; fork++)
805  {
806  int n = nblocks[fork];
807  Relation rel;
808 
809  if (!BlockNumberIsValid(n))
810  continue;
811 
812  /*
813  * Emit WAL for the whole file. Unfortunately we don't know
814  * what kind of a page this is, so we have to log the full
815  * page including any unused space. ReadBufferExtended()
816  * counts some pgstat events; unfortunately, we discard them.
817  */
819  log_newpage_range(rel, fork, 0, n, false);
821  }
822  }
823  }
824 
825  pendingSyncHash = NULL;
826 
827  if (nrels > 0)
828  {
829  smgrdosyncall(srels, nrels);
830  pfree(srels);
831  }
832 }
#define AssertPendingSyncs_RelationCache()
Definition: relcache.h:135
void smgrdosyncall(SMgrRelation *rels, int nrels)
Definition: smgr.c:429
int wal_skip_threshold
Definition: storage.c:39
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
Definition: xloginsert.c:1270

References Assert, AssertPendingSyncs_RelationCache, PendingRelDelete::atCommit, BlockNumberIsValid(), CreateFakeRelcacheEntry(), FreeFakeRelcacheEntry(), GetCurrentTransactionNestLevel(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBlockNumber, PendingRelSync::is_truncated, RelFileLocatorBackend::locator, log_newpage_range(), MAX_FORKNUM, PendingRelDelete::next, palloc(), pendingDeletes, pendingSyncHash, pfree(), repalloc(), PendingRelDelete::rlocator, PendingRelSync::rlocator, SMgrRelationData::smgr_rlocator, smgrdosyncall(), smgrexists(), smgrnblocks(), smgropen(), and wal_skip_threshold.

Referenced by AbortTransaction(), CommitTransaction(), and PrepareTransaction().

◆ smgrGetPendingDeletes()

int smgrGetPendingDeletes ( bool  forCommit,
RelFileLocator **  ptr 
)

Definition at line 852 of file storage.c.

853 {
854  int nestLevel = GetCurrentTransactionNestLevel();
855  int nrels;
856  RelFileLocator *rptr;
857  PendingRelDelete *pending;
858 
859  nrels = 0;
860  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
861  {
862  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
863  && pending->procNumber == INVALID_PROC_NUMBER)
864  nrels++;
865  }
866  if (nrels == 0)
867  {
868  *ptr = NULL;
869  return 0;
870  }
871  rptr = (RelFileLocator *) palloc(nrels * sizeof(RelFileLocator));
872  *ptr = rptr;
873  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
874  {
875  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
876  && pending->procNumber == INVALID_PROC_NUMBER)
877  {
878  *rptr = pending->rlocator;
879  rptr++;
880  }
881  }
882  return nrels;
883 }

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), INVALID_PROC_NUMBER, PendingRelDelete::nestLevel, PendingRelDelete::next, palloc(), pendingDeletes, PendingRelDelete::procNumber, and PendingRelDelete::rlocator.

Referenced by RecordTransactionAbort(), RecordTransactionCommit(), and StartPrepare().

Variable Documentation

◆ pendingDeletes

◆ pendingSyncHash

◆ wal_skip_threshold

int wal_skip_threshold = 2048

Definition at line 39 of file storage.c.

Referenced by smgrDoPendingSyncs().