PostgreSQL Source Code  git master
storage.c File Reference
#include "postgres.h"
#include "access/parallel.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for storage.c:

Go to the source code of this file.

Data Structures

struct  PendingRelDelete
 
struct  PendingRelSync
 

Typedefs

typedef struct PendingRelDelete PendingRelDelete
 
typedef struct PendingRelSync PendingRelSync
 

Functions

static void AddPendingSync (const RelFileLocator *rlocator)
 
SMgrRelation RelationCreateStorage (RelFileLocator rlocator, char relpersistence, bool register_delete)
 
void log_smgrcreate (const RelFileLocator *rlocator, ForkNumber forkNum)
 
void RelationDropStorage (Relation rel)
 
void RelationPreserveStorage (RelFileLocator rlocator, bool atCommit)
 
void RelationTruncate (Relation rel, BlockNumber nblocks)
 
void RelationPreTruncate (Relation rel)
 
void RelationCopyStorage (SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
 
bool RelFileLocatorSkippingWAL (RelFileLocator rlocator)
 
Size EstimatePendingSyncsSpace (void)
 
void SerializePendingSyncs (Size maxSize, char *startAddress)
 
void RestorePendingSyncs (char *startAddress)
 
void smgrDoPendingDeletes (bool isCommit)
 
void smgrDoPendingSyncs (bool isCommit, bool isParallelWorker)
 
int smgrGetPendingDeletes (bool forCommit, RelFileLocator **ptr)
 
void PostPrepare_smgr (void)
 
void AtSubCommit_smgr (void)
 
void AtSubAbort_smgr (void)
 
void smgr_redo (XLogReaderState *record)
 

Variables

int wal_skip_threshold = 2048
 
static PendingRelDelete * pendingDeletes = NULL
 
static HTAB * pendingSyncHash = NULL
 

Typedef Documentation

◆ PendingRelDelete

◆ PendingRelSync

Function Documentation

◆ AddPendingSync()

static void AddPendingSync ( const RelFileLocator * rlocator)
static

Definition at line 84 of file storage.c.

85 {
86  PendingRelSync *pending;
87  bool found;
88 
89  /* create the hash if not yet */
90  if (!pendingSyncHash)
91  {
92  HASHCTL ctl;
93 
94  ctl.keysize = sizeof(RelFileLocator);
95  ctl.entrysize = sizeof(PendingRelSync);
96  ctl.hcxt = TopTransactionContext;
97  pendingSyncHash = hash_create("pending sync hash", 16, &ctl,
98  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
99  }
100 
101  pending = hash_search(pendingSyncHash, rlocator, HASH_ENTER, &found);
102  Assert(!found);
103  pending->is_truncated = false;
104 }
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:953
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:350
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
Assert(fmt[strlen(fmt) - 1] !='\n')
MemoryContext TopTransactionContext
Definition: mcxt.c:135
struct RelFileLocator RelFileLocator
static HTAB * pendingSyncHash
Definition: storage.c:76
struct PendingRelSync PendingRelSync
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
MemoryContext hcxt
Definition: hsearch.h:86
bool is_truncated
Definition: storage.c:72

References Assert(), HASHCTL::entrysize, HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), HASHCTL::hcxt, PendingRelSync::is_truncated, HASHCTL::keysize, pendingSyncHash, and TopTransactionContext.

Referenced by RelationCreateStorage(), and RestorePendingSyncs().

◆ AtSubAbort_smgr()

void AtSubAbort_smgr ( void  )

Definition at line 952 of file storage.c.

953 {
954  smgrDoPendingDeletes(false);
955 }
void smgrDoPendingDeletes(bool isCommit)
Definition: storage.c:650

References smgrDoPendingDeletes().

Referenced by AbortSubTransaction().

◆ AtSubCommit_smgr()

void AtSubCommit_smgr ( void  )

Definition at line 932 of file storage.c.

933 {
934  int nestLevel = GetCurrentTransactionNestLevel();
935  PendingRelDelete *pending;
936 
937  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
938  {
939  if (pending->nestLevel >= nestLevel)
940  pending->nestLevel = nestLevel - 1;
941  }
942 }
static PendingRelDelete * pendingDeletes
Definition: storage.c:75
struct PendingRelDelete * next
Definition: storage.c:66
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:913

References GetCurrentTransactionNestLevel(), PendingRelDelete::nestLevel, PendingRelDelete::next, and pendingDeletes.

Referenced by CommitSubTransaction().

◆ EstimatePendingSyncsSpace()

Size EstimatePendingSyncsSpace ( void  )

Definition at line 564 of file storage.c.

565 {
566  long entries;
567 
568  entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
569  return mul_size(1 + entries, sizeof(RelFileLocator));
570 }
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1377
Size mul_size(Size s1, Size s2)
Definition: shmem.c:519

References hash_get_num_entries(), mul_size(), and pendingSyncHash.

Referenced by InitializeParallelDSM().

◆ log_smgrcreate()

void log_smgrcreate ( const RelFileLocator * rlocator,
ForkNumber  forkNum 
)

Definition at line 185 of file storage.c.

186 {
187  xl_smgr_create xlrec;
188 
189  /*
190  * Make an XLOG entry reporting the file creation.
191  */
192  xlrec.rlocator = *rlocator;
193  xlrec.forkNum = forkNum;
194 
195  XLogBeginInsert();
196  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
197  XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
198 }
#define XLOG_SMGR_CREATE
Definition: storage_xlog.h:30
ForkNumber forkNum
Definition: storage_xlog.h:36
RelFileLocator rlocator
Definition: storage_xlog.h:35
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:451
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:71

References xl_smgr_create::forkNum, xl_smgr_create::rlocator, XLOG_SMGR_CREATE, XLogBeginInsert(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by CreateAndCopyRelationData(), fill_seq_with_data(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_copy_data(), and RelationCreateStorage().

◆ PostPrepare_smgr()

void PostPrepare_smgr ( void  )

Definition at line 911 of file storage.c.

912 {
913  PendingRelDelete *pending;
914  PendingRelDelete *next;
915 
916  for (pending = pendingDeletes; pending != NULL; pending = next)
917  {
918  next = pending->next;
919  pendingDeletes = next;
920  /* must explicitly free the list entry */
921  pfree(pending);
922  }
923 }
static int32 next
Definition: blutils.c:219
void pfree(void *pointer)
Definition: mcxt.c:1306

References next, PendingRelDelete::next, pendingDeletes, and pfree().

Referenced by PrepareTransaction().

◆ RelationCopyStorage()

void RelationCopyStorage ( SMgrRelation  src,
SMgrRelation  dst,
ForkNumber  forkNum,
char  relpersistence 
)

Definition at line 451 of file storage.c.

453 {
455  Page page;
456  bool use_wal;
457  bool copying_initfork;
458  BlockNumber nblocks;
459  BlockNumber blkno;
460 
461  page = (Page) buf.data;
462 
463  /*
464  * The init fork for an unlogged relation in many respects has to be
465  * treated the same as normal relation, changes need to be WAL logged and
466  * it needs to be synced to disk.
467  */
468  copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
469  forkNum == INIT_FORKNUM;
470 
471  /*
472  * We need to log the copied data in WAL iff WAL archiving/streaming is
473  * enabled AND it's a permanent relation. This gives the same answer as
474  * "RelationNeedsWAL(rel) || copying_initfork", because we know the
475  * current operation created new relation storage.
476  */
477  use_wal = XLogIsNeeded() &&
478  (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
479 
480  nblocks = smgrnblocks(src, forkNum);
481 
482  for (blkno = 0; blkno < nblocks; blkno++)
483  {
484  /* If we got a cancel signal during the copy of the data, quit */
486 
487  smgrread(src, forkNum, blkno, buf.data);
488 
489  if (!PageIsVerifiedExtended(page, blkno,
491  {
492  /*
493  * For paranoia's sake, capture the file path before invoking the
494  * ereport machinery. This guards against the possibility of a
495  * relcache flush caused by, e.g., an errcontext callback.
496  * (errcontext callbacks shouldn't be risking any such thing, but
497  * people have been known to forget that rule.)
498  */
500  src->smgr_rlocator.backend,
501  forkNum);
502 
503  ereport(ERROR,
505  errmsg("invalid page in block %u of relation %s",
506  blkno, relpath)));
507  }
508 
509  /*
510  * WAL-log the copied page. Unfortunately we don't know what kind of a
511  * page this is, so we have to log the full page including any unused
512  * space.
513  */
514  if (use_wal)
515  log_newpage(&dst->smgr_rlocator.locator, forkNum, blkno, page, false);
516 
517  PageSetChecksumInplace(page, blkno);
518 
519  /*
520  * Now write the page. We say skipFsync = true because there's no
521  * need for smgr to schedule an fsync for this write; we'll do it
522  * ourselves below.
523  */
524  smgrextend(dst, forkNum, blkno, buf.data, true);
525  }
526 
527  /*
528  * When we WAL-logged rel pages, we must nonetheless fsync them. The
529  * reason is that since we're copying outside shared buffers, a CHECKPOINT
530  * occurring during the copy has no way to flush the previously written
531  * data to disk (indeed it won't know the new rel even exists). A crash
532  * later on would replay WAL from the checkpoint, therefore it wouldn't
533  * replay our earlier WAL entries. If we do not fsync those pages here,
534  * they might still not be on disk when the crash occurs.
535  */
536  if (use_wal || copying_initfork)
537  smgrimmedsync(dst, forkNum);
538 }
uint32 BlockNumber
Definition: block.h:31
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1539
Pointer Page
Definition: bufpage.h:78
#define PIV_LOG_WARNING
Definition: bufpage.h:465
#define PIV_REPORT_STAT
Definition: bufpage.h:466
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
static char * buf
Definition: pg_test_fsync.c:67
@ INIT_FORKNUM
Definition: relpath.h:53
#define relpath(rlocator, forknum)
Definition: relpath.h:94
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:85
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:579
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:493
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:691
void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
Definition: smgr.c:532
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42
#define XLogIsNeeded()
Definition: xlog.h:104
XLogRecPtr log_newpage(RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blkno, Page page, bool page_std)
Definition: xloginsert.c:1097

References RelFileLocatorBackend::backend, buf, CHECK_FOR_INTERRUPTS, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, INIT_FORKNUM, RelFileLocatorBackend::locator, log_newpage(), PageIsVerifiedExtended(), PageSetChecksumInplace(), PIV_LOG_WARNING, PIV_REPORT_STAT, relpath, relpathbackend, SMgrRelationData::smgr_rlocator, smgrextend(), smgrimmedsync(), smgrnblocks(), smgrread(), and XLogIsNeeded.

Referenced by heapam_relation_copy_data(), and index_copy_data().

◆ RelationCreateStorage()

SMgrRelation RelationCreateStorage ( RelFileLocator  rlocator,
char  relpersistence,
bool  register_delete 
)

Definition at line 120 of file storage.c.

122 {
123  SMgrRelation srel;
124  BackendId backend;
125  bool needs_wal;
126 
127  Assert(!IsInParallelMode()); /* couldn't update pendingSyncHash */
128 
129  switch (relpersistence)
130  {
131  case RELPERSISTENCE_TEMP:
132  backend = BackendIdForTempRelations();
133  needs_wal = false;
134  break;
135  case RELPERSISTENCE_UNLOGGED:
136  backend = InvalidBackendId;
137  needs_wal = false;
138  break;
139  case RELPERSISTENCE_PERMANENT:
140  backend = InvalidBackendId;
141  needs_wal = true;
142  break;
143  default:
144  elog(ERROR, "invalid relpersistence: %c", relpersistence);
145  return NULL; /* placate compiler */
146  }
147 
148  srel = smgropen(rlocator, backend);
149  smgrcreate(srel, MAIN_FORKNUM, false);
150 
151  if (needs_wal)
153 
154  /*
155  * Add the relation to the list of stuff to delete at abort, if we are
156  * asked to do so.
157  */
158  if (register_delete)
159  {
160  PendingRelDelete *pending;
161 
162  pending = (PendingRelDelete *)
164  pending->rlocator = rlocator;
165  pending->backend = backend;
166  pending->atCommit = false; /* delete if abort */
168  pending->next = pendingDeletes;
169  pendingDeletes = pending;
170  }
171 
172  if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
173  {
174  Assert(backend == InvalidBackendId);
175  AddPendingSync(&rlocator);
176  }
177 
178  return srel;
179 }
int BackendId
Definition: backendid.h:21
#define BackendIdForTempRelations()
Definition: backendid.h:34
#define InvalidBackendId
Definition: backendid.h:23
MemoryContext TopMemoryContext
Definition: mcxt.c:130
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:994
@ MAIN_FORKNUM
Definition: relpath.h:50
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:369
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:146
static void AddPendingSync(const RelFileLocator *rlocator)
Definition: storage.c:84
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:185
RelFileLocator rlocator
Definition: storage.c:62
BackendId backend
Definition: storage.c:63
bool IsInParallelMode(void)
Definition: xact.c:1068

References AddPendingSync(), Assert(), PendingRelDelete::atCommit, PendingRelDelete::backend, BackendIdForTempRelations, elog(), ERROR, GetCurrentTransactionNestLevel(), InvalidBackendId, IsInParallelMode(), RelFileLocatorBackend::locator, log_smgrcreate(), MAIN_FORKNUM, MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, PendingRelDelete::rlocator, SMgrRelationData::smgr_rlocator, smgrcreate(), smgropen(), TopMemoryContext, and XLogIsNeeded.

Referenced by CreateAndCopyRelationData(), heap_create(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_copy_data(), and RelationSetNewRelfilenumber().

◆ RelationDropStorage()

void RelationDropStorage ( Relation  rel)

Definition at line 205 of file storage.c.

206 {
207  PendingRelDelete *pending;
208 
209  /* Add the relation to the list of stuff to delete at commit */
210  pending = (PendingRelDelete *)
212  pending->rlocator = rel->rd_locator;
213  pending->backend = rel->rd_backend;
214  pending->atCommit = true; /* delete if commit */
216  pending->next = pendingDeletes;
217  pendingDeletes = pending;
218 
219  /*
220  * NOTE: if the relation was created in this transaction, it will now be
221  * present in the pending-delete list twice, once with atCommit true and
222  * once with atCommit false. Hence, it will be physically deleted at end
223  * of xact in either case (and the other entry will be ignored by
224  * smgrDoPendingDeletes, so no error will occur). We could instead remove
225  * the existing list entry and delete the physical file immediately, but
226  * for now I'll keep the logic simple.
227  */
228 
229  RelationCloseSmgr(rel);
230 }
static void RelationCloseSmgr(Relation relation)
Definition: rel.h:581
BackendId rd_backend
Definition: rel.h:59
RelFileLocator rd_locator
Definition: rel.h:56

References PendingRelDelete::atCommit, PendingRelDelete::backend, GetCurrentTransactionNestLevel(), MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, RelationData::rd_backend, RelationData::rd_locator, RelationCloseSmgr(), PendingRelDelete::rlocator, and TopMemoryContext.

Referenced by DefineQueryRewrite(), heap_drop_with_catalog(), heapam_relation_copy_data(), index_copy_data(), index_drop(), reindex_index(), and RelationSetNewRelfilenumber().

◆ RelationPreserveStorage()

void RelationPreserveStorage ( RelFileLocator  rlocator,
bool  atCommit 
)

Definition at line 250 of file storage.c.

251 {
252  PendingRelDelete *pending;
253  PendingRelDelete *prev;
255 
256  prev = NULL;
257  for (pending = pendingDeletes; pending != NULL; pending = next)
258  {
259  next = pending->next;
260  if (RelFileLocatorEquals(rlocator, pending->rlocator)
261  && pending->atCommit == atCommit)
262  {
263  /* unlink and delete list entry */
264  if (prev)
265  prev->next = next;
266  else
268  pfree(pending);
269  /* prev does not change */
270  }
271  else
272  {
273  /* unrelated entry, don't touch it */
274  prev = pending;
275  }
276  }
277 }
#define RelFileLocatorEquals(locator1, locator2)

References PendingRelDelete::atCommit, next, PendingRelDelete::next, pendingDeletes, pfree(), RelFileLocatorEquals, and PendingRelDelete::rlocator.

Referenced by ATExecAddIndex(), and write_relmap_file().

◆ RelationPreTruncate()

void RelationPreTruncate ( Relation  rel)

Definition at line 423 of file storage.c.

424 {
425  PendingRelSync *pending;
426 
427  if (!pendingSyncHash)
428  return;
429 
430  pending = hash_search(pendingSyncHash,
431  &(RelationGetSmgr(rel)->smgr_rlocator.locator),
432  HASH_FIND, NULL);
433  if (pending)
434  pending->is_truncated = true;
435 }
@ HASH_FIND
Definition: hsearch.h:113
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:569

References HASH_FIND, hash_search(), PendingRelSync::is_truncated, pendingSyncHash, and RelationGetSmgr().

Referenced by RelationTruncate().

◆ RelationTruncate()

void RelationTruncate ( Relation  rel,
BlockNumber  nblocks 
)

Definition at line 287 of file storage.c.

288 {
289  bool fsm;
290  bool vm;
291  bool need_fsm_vacuum = false;
292  ForkNumber forks[MAX_FORKNUM];
293  BlockNumber blocks[MAX_FORKNUM];
294  int nforks = 0;
295  SMgrRelation reln;
296 
297  /*
298  * Make sure smgr_targblock etc aren't pointing somewhere past new end.
299  * (Note: don't rely on this reln pointer below this loop.)
300  */
301  reln = RelationGetSmgr(rel);
303  for (int i = 0; i <= MAX_FORKNUM; ++i)
305 
306  /* Prepare for truncation of MAIN fork of the relation */
307  forks[nforks] = MAIN_FORKNUM;
308  blocks[nforks] = nblocks;
309  nforks++;
310 
311  /* Prepare for truncation of the FSM if it exists */
313  if (fsm)
314  {
315  blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
316  if (BlockNumberIsValid(blocks[nforks]))
317  {
318  forks[nforks] = FSM_FORKNUM;
319  nforks++;
320  need_fsm_vacuum = true;
321  }
322  }
323 
324  /* Prepare for truncation of the visibility map too if it exists */
326  if (vm)
327  {
328  blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
329  if (BlockNumberIsValid(blocks[nforks]))
330  {
331  forks[nforks] = VISIBILITYMAP_FORKNUM;
332  nforks++;
333  }
334  }
335 
336  RelationPreTruncate(rel);
337 
338  /*
339  * Make sure that a concurrent checkpoint can't complete while truncation
340  * is in progress.
341  *
342  * The truncation operation might drop buffers that the checkpoint
343  * otherwise would have flushed. If it does, then it's essential that the
344  * files actually get truncated on disk before the checkpoint record is
345  * written. Otherwise, if replay begins from that checkpoint, the
346  * to-be-truncated blocks might still exist on disk but have older
347  * contents than expected, which can cause replay to fail. It's OK for the
348  * blocks to not exist on disk at all, but not for them to have the wrong
349  * contents.
350  */
353 
354  /*
355  * We WAL-log the truncation before actually truncating, which means
356  * trouble if the truncation fails. If we then crash, the WAL replay
357  * likely isn't going to succeed in the truncation either, and cause a
358  * PANIC. It's tempting to put a critical section here, but that cure
359  * would be worse than the disease. It would turn a usually harmless
360  * failure to truncate, that might spell trouble at WAL replay, into a
361  * certain PANIC.
362  */
363  if (RelationNeedsWAL(rel))
364  {
365  /*
366  * Make an XLOG entry reporting the file truncation.
367  */
368  XLogRecPtr lsn;
369  xl_smgr_truncate xlrec;
370 
371  xlrec.blkno = nblocks;
372  xlrec.rlocator = rel->rd_locator;
373  xlrec.flags = SMGR_TRUNCATE_ALL;
374 
375  XLogBeginInsert();
376  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
377 
378  lsn = XLogInsert(RM_SMGR_ID,
380 
381  /*
382  * Flush, because otherwise the truncation of the main relation might
383  * hit the disk before the WAL record, and the truncation of the FSM
384  * or visibility map. If we crashed during that window, we'd be left
385  * with a truncated heap, but the FSM or visibility map would still
386  * contain entries for the non-existent heap pages.
387  */
388  if (fsm || vm)
389  XLogFlush(lsn);
390  }
391 
392  /*
393  * This will first remove any buffers from the buffer pool that should no
394  * longer exist after truncation is complete, and then truncate the
395  * corresponding files on disk.
396  */
397  smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
398 
399  /* We've done all the critical work, so checkpoints are OK now. */
401 
402  /*
403  * Update upper-level FSM pages to account for the truncation. This is
404  * important because the just-truncated pages were likely marked as
405  * all-free, and would be preferentially selected.
406  *
407  * NB: There's no point in delaying checkpoints until this is done.
408  * Because the FSM is not WAL-logged, we have to be prepared for the
409  * possibility of corruption after a crash anyway.
410  */
411  if (need_fsm_vacuum)
413 }
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
Definition: freespace.c:263
int i
Definition: isn.c:73
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
#define RelationNeedsWAL(relation)
Definition: rel.h:626
ForkNumber
Definition: relpath.h:48
@ FSM_FORKNUM
Definition: relpath.h:51
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:52
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
Definition: smgr.c:626
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:247
PGPROC * MyProc
Definition: proc.c:68
void RelationPreTruncate(Relation rel)
Definition: storage.c:423
#define SMGR_TRUNCATE_ALL
Definition: storage_xlog.h:43
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
int delayChkptFlags
Definition: proc.h:231
BlockNumber smgr_targblock
Definition: smgr.h:53
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:54
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2512
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), xl_smgr_truncate::blkno, BlockNumberIsValid(), DELAY_CHKPT_COMPLETE, PGPROC::delayChkptFlags, xl_smgr_truncate::flags, FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, i, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, MyProc, RelationData::rd_locator, RelationGetSmgr(), RelationNeedsWAL, RelationPreTruncate(), xl_smgr_truncate::rlocator, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_targblock, SMGR_TRUNCATE_ALL, smgrexists(), smgrtruncate(), VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_TRUNCATE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by heapam_relation_nontransactional_truncate(), lazy_truncate_heap(), RelationTruncateIndexes(), and spgvacuumscan().

◆ RelFileLocatorSkippingWAL()

bool RelFileLocatorSkippingWAL ( RelFileLocator  rlocator)

Definition at line 550 of file storage.c.

551 {
552  if (!pendingSyncHash ||
553  hash_search(pendingSyncHash, &rlocator, HASH_FIND, NULL) == NULL)
554  return false;
555 
556  return true;
557 }

References HASH_FIND, hash_search(), and pendingSyncHash.

Referenced by MarkBufferDirtyHint(), and RelationInitPhysicalAddr().

◆ RestorePendingSyncs()

void RestorePendingSyncs ( char *  startAddress)

Definition at line 628 of file storage.c.

629 {
630  RelFileLocator *rlocator;
631 
632  Assert(pendingSyncHash == NULL);
633  for (rlocator = (RelFileLocator *) startAddress; rlocator->relNumber != 0;
634  rlocator++)
635  AddPendingSync(rlocator);
636 }
RelFileNumber relNumber

References AddPendingSync(), Assert(), pendingSyncHash, and RelFileLocator::relNumber.

Referenced by ParallelWorkerMain().

◆ SerializePendingSyncs()

void SerializePendingSyncs ( Size  maxSize,
char *  startAddress 
)

Definition at line 577 of file storage.c.

578 {
579  HTAB *tmphash;
580  HASHCTL ctl;
581  HASH_SEQ_STATUS scan;
582  PendingRelSync *sync;
583  PendingRelDelete *delete;
584  RelFileLocator *src;
585  RelFileLocator *dest = (RelFileLocator *) startAddress;
586 
587  if (!pendingSyncHash)
588  goto terminate;
589 
590  /* Create temporary hash to collect active relfilelocators */
591  ctl.keysize = sizeof(RelFileLocator);
592  ctl.entrysize = sizeof(RelFileLocator);
594  tmphash = hash_create("tmp relfilelocators",
597 
598  /* collect all rlocator from pending syncs */
600  while ((sync = (PendingRelSync *) hash_seq_search(&scan)))
601  (void) hash_search(tmphash, &sync->rlocator, HASH_ENTER, NULL);
602 
603  /* remove deleted relfilelocators */
604  for (delete = pendingDeletes; delete != NULL; delete = delete->next)
605  if (delete->atCommit)
606  (void) hash_search(tmphash, (void *) &delete->rlocator,
607  HASH_REMOVE, NULL);
608 
609  hash_seq_init(&scan, tmphash);
610  while ((src = (RelFileLocator *) hash_seq_search(&scan)))
611  *dest++ = *src;
612 
613  hash_destroy(tmphash);
614 
615 terminate:
616  MemSet(dest, 0, sizeof(RelFileLocator));
617 }
#define MemSet(start, val, len)
Definition: c.h:953
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:863
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1431
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1421
@ HASH_REMOVE
Definition: hsearch.h:115
MemoryContext CurrentMemoryContext
Definition: mcxt.c:124
Definition: dynahash.c:220
RelFileLocator rlocator
Definition: storage.c:71

References CurrentMemoryContext, generate_unaccent_rules::dest, HASHCTL::entrysize, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), HASHCTL::hcxt, HASHCTL::keysize, MemSet, PendingRelDelete::next, pendingDeletes, pendingSyncHash, and PendingRelSync::rlocator.

Referenced by InitializeParallelDSM().

◆ smgr_redo()

void smgr_redo ( XLogReaderState * record)

Definition at line 958 of file storage.c.

959 {
960  XLogRecPtr lsn = record->EndRecPtr;
961  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
962 
963  /* Backup blocks are not used in smgr records */
964  Assert(!XLogRecHasAnyBlockRefs(record));
965 
966  if (info == XLOG_SMGR_CREATE)
967  {
968  xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
969  SMgrRelation reln;
970 
971  reln = smgropen(xlrec->rlocator, InvalidBackendId);
972  smgrcreate(reln, xlrec->forkNum, true);
973  }
974  else if (info == XLOG_SMGR_TRUNCATE)
975  {
976  xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
977  SMgrRelation reln;
978  Relation rel;
979  ForkNumber forks[MAX_FORKNUM];
980  BlockNumber blocks[MAX_FORKNUM];
981  int nforks = 0;
982  bool need_fsm_vacuum = false;
983 
984  reln = smgropen(xlrec->rlocator, InvalidBackendId);
985 
986  /*
987  * Forcibly create relation if it doesn't exist (which suggests that
988  * it was dropped somewhere later in the WAL sequence). As in
989  * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
990  * log as best we can until the drop is seen.
991  */
992  smgrcreate(reln, MAIN_FORKNUM, true);
993 
994  /*
995  * Before we perform the truncation, update minimum recovery point to
996  * cover this WAL record. Once the relation is truncated, there's no
997  * going back. The buffer manager enforces the WAL-first rule for
998  * normal updates to relation files, so that the minimum recovery
999  * point is always updated before the corresponding change in the data
1000  * file is flushed to disk. We have to do the same manually here.
1001  *
1002  * Doing this before the truncation means that if the truncation fails
1003  * for some reason, you cannot start up the system even after restart,
1004  * until you fix the underlying situation so that the truncation will
1005  * succeed. Alternatively, we could update the minimum recovery point
1006  * after truncation, but that would leave a small window where the
1007  * WAL-first rule could be violated.
1008  */
1009  XLogFlush(lsn);
1010 
1011  /* Prepare for truncation of MAIN fork */
1012  if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
1013  {
1014  forks[nforks] = MAIN_FORKNUM;
1015  blocks[nforks] = xlrec->blkno;
1016  nforks++;
1017 
1018  /* Also tell xlogutils.c about it */
1020  }
1021 
1022  /* Prepare for truncation of FSM and VM too */
1023  rel = CreateFakeRelcacheEntry(xlrec->rlocator);
1024 
1025  if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
1026  smgrexists(reln, FSM_FORKNUM))
1027  {
1028  blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
1029  if (BlockNumberIsValid(blocks[nforks]))
1030  {
1031  forks[nforks] = FSM_FORKNUM;
1032  nforks++;
1033  need_fsm_vacuum = true;
1034  }
1035  }
1036  if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
1038  {
1039  blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
1040  if (BlockNumberIsValid(blocks[nforks]))
1041  {
1042  forks[nforks] = VISIBILITYMAP_FORKNUM;
1043  nforks++;
1044  }
1045  }
1046 
1047  /* Do the real work to truncate relation forks */
1048  if (nforks > 0)
1049  smgrtruncate(reln, forks, nforks, blocks);
1050 
1051  /*
1052  * Update upper-level FSM pages to account for the truncation. This is
1053  * important because the just-truncated pages were likely marked as
1054  * all-free, and would be preferentially selected.
1055  */
1056  if (need_fsm_vacuum)
1057  FreeSpaceMapVacuumRange(rel, xlrec->blkno,
1059 
1060  FreeFakeRelcacheEntry(rel);
1061  }
1062  else
1063  elog(PANIC, "smgr_redo: unknown op code %u", info);
1064 }
unsigned char uint8
Definition: c.h:440
#define PANIC
Definition: elog.h:38
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define SMGR_TRUNCATE_HEAP
Definition: storage_xlog.h:40
#define SMGR_TRUNCATE_FSM
Definition: storage_xlog.h:42
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:411
#define XLogRecGetData(decoder)
Definition: xlogreader.h:416
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:418
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void FreeFakeRelcacheEntry(Relation fakerel)
Definition: xlogutils.c:643
void XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nblocks)
Definition: xlogutils.c:688
Relation CreateFakeRelcacheEntry(RelFileLocator rlocator)
Definition: xlogutils.c:600

References Assert(), xl_smgr_truncate::blkno, BlockNumberIsValid(), CreateFakeRelcacheEntry(), elog(), XLogReaderState::EndRecPtr, xl_smgr_truncate::flags, xl_smgr_create::forkNum, FreeFakeRelcacheEntry(), FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, InvalidBackendId, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, PANIC, xl_smgr_create::rlocator, xl_smgr_truncate::rlocator, SMGR_TRUNCATE_FSM, SMGR_TRUNCATE_HEAP, SMGR_TRUNCATE_VM, smgrcreate(), smgrexists(), smgropen(), smgrtruncate(), VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_CREATE, XLOG_SMGR_TRUNCATE, XLogFlush(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogTruncateRelation(), and XLR_INFO_MASK.

◆ smgrDoPendingDeletes()

void smgrDoPendingDeletes ( bool  isCommit)

Definition at line 650 of file storage.c.

651 {
652  int nestLevel = GetCurrentTransactionNestLevel();
653  PendingRelDelete *pending;
654  PendingRelDelete *prev;
656  int nrels = 0,
657  maxrels = 0;
658  SMgrRelation *srels = NULL;
659 
660  prev = NULL;
661  for (pending = pendingDeletes; pending != NULL; pending = next)
662  {
663  next = pending->next;
664  if (pending->nestLevel < nestLevel)
665  {
666  /* outer-level entries should not be processed yet */
667  prev = pending;
668  }
669  else
670  {
671  /* unlink list entry first, so we don't retry on failure */
672  if (prev)
673  prev->next = next;
674  else
676  /* do deletion if called for */
677  if (pending->atCommit == isCommit)
678  {
679  SMgrRelation srel;
680 
681  srel = smgropen(pending->rlocator, pending->backend);
682 
683  /* allocate the initial array, or extend it, if needed */
684  if (maxrels == 0)
685  {
686  maxrels = 8;
687  srels = palloc(sizeof(SMgrRelation) * maxrels);
688  }
689  else if (maxrels <= nrels)
690  {
691  maxrels *= 2;
692  srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
693  }
694 
695  srels[nrels++] = srel;
696  }
697  /* must explicitly free the list entry */
698  pfree(pending);
699  /* prev does not change */
700  }
701  }
702 
703  if (nrels > 0)
704  {
705  smgrdounlinkall(srels, nrels, false);
706 
707  for (int i = 0; i < nrels; i++)
708  smgrclose(srels[i]);
709 
710  pfree(srels);
711  }
712 }
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
void * palloc(Size size)
Definition: mcxt.c:1199
void smgrclose(SMgrRelation reln)
Definition: smgr.c:256
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:420

References PendingRelDelete::atCommit, PendingRelDelete::backend, GetCurrentTransactionNestLevel(), i, PendingRelDelete::nestLevel, next, PendingRelDelete::next, palloc(), pendingDeletes, pfree(), repalloc(), PendingRelDelete::rlocator, smgrclose(), smgrdounlinkall(), and smgropen().

Referenced by AbortTransaction(), AtSubAbort_smgr(), and CommitTransaction().

◆ smgrDoPendingSyncs()

void smgrDoPendingSyncs ( bool  isCommit,
bool  isParallelWorker 
)

Definition at line 718 of file storage.c.

719 {
720  PendingRelDelete *pending;
721  int nrels = 0,
722  maxrels = 0;
723  SMgrRelation *srels = NULL;
724  HASH_SEQ_STATUS scan;
725  PendingRelSync *pendingsync;
726 
728 
729  if (!pendingSyncHash)
730  return; /* no relation needs sync */
731 
732  /* Abort -- just throw away all pending syncs */
733  if (!isCommit)
734  {
735  pendingSyncHash = NULL;
736  return;
737  }
738 
740 
741  /* Parallel worker -- just throw away all pending syncs */
742  if (isParallelWorker)
743  {
744  pendingSyncHash = NULL;
745  return;
746  }
747 
748  /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
749  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
750  if (pending->atCommit)
751  (void) hash_search(pendingSyncHash, (void *) &pending->rlocator,
752  HASH_REMOVE, NULL);
753 
755  while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
756  {
757  ForkNumber fork;
758  BlockNumber nblocks[MAX_FORKNUM + 1];
759  BlockNumber total_blocks = 0;
760  SMgrRelation srel;
761 
762  srel = smgropen(pendingsync->rlocator, InvalidBackendId);
763 
764  /*
765  * We emit newpage WAL records for smaller relations.
766  *
767  * Small WAL records have a chance to be emitted along with other
768  * backends' WAL records. We emit WAL records instead of syncing for
769  * files that are smaller than a certain threshold, expecting faster
770  * commit. The threshold is defined by the GUC wal_skip_threshold.
771  */
772  if (!pendingsync->is_truncated)
773  {
774  for (fork = 0; fork <= MAX_FORKNUM; fork++)
775  {
776  if (smgrexists(srel, fork))
777  {
778  BlockNumber n = smgrnblocks(srel, fork);
779 
780  /* we shouldn't come here for unlogged relations */
781  Assert(fork != INIT_FORKNUM);
782  nblocks[fork] = n;
783  total_blocks += n;
784  }
785  else
786  nblocks[fork] = InvalidBlockNumber;
787  }
788  }
789 
790  /*
791  * Sync file or emit WAL records for its contents.
792  *
793  * Although we emit WAL record if the file is small enough, do file
794  * sync regardless of the size if the file has experienced a
795  * truncation. It is because the file would be followed by trailing
796  * garbage blocks after a crash recovery if, while a past longer file
797  * had been flushed out, we omitted syncing-out of the file and
798  * emitted WAL instead. You might think that we could choose WAL if
799  * the current main fork is longer than ever, but there's a case where
800  * main fork is longer than ever but FSM fork gets shorter.
801  */
802  if (pendingsync->is_truncated ||
803  total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
804  {
805  /* allocate the initial array, or extend it, if needed */
806  if (maxrels == 0)
807  {
808  maxrels = 8;
809  srels = palloc(sizeof(SMgrRelation) * maxrels);
810  }
811  else if (maxrels <= nrels)
812  {
813  maxrels *= 2;
814  srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
815  }
816 
817  srels[nrels++] = srel;
818  }
819  else
820  {
821  /* Emit WAL records for all blocks. The file is small enough. */
822  for (fork = 0; fork <= MAX_FORKNUM; fork++)
823  {
824  int n = nblocks[fork];
825  Relation rel;
826 
827  if (!BlockNumberIsValid(n))
828  continue;
829 
830  /*
831  * Emit WAL for the whole file. Unfortunately we don't know
832  * what kind of a page this is, so we have to log the full
833  * page including any unused space. ReadBufferExtended()
834  * counts some pgstat events; unfortunately, we discard them.
835  */
837  log_newpage_range(rel, fork, 0, n, false);
839  }
840  }
841  }
842 
843  pendingSyncHash = NULL;
844 
845  if (nrels > 0)
846  {
847  smgrdosyncall(srels, nrels);
848  pfree(srels);
849  }
850 }
#define AssertPendingSyncs_RelationCache()
Definition: relcache.h:134
void smgrdosyncall(SMgrRelation *rels, int nrels)
Definition: smgr.c:384
int wal_skip_threshold
Definition: storage.c:38
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
Definition: xloginsert.c:1224

References Assert(), AssertPendingSyncs_RelationCache, PendingRelDelete::atCommit, BlockNumberIsValid(), CreateFakeRelcacheEntry(), FreeFakeRelcacheEntry(), GetCurrentTransactionNestLevel(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), INIT_FORKNUM, InvalidBackendId, InvalidBlockNumber, PendingRelSync::is_truncated, RelFileLocatorBackend::locator, log_newpage_range(), MAX_FORKNUM, PendingRelDelete::next, palloc(), pendingDeletes, pendingSyncHash, pfree(), repalloc(), PendingRelDelete::rlocator, PendingRelSync::rlocator, SMgrRelationData::smgr_rlocator, smgrdosyncall(), smgrexists(), smgrnblocks(), smgropen(), and wal_skip_threshold.

Referenced by AbortTransaction(), CommitTransaction(), and PrepareTransaction().

◆ smgrGetPendingDeletes()

int smgrGetPendingDeletes ( bool  forCommit,
RelFileLocator **  ptr 
)

Definition at line 870 of file storage.c.

871 {
872  int nestLevel = GetCurrentTransactionNestLevel();
873  int nrels;
874  RelFileLocator *rptr;
875  PendingRelDelete *pending;
876 
877  nrels = 0;
878  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
879  {
880  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
881  && pending->backend == InvalidBackendId)
882  nrels++;
883  }
884  if (nrels == 0)
885  {
886  *ptr = NULL;
887  return 0;
888  }
889  rptr = (RelFileLocator *) palloc(nrels * sizeof(RelFileLocator));
890  *ptr = rptr;
891  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
892  {
893  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
894  && pending->backend == InvalidBackendId)
895  {
896  *rptr = pending->rlocator;
897  rptr++;
898  }
899  }
900  return nrels;
901 }

References PendingRelDelete::atCommit, PendingRelDelete::backend, GetCurrentTransactionNestLevel(), InvalidBackendId, PendingRelDelete::nestLevel, PendingRelDelete::next, palloc(), pendingDeletes, and PendingRelDelete::rlocator.

Referenced by RecordTransactionAbort(), RecordTransactionCommit(), and StartPrepare().

Variable Documentation

◆ pendingDeletes

static PendingRelDelete *pendingDeletes = NULL

◆ pendingSyncHash

static HTAB *pendingSyncHash = NULL

◆ wal_skip_threshold

int wal_skip_threshold = 2048

Definition at line 38 of file storage.c.

Referenced by smgrDoPendingSyncs().