PostgreSQL Source Code git master
storage.c File Reference
#include "postgres.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "miscadmin.h"
#include "storage/bulk_write.h"
#include "storage/freespace.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "utils/rel.h"
Include dependency graph for storage.c:

Go to the source code of this file.

Data Structures

struct  PendingRelDelete
 
struct  PendingRelSync
 

Typedefs

typedef struct PendingRelDelete PendingRelDelete
 
typedef struct PendingRelSync PendingRelSync
 

Functions

static void AddPendingSync (const RelFileLocator *rlocator)
 
SMgrRelation RelationCreateStorage (RelFileLocator rlocator, char relpersistence, bool register_delete)
 
void log_smgrcreate (const RelFileLocator *rlocator, ForkNumber forkNum)
 
void RelationDropStorage (Relation rel)
 
void RelationPreserveStorage (RelFileLocator rlocator, bool atCommit)
 
void RelationTruncate (Relation rel, BlockNumber nblocks)
 
void RelationPreTruncate (Relation rel)
 
void RelationCopyStorage (SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
 
bool RelFileLocatorSkippingWAL (RelFileLocator rlocator)
 
Size EstimatePendingSyncsSpace (void)
 
void SerializePendingSyncs (Size maxSize, char *startAddress)
 
void RestorePendingSyncs (char *startAddress)
 
void smgrDoPendingDeletes (bool isCommit)
 
void smgrDoPendingSyncs (bool isCommit, bool isParallelWorker)
 
int smgrGetPendingDeletes (bool forCommit, RelFileLocator **ptr)
 
void PostPrepare_smgr (void)
 
void AtSubCommit_smgr (void)
 
void AtSubAbort_smgr (void)
 
void smgr_redo (XLogReaderState *record)
 

Variables

int wal_skip_threshold = 2048
 
static PendingRelDelete * pendingDeletes = NULL
 
static HTAB * pendingSyncHash = NULL
 

Typedef Documentation

◆ PendingRelDelete

◆ PendingRelSync

Function Documentation

◆ AddPendingSync()

static void AddPendingSync ( const RelFileLocator * rlocator)
static

Definition at line 85 of file storage.c.

86{
87 PendingRelSync *pending;
88 bool found;
89
90 /* create the hash table if it does not exist yet */
91 if (!pendingSyncHash)
92 {
94
95 ctl.keysize = sizeof(RelFileLocator);
96 ctl.entrysize = sizeof(PendingRelSync);
98 pendingSyncHash = hash_create("pending sync hash", 16, &ctl,
100 }
101
102 pending = hash_search(pendingSyncHash, rlocator, HASH_ENTER, &found);
103 Assert(!found);
104 pending->is_truncated = false;
105}
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
Assert(PointerIsAligned(start, uint64))
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_CONTEXT
Definition: hsearch.h:102
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
MemoryContext TopTransactionContext
Definition: mcxt.c:154
tree ctl
Definition: radixtree.h:1838
struct RelFileLocator RelFileLocator
static HTAB * pendingSyncHash
Definition: storage.c:77
struct PendingRelSync PendingRelSync
bool is_truncated
Definition: storage.c:73

References Assert(), ctl, HASH_BLOBS, HASH_CONTEXT, hash_create(), HASH_ELEM, HASH_ENTER, hash_search(), PendingRelSync::is_truncated, pendingSyncHash, and TopTransactionContext.

Referenced by RelationCreateStorage(), and RestorePendingSyncs().

◆ AtSubAbort_smgr()

void AtSubAbort_smgr ( void  )

Definition at line 959 of file storage.c.

960{
962}
void smgrDoPendingDeletes(bool isCommit)
Definition: storage.c:657

References smgrDoPendingDeletes().

Referenced by AbortSubTransaction().

◆ AtSubCommit_smgr()

void AtSubCommit_smgr ( void  )

Definition at line 939 of file storage.c.

940{
941 int nestLevel = GetCurrentTransactionNestLevel();
942 PendingRelDelete *pending;
943
944 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
945 {
946 if (pending->nestLevel >= nestLevel)
947 pending->nestLevel = nestLevel - 1;
948 }
949}
static PendingRelDelete * pendingDeletes
Definition: storage.c:76
struct PendingRelDelete * next
Definition: storage.c:67
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:929

References GetCurrentTransactionNestLevel(), PendingRelDelete::nestLevel, PendingRelDelete::next, and pendingDeletes.

Referenced by CommitSubTransaction().

◆ EstimatePendingSyncsSpace()

Size EstimatePendingSyncsSpace ( void  )

Definition at line 571 of file storage.c.

572{
573 long entries;
574
576 return mul_size(1 + entries, sizeof(RelFileLocator));
577}
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
Size mul_size(Size s1, Size s2)
Definition: shmem.c:505

References hash_get_num_entries(), mul_size(), and pendingSyncHash.

Referenced by InitializeParallelDSM().

◆ log_smgrcreate()

void log_smgrcreate ( const RelFileLocator * rlocator,
ForkNumber  forkNum 
)

Definition at line 186 of file storage.c.

187{
188 xl_smgr_create xlrec;
189
190 /*
191 * Make an XLOG entry reporting the file creation.
192 */
193 xlrec.rlocator = *rlocator;
194 xlrec.forkNum = forkNum;
195
197 XLogRegisterData(&xlrec, sizeof(xlrec));
199}
#define XLOG_SMGR_CREATE
Definition: storage_xlog.h:30
ForkNumber forkNum
Definition: storage_xlog.h:36
RelFileLocator rlocator
Definition: storage_xlog.h:35
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:82

References xl_smgr_create::forkNum, xl_smgr_create::rlocator, XLOG_SMGR_CREATE, XLogBeginInsert(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by CreateAndCopyRelationData(), fill_seq_with_data(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_build(), index_copy_data(), and RelationCreateStorage().

◆ PostPrepare_smgr()

void PostPrepare_smgr ( void  )

Definition at line 918 of file storage.c.

919{
920 PendingRelDelete *pending;
922
923 for (pending = pendingDeletes; pending != NULL; pending = next)
924 {
925 next = pending->next;
927 /* must explicitly free the list entry */
928 pfree(pending);
929 }
930}
static int32 next
Definition: blutils.c:224
void pfree(void *pointer)
Definition: mcxt.c:1524

References next, PendingRelDelete::next, pendingDeletes, and pfree().

Referenced by PrepareTransaction().

◆ RelationCopyStorage()

void RelationCopyStorage ( SMgrRelation  src,
SMgrRelation  dst,
ForkNumber  forkNum,
char  relpersistence 
)

Definition at line 477 of file storage.c.

479{
480 bool use_wal;
481 bool copying_initfork;
482 BlockNumber nblocks;
483 BlockNumber blkno;
484 BulkWriteState *bulkstate;
485
486 /*
487 * The init fork for an unlogged relation in many respects has to be
488 * treated the same as a normal relation: changes need to be WAL-logged and
489 * it needs to be synced to disk.
490 */
491 copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
492 forkNum == INIT_FORKNUM;
493
494 /*
495 * We need to log the copied data in WAL iff WAL archiving/streaming is
496 * enabled AND it's a permanent relation. This gives the same answer as
497 * "RelationNeedsWAL(rel) || copying_initfork", because we know the
498 * current operation created new relation storage.
499 */
500 use_wal = XLogIsNeeded() &&
501 (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
502
503 bulkstate = smgr_bulk_start_smgr(dst, forkNum, use_wal);
504
505 nblocks = smgrnblocks(src, forkNum);
506
507 for (blkno = 0; blkno < nblocks; blkno++)
508 {
510
511 /* If we got a cancel signal during the copy of the data, quit */
513
514 buf = smgr_bulk_get_buf(bulkstate);
515 smgrread(src, forkNum, blkno, (Page) buf);
516
517 if (!PageIsVerifiedExtended((Page) buf, blkno,
519 {
520 /*
521 * For paranoia's sake, capture the file path before invoking the
522 * ereport machinery. This guards against the possibility of a
523 * relcache flush caused by, e.g., an errcontext callback.
524 * (errcontext callbacks shouldn't be risking any such thing, but
525 * people have been known to forget that rule.)
526 */
529 forkNum);
530
533 errmsg("invalid page in block %u of relation %s",
534 blkno, relpath.str)));
535 }
536
537 /*
538 * Queue the page for WAL-logging and writing out. Unfortunately we
539 * don't know what kind of a page this is, so we have to log the full
540 * page including any unused space.
541 */
542 smgr_bulk_write(bulkstate, blkno, buf, false);
543 }
544 smgr_bulk_finish(bulkstate);
545}
uint32 BlockNumber
Definition: block.h:31
bool PageIsVerifiedExtended(PageData *page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
#define PIV_LOG_WARNING
Definition: bufpage.h:468
PageData * Page
Definition: bufpage.h:82
#define PIV_REPORT_STAT
Definition: bufpage.h:469
void smgr_bulk_write(BulkWriteState *bulkstate, BlockNumber blocknum, BulkWriteBuffer buf, bool page_std)
Definition: bulk_write.c:323
BulkWriteBuffer smgr_bulk_get_buf(BulkWriteState *bulkstate)
Definition: bulk_write.c:347
BulkWriteState * smgr_bulk_start_smgr(SMgrRelation smgr, ForkNumber forknum, bool use_wal)
Definition: bulk_write.c:100
void smgr_bulk_finish(BulkWriteState *bulkstate)
Definition: bulk_write.c:130
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
static char * buf
Definition: pg_test_fsync.c:72
@ INIT_FORKNUM
Definition: relpath.h:61
#define relpath(rlocator, forknum)
Definition: relpath.h:150
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:141
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:767
static void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
Definition: smgr.h:117
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37
#define XLogIsNeeded()
Definition: xlog.h:109

References RelFileLocatorBackend::backend, buf, CHECK_FOR_INTERRUPTS, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, INIT_FORKNUM, RelFileLocatorBackend::locator, PageIsVerifiedExtended(), PIV_LOG_WARNING, PIV_REPORT_STAT, relpath, relpathbackend, smgr_bulk_finish(), smgr_bulk_get_buf(), smgr_bulk_start_smgr(), smgr_bulk_write(), SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrread(), and XLogIsNeeded.

Referenced by heapam_relation_copy_data(), and index_copy_data().

◆ RelationCreateStorage()

SMgrRelation RelationCreateStorage ( RelFileLocator  rlocator,
char  relpersistence,
bool  register_delete 
)

Definition at line 121 of file storage.c.

123{
124 SMgrRelation srel;
125 ProcNumber procNumber;
126 bool needs_wal;
127
128 Assert(!IsInParallelMode()); /* couldn't update pendingSyncHash */
129
130 switch (relpersistence)
131 {
132 case RELPERSISTENCE_TEMP:
133 procNumber = ProcNumberForTempRelations();
134 needs_wal = false;
135 break;
136 case RELPERSISTENCE_UNLOGGED:
137 procNumber = INVALID_PROC_NUMBER;
138 needs_wal = false;
139 break;
140 case RELPERSISTENCE_PERMANENT:
141 procNumber = INVALID_PROC_NUMBER;
142 needs_wal = true;
143 break;
144 default:
145 elog(ERROR, "invalid relpersistence: %c", relpersistence);
146 return NULL; /* placate compiler */
147 }
148
149 srel = smgropen(rlocator, procNumber);
150 smgrcreate(srel, MAIN_FORKNUM, false);
151
152 if (needs_wal)
154
155 /*
156 * Add the relation to the list of stuff to delete at abort, if we are
157 * asked to do so.
158 */
159 if (register_delete)
160 {
161 PendingRelDelete *pending;
162
163 pending = (PendingRelDelete *)
165 pending->rlocator = rlocator;
166 pending->procNumber = procNumber;
167 pending->atCommit = false; /* delete if abort */
169 pending->next = pendingDeletes;
170 pendingDeletes = pending;
171 }
172
173 if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
174 {
175 Assert(procNumber == INVALID_PROC_NUMBER);
176 AddPendingSync(&rlocator);
177 }
178
179 return srel;
180}
#define elog(elevel,...)
Definition: elog.h:225
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
MemoryContext TopMemoryContext
Definition: mcxt.c:149
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
#define ProcNumberForTempRelations()
Definition: procnumber.h:53
@ MAIN_FORKNUM
Definition: relpath.h:58
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:222
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:463
static void AddPendingSync(const RelFileLocator *rlocator)
Definition: storage.c:85
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186
ProcNumber procNumber
Definition: storage.c:64
RelFileLocator rlocator
Definition: storage.c:63
bool IsInParallelMode(void)
Definition: xact.c:1089

References AddPendingSync(), Assert(), PendingRelDelete::atCommit, elog, ERROR, GetCurrentTransactionNestLevel(), INVALID_PROC_NUMBER, IsInParallelMode(), RelFileLocatorBackend::locator, log_smgrcreate(), MAIN_FORKNUM, MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, PendingRelDelete::procNumber, ProcNumberForTempRelations, PendingRelDelete::rlocator, SMgrRelationData::smgr_rlocator, smgrcreate(), smgropen(), TopMemoryContext, and XLogIsNeeded.

Referenced by CreateAndCopyRelationData(), heap_create(), heapam_relation_copy_data(), heapam_relation_set_new_filelocator(), index_copy_data(), and RelationSetNewRelfilenumber().

◆ RelationDropStorage()

void RelationDropStorage ( Relation  rel)

Definition at line 206 of file storage.c.

207{
208 PendingRelDelete *pending;
209
210 /* Add the relation to the list of stuff to delete at commit */
211 pending = (PendingRelDelete *)
213 pending->rlocator = rel->rd_locator;
214 pending->procNumber = rel->rd_backend;
215 pending->atCommit = true; /* delete if commit */
217 pending->next = pendingDeletes;
218 pendingDeletes = pending;
219
220 /*
221 * NOTE: if the relation was created in this transaction, it will now be
222 * present in the pending-delete list twice, once with atCommit true and
223 * once with atCommit false. Hence, it will be physically deleted at end
224 * of xact in either case (and the other entry will be ignored by
225 * smgrDoPendingDeletes, so no error will occur). We could instead remove
226 * the existing list entry and delete the physical file immediately, but
227 * for now I'll keep the logic simple.
228 */
229
231}
static void RelationCloseSmgr(Relation relation)
Definition: rel.h:590
ProcNumber rd_backend
Definition: rel.h:60
RelFileLocator rd_locator
Definition: rel.h:57

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), MemoryContextAlloc(), PendingRelDelete::nestLevel, PendingRelDelete::next, pendingDeletes, PendingRelDelete::procNumber, RelationData::rd_backend, RelationData::rd_locator, RelationCloseSmgr(), PendingRelDelete::rlocator, and TopMemoryContext.

Referenced by heap_drop_with_catalog(), heapam_relation_copy_data(), index_copy_data(), index_drop(), reindex_index(), and RelationSetNewRelfilenumber().

◆ RelationPreserveStorage()

void RelationPreserveStorage ( RelFileLocator  rlocator,
bool  atCommit 
)

Definition at line 251 of file storage.c.

252{
253 PendingRelDelete *pending;
254 PendingRelDelete *prev;
256
257 prev = NULL;
258 for (pending = pendingDeletes; pending != NULL; pending = next)
259 {
260 next = pending->next;
261 if (RelFileLocatorEquals(rlocator, pending->rlocator)
262 && pending->atCommit == atCommit)
263 {
264 /* unlink and delete list entry */
265 if (prev)
266 prev->next = next;
267 else
269 pfree(pending);
270 /* prev does not change */
271 }
272 else
273 {
274 /* unrelated entry, don't touch it */
275 prev = pending;
276 }
277 }
278}
#define RelFileLocatorEquals(locator1, locator2)

References PendingRelDelete::atCommit, next, PendingRelDelete::next, pendingDeletes, pfree(), RelFileLocatorEquals, and PendingRelDelete::rlocator.

Referenced by ATExecAddIndex(), and write_relmap_file().

◆ RelationPreTruncate()

void RelationPreTruncate ( Relation  rel)

Definition at line 449 of file storage.c.

450{
451 PendingRelSync *pending;
452
453 if (!pendingSyncHash)
454 return;
455
457 &(RelationGetSmgr(rel)->smgr_rlocator.locator),
458 HASH_FIND, NULL);
459 if (pending)
460 pending->is_truncated = true;
461}
@ HASH_FIND
Definition: hsearch.h:113
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:575

References HASH_FIND, hash_search(), PendingRelSync::is_truncated, pendingSyncHash, and RelationGetSmgr().

Referenced by RelationTruncate().

◆ RelationTruncate()

void RelationTruncate ( Relation  rel,
BlockNumber  nblocks 
)

Definition at line 288 of file storage.c.

289{
290 bool fsm;
291 bool vm;
292 bool need_fsm_vacuum = false;
293 ForkNumber forks[MAX_FORKNUM];
294 BlockNumber old_blocks[MAX_FORKNUM];
295 BlockNumber blocks[MAX_FORKNUM];
296 int nforks = 0;
297 SMgrRelation reln;
298
299 /*
300 * Make sure smgr_targblock etc aren't pointing somewhere past new end.
301 * (Note: don't rely on this reln pointer below this loop.)
302 */
303 reln = RelationGetSmgr(rel);
305 for (int i = 0; i <= MAX_FORKNUM; ++i)
307
308 /* Prepare for truncation of MAIN fork of the relation */
309 forks[nforks] = MAIN_FORKNUM;
310 old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
311 blocks[nforks] = nblocks;
312 nforks++;
313
314 /* Prepare for truncation of the FSM if it exists */
316 if (fsm)
317 {
318 blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
319 if (BlockNumberIsValid(blocks[nforks]))
320 {
321 forks[nforks] = FSM_FORKNUM;
322 old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
323 nforks++;
324 need_fsm_vacuum = true;
325 }
326 }
327
328 /* Prepare for truncation of the visibility map too if it exists */
330 if (vm)
331 {
332 blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
333 if (BlockNumberIsValid(blocks[nforks]))
334 {
335 forks[nforks] = VISIBILITYMAP_FORKNUM;
336 old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
337 nforks++;
338 }
339 }
340
342
343 /*
344 * The code which follows can interact with concurrent checkpoints in two
345 * separate ways.
346 *
347 * First, the truncation operation might drop buffers that the checkpoint
348 * otherwise would have flushed. If it does, then it's essential that the
349 * files actually get truncated on disk before the checkpoint record is
350 * written. Otherwise, if replay begins from that checkpoint, the
351 * to-be-truncated blocks might still exist on disk but have older
352 * contents than expected, which can cause replay to fail. It's OK for the
353 * blocks to not exist on disk at all, but not for them to have the wrong
354 * contents. For this reason, we need to set DELAY_CHKPT_COMPLETE while
355 * this code executes.
356 *
357 * Second, the call to smgrtruncate() below will in turn call
358 * RegisterSyncRequest(). We need the sync request created by that call to
359 * be processed before the checkpoint completes. CheckPointGuts() will
360 * call ProcessSyncRequests(), but if we register our sync request after
361 * that happens, then the WAL record for the truncation could end up
362 * preceding the checkpoint record, while the actual sync doesn't happen
363 * until the next checkpoint. To prevent that, we need to set
364 * DELAY_CHKPT_START here. That way, if the XLOG_SMGR_TRUNCATE precedes
365 * the redo pointer of a concurrent checkpoint, we're guaranteed that the
366 * corresponding sync request will be processed before the checkpoint
367 * completes.
368 */
371
372 /*
373 * We WAL-log the truncation first and then truncate in a critical
374 * section. Truncation drops buffers, even if dirty, and then truncates
375 * disk files. All of that work needs to complete before the lock is
376 * released, or else old versions of pages on disk that are missing recent
377 * changes would become accessible again. We'll try the whole operation
378 * again in crash recovery if we panic, but even then we can't give up
379 * because we don't want standbys' relation sizes to diverge and break
380 * replay or visibility invariants downstream. The critical section also
381 * suppresses interrupts.
382 *
383 * (See also visibilitymap.c if changing this code.)
384 */
386
387 if (RelationNeedsWAL(rel))
388 {
389 /*
390 * Make an XLOG entry reporting the file truncation.
391 */
392 XLogRecPtr lsn;
393 xl_smgr_truncate xlrec;
394
395 xlrec.blkno = nblocks;
396 xlrec.rlocator = rel->rd_locator;
397 xlrec.flags = SMGR_TRUNCATE_ALL;
398
400 XLogRegisterData(&xlrec, sizeof(xlrec));
401
402 lsn = XLogInsert(RM_SMGR_ID,
404
405 /*
406 * Flush, because otherwise the truncation of the main relation might
407 * hit the disk before the WAL record, and the truncation of the FSM
408 * or visibility map. If we crashed during that window, we'd be left
409 * with a truncated heap, but the FSM or visibility map would still
410 * contain entries for the non-existent heap pages, and standbys would
411 * also never replay the truncation.
412 */
413 XLogFlush(lsn);
414 }
415
416 /*
417 * This will first remove any buffers from the buffer pool that should no
418 * longer exist after truncation is complete, and then truncate the
419 * corresponding files on disk.
420 */
421 smgrtruncate(RelationGetSmgr(rel), forks, nforks, old_blocks, blocks);
422
424
425 /* We've done all the critical work, so checkpoints are OK now. */
427
428 /*
429 * Update upper-level FSM pages to account for the truncation. This is
430 * important because the just-truncated pages were likely marked as
431 * all-free, and would be preferentially selected.
432 *
433 * NB: There's no point in delaying checkpoints until this is done.
434 * Because the FSM is not WAL-logged, we have to be prepared for the
435 * possibility of corruption after a crash anyway.
436 */
437 if (need_fsm_vacuum)
439}
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
Definition: freespace.c:275
int i
Definition: isn.c:74
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define DELAY_CHKPT_START
Definition: proc.h:120
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:121
#define RelationNeedsWAL(relation)
Definition: rel.h:636
ForkNumber
Definition: relpath.h:56
@ FSM_FORKNUM
Definition: relpath.h:59
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:60
#define MAX_FORKNUM
Definition: relpath.h:70
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *old_nblocks, BlockNumber *nblocks)
Definition: smgr.c:823
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:444
PGPROC * MyProc
Definition: proc.c:66
void RelationPreTruncate(Relation rel)
Definition: storage.c:449
#define SMGR_TRUNCATE_ALL
Definition: storage_xlog.h:43
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
int delayChkptFlags
Definition: proc.h:241
BlockNumber smgr_targblock
Definition: smgr.h:45
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2790
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), xl_smgr_truncate::blkno, BlockNumberIsValid(), DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, PGPROC::delayChkptFlags, END_CRIT_SECTION, xl_smgr_truncate::flags, FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, i, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, MyProc, RelationData::rd_locator, RelationGetSmgr(), RelationNeedsWAL, RelationPreTruncate(), xl_smgr_truncate::rlocator, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_targblock, SMGR_TRUNCATE_ALL, smgrexists(), smgrnblocks(), smgrtruncate(), START_CRIT_SECTION, VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_TRUNCATE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), and XLR_SPECIAL_REL_UPDATE.

Referenced by heapam_relation_nontransactional_truncate(), lazy_truncate_heap(), RelationTruncateIndexes(), and spgvacuumscan().

◆ RelFileLocatorSkippingWAL()

bool RelFileLocatorSkippingWAL ( RelFileLocator  rlocator)

Definition at line 557 of file storage.c.

558{
559 if (!pendingSyncHash ||
560 hash_search(pendingSyncHash, &rlocator, HASH_FIND, NULL) == NULL)
561 return false;
562
563 return true;
564}

References HASH_FIND, hash_search(), and pendingSyncHash.

Referenced by MarkBufferDirtyHint(), and RelationInitPhysicalAddr().

◆ RestorePendingSyncs()

void RestorePendingSyncs ( char *  startAddress)

Definition at line 635 of file storage.c.

636{
637 RelFileLocator *rlocator;
638
639 Assert(pendingSyncHash == NULL);
640 for (rlocator = (RelFileLocator *) startAddress; rlocator->relNumber != 0;
641 rlocator++)
642 AddPendingSync(rlocator);
643}
RelFileNumber relNumber

References AddPendingSync(), Assert(), pendingSyncHash, and RelFileLocator::relNumber.

Referenced by ParallelWorkerMain().

◆ SerializePendingSyncs()

void SerializePendingSyncs ( Size  maxSize,
char *  startAddress 
)

Definition at line 584 of file storage.c.

585{
586 HTAB *tmphash;
587 HASHCTL ctl;
588 HASH_SEQ_STATUS scan;
589 PendingRelSync *sync;
590 PendingRelDelete *delete;
591 RelFileLocator *src;
592 RelFileLocator *dest = (RelFileLocator *) startAddress;
593
594 if (!pendingSyncHash)
595 goto terminate;
596
597 /* Create temporary hash to collect active relfilelocators */
598 ctl.keysize = sizeof(RelFileLocator);
599 ctl.entrysize = sizeof(RelFileLocator);
601 tmphash = hash_create("tmp relfilelocators",
604
605 /* collect all rlocators from pending syncs */
607 while ((sync = (PendingRelSync *) hash_seq_search(&scan)))
608 (void) hash_search(tmphash, &sync->rlocator, HASH_ENTER, NULL);
609
610 /* remove rlocators that are scheduled for deletion at commit */
611 for (delete = pendingDeletes; delete != NULL; delete = delete->next)
612 if (delete->atCommit)
613 (void) hash_search(tmphash, &delete->rlocator,
614 HASH_REMOVE, NULL);
615
616 hash_seq_init(&scan, tmphash);
617 while ((src = (RelFileLocator *) hash_seq_search(&scan)))
618 *dest++ = *src;
619
620 hash_destroy(tmphash);
621
622terminate:
623 MemSet(dest, 0, sizeof(RelFileLocator));
624}
#define MemSet(start, val, len)
Definition: c.h:991
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
@ HASH_REMOVE
Definition: hsearch.h:115
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
Definition: dynahash.c:220
RelFileLocator rlocator
Definition: storage.c:72

References ctl, CurrentMemoryContext, generate_unaccent_rules::dest, HASH_BLOBS, HASH_CONTEXT, hash_create(), hash_destroy(), HASH_ELEM, HASH_ENTER, hash_get_num_entries(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), MemSet, PendingRelDelete::next, pendingDeletes, pendingSyncHash, and PendingRelSync::rlocator.

Referenced by InitializeParallelDSM().

◆ smgr_redo()

void smgr_redo ( XLogReaderState * record)

Definition at line 965 of file storage.c.

966{
967 XLogRecPtr lsn = record->EndRecPtr;
968 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
969
970 /* Backup blocks are not used in smgr records */
972
973 if (info == XLOG_SMGR_CREATE)
974 {
975 xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
976 SMgrRelation reln;
977
978 reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
979 smgrcreate(reln, xlrec->forkNum, true);
980 }
981 else if (info == XLOG_SMGR_TRUNCATE)
982 {
984 SMgrRelation reln;
985 Relation rel;
986 ForkNumber forks[MAX_FORKNUM];
987 BlockNumber blocks[MAX_FORKNUM];
988 BlockNumber old_blocks[MAX_FORKNUM];
989 int nforks = 0;
990 bool need_fsm_vacuum = false;
991
992 reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
993
994 /*
995 * Forcibly create relation if it doesn't exist (which suggests that
996 * it was dropped somewhere later in the WAL sequence). As in
997 * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
998 * log as best we can until the drop is seen.
999 */
1000 smgrcreate(reln, MAIN_FORKNUM, true);
1001
1002 /*
1003 * Before we perform the truncation, update minimum recovery point to
1004 * cover this WAL record. Once the relation is truncated, there's no
1005 * going back. The buffer manager enforces the WAL-first rule for
1006 * normal updates to relation files, so that the minimum recovery
1007 * point is always updated before the corresponding change in the data
1008 * file is flushed to disk. We have to do the same manually here.
1009 *
1010 * Doing this before the truncation means that if the truncation fails
1011 * for some reason, you cannot start up the system even after restart,
1012 * until you fix the underlying situation so that the truncation will
1013 * succeed. Alternatively, we could update the minimum recovery point
1014 * after truncation, but that would leave a small window where the
1015 * WAL-first rule could be violated.
1016 */
1017 XLogFlush(lsn);
1018
1019 /* Prepare for truncation of MAIN fork */
1020 if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
1021 {
1022 forks[nforks] = MAIN_FORKNUM;
1023 old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
1024 blocks[nforks] = xlrec->blkno;
1025 nforks++;
1026
1027 /* Also tell xlogutils.c about it */
1029 }
1030
1031 /* Prepare for truncation of FSM and VM too */
1032 rel = CreateFakeRelcacheEntry(xlrec->rlocator);
1033
1034 if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
1035 smgrexists(reln, FSM_FORKNUM))
1036 {
1037 blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
1038 if (BlockNumberIsValid(blocks[nforks]))
1039 {
1040 forks[nforks] = FSM_FORKNUM;
1041 old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
1042 nforks++;
1043 need_fsm_vacuum = true;
1044 }
1045 }
1046 if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
1048 {
1049 blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
1050 if (BlockNumberIsValid(blocks[nforks]))
1051 {
1052 forks[nforks] = VISIBILITYMAP_FORKNUM;
1053 old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
1054 nforks++;
1055 }
1056 }
1057
1058 /* Do the real work to truncate relation forks */
1059 if (nforks > 0)
1060 {
1062 smgrtruncate(reln, forks, nforks, old_blocks, blocks);
1064 }
1065
1066 /*
1067 * Update upper-level FSM pages to account for the truncation. This is
1068 * important because the just-truncated pages were likely marked as
1069 * all-free, and would be preferentially selected.
1070 */
1071 if (need_fsm_vacuum)
1072 FreeSpaceMapVacuumRange(rel, xlrec->blkno,
1074
1076 }
1077 else
1078 elog(PANIC, "smgr_redo: unknown op code %u", info);
1079}
uint8_t uint8
Definition: c.h:500
#define PANIC
Definition: elog.h:42
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define SMGR_TRUNCATE_HEAP
Definition: storage_xlog.h:40
#define SMGR_TRUNCATE_FSM
Definition: storage_xlog.h:42
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
void FreeFakeRelcacheEntry(Relation fakerel)
Definition: xlogutils.c:618
void XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nblocks)
Definition: xlogutils.c:660
Relation CreateFakeRelcacheEntry(RelFileLocator rlocator)
Definition: xlogutils.c:571

References Assert(), xl_smgr_truncate::blkno, BlockNumberIsValid(), CreateFakeRelcacheEntry(), elog, END_CRIT_SECTION, XLogReaderState::EndRecPtr, xl_smgr_truncate::flags, xl_smgr_create::forkNum, FreeFakeRelcacheEntry(), FreeSpaceMapPrepareTruncateRel(), FreeSpaceMapVacuumRange(), FSM_FORKNUM, INVALID_PROC_NUMBER, InvalidBlockNumber, MAIN_FORKNUM, MAX_FORKNUM, PANIC, xl_smgr_create::rlocator, xl_smgr_truncate::rlocator, SMGR_TRUNCATE_FSM, SMGR_TRUNCATE_HEAP, SMGR_TRUNCATE_VM, smgrcreate(), smgrexists(), smgrnblocks(), smgropen(), smgrtruncate(), START_CRIT_SECTION, VISIBILITYMAP_FORKNUM, visibilitymap_prepare_truncate(), XLOG_SMGR_CREATE, XLOG_SMGR_TRUNCATE, XLogFlush(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, and XLogTruncateRelation().

◆ smgrDoPendingDeletes()

void smgrDoPendingDeletes ( bool  isCommit)

Definition at line 657 of file storage.c.

658{
659 int nestLevel = GetCurrentTransactionNestLevel();
660 PendingRelDelete *pending;
661 PendingRelDelete *prev;
663 int nrels = 0,
664 maxrels = 0;
665 SMgrRelation *srels = NULL;
666
667 prev = NULL;
668 for (pending = pendingDeletes; pending != NULL; pending = next)
669 {
670 next = pending->next;
671 if (pending->nestLevel < nestLevel)
672 {
673 /* outer-level entries should not be processed yet */
674 prev = pending;
675 }
676 else
677 {
678 /* unlink list entry first, so we don't retry on failure */
679 if (prev)
680 prev->next = next;
681 else
683 /* do deletion if called for */
684 if (pending->atCommit == isCommit)
685 {
686 SMgrRelation srel;
687
688 srel = smgropen(pending->rlocator, pending->procNumber);
689
690 /* allocate the initial array, or extend it, if needed */
691 if (maxrels == 0)
692 {
693 maxrels = 8;
694 srels = palloc(sizeof(SMgrRelation) * maxrels);
695 }
696 else if (maxrels <= nrels)
697 {
698 maxrels *= 2;
699 srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
700 }
701
702 srels[nrels++] = srel;
703 }
704 /* must explicitly free the list entry */
705 pfree(pending);
706 /* prev does not change */
707 }
708 }
709
710 if (nrels > 0)
711 {
712 smgrdounlinkall(srels, nrels, false);
713
714 for (int i = 0; i < nrels; i++)
715 smgrclose(srels[i]);
716
717 pfree(srels);
718 }
719}
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1544
void * palloc(Size size)
Definition: mcxt.c:1317
void smgrclose(SMgrRelation reln)
Definition: smgr.c:356
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:520

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), i, PendingRelDelete::nestLevel, next, PendingRelDelete::next, palloc(), pendingDeletes, pfree(), PendingRelDelete::procNumber, repalloc(), PendingRelDelete::rlocator, smgrclose(), smgrdounlinkall(), and smgropen().

Referenced by AbortTransaction(), AtSubAbort_smgr(), and CommitTransaction().

◆ smgrDoPendingSyncs()

void smgrDoPendingSyncs ( bool  isCommit,
bool  isParallelWorker 
)

Definition at line 725 of file storage.c.

726{
727 PendingRelDelete *pending;
728 int nrels = 0,
729 maxrels = 0;
730 SMgrRelation *srels = NULL;
731 HASH_SEQ_STATUS scan;
732 PendingRelSync *pendingsync;
733
735
736 if (!pendingSyncHash)
737 return; /* no relation needs sync */
738
739 /* Abort -- just throw away all pending syncs */
740 if (!isCommit)
741 {
742 pendingSyncHash = NULL;
743 return;
744 }
745
747
748 /* Parallel worker -- just throw away all pending syncs */
749 if (isParallelWorker)
750 {
751 pendingSyncHash = NULL;
752 return;
753 }
754
755 /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
756 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
757 if (pending->atCommit)
758 (void) hash_search(pendingSyncHash, &pending->rlocator,
759 HASH_REMOVE, NULL);
760
762 while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
763 {
764 ForkNumber fork;
765 BlockNumber nblocks[MAX_FORKNUM + 1];
766 uint64 total_blocks = 0;
767 SMgrRelation srel;
768
769 srel = smgropen(pendingsync->rlocator, INVALID_PROC_NUMBER);
770
771 /*
772 * We emit newpage WAL records for smaller relations.
773 *
774 * Small WAL records have a chance to be flushed along with other
775 * backends' WAL records. We emit WAL records instead of syncing for
776 * files that are smaller than a certain threshold, expecting faster
777 * commit. The threshold is defined by the GUC wal_skip_threshold.
778 */
779 if (!pendingsync->is_truncated)
780 {
781 for (fork = 0; fork <= MAX_FORKNUM; fork++)
782 {
783 if (smgrexists(srel, fork))
784 {
785 BlockNumber n = smgrnblocks(srel, fork);
786
787 /* we shouldn't come here for unlogged relations */
788 Assert(fork != INIT_FORKNUM);
789 nblocks[fork] = n;
790 total_blocks += n;
791 }
792 else
793 nblocks[fork] = InvalidBlockNumber;
794 }
795 }
796
797 /*
798 * Sync file or emit WAL records for its contents.
799 *
800 * Although we emit WAL record if the file is small enough, do file
801 * sync regardless of the size if the file has experienced a
802 * truncation. It is because the file would be followed by trailing
803 * garbage blocks after a crash recovery if, while a past longer file
804 * had been flushed out, we omitted syncing-out of the file and
805 * emitted WAL instead. You might think that we could choose WAL if
806 * the current main fork is longer than ever, but there's a case where
807 * main fork is longer than ever but FSM fork gets shorter.
808 */
809 if (pendingsync->is_truncated ||
810 total_blocks >= wal_skip_threshold * (uint64) 1024 / BLCKSZ)
811 {
812 /* allocate the initial array, or extend it, if needed */
813 if (maxrels == 0)
814 {
815 maxrels = 8;
816 srels = palloc(sizeof(SMgrRelation) * maxrels);
817 }
818 else if (maxrels <= nrels)
819 {
820 maxrels *= 2;
821 srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
822 }
823
824 srels[nrels++] = srel;
825 }
826 else
827 {
828 /* Emit WAL records for all blocks. The file is small enough. */
829 for (fork = 0; fork <= MAX_FORKNUM; fork++)
830 {
831 int n = nblocks[fork];
832 Relation rel;
833
834 if (!BlockNumberIsValid(n))
835 continue;
836
837 /*
838 * Emit WAL for the whole file. Unfortunately we don't know
839 * what kind of a page this is, so we have to log the full
840 * page including any unused space. ReadBufferExtended()
841 * counts some pgstat events; unfortunately, we discard them.
842 */
844 log_newpage_range(rel, fork, 0, n, false);
846 }
847 }
848 }
849
850 pendingSyncHash = NULL;
851
852 if (nrels > 0)
853 {
854 smgrdosyncall(srels, nrels);
855 pfree(srels);
856 }
857}
uint64_t uint64
Definition: c.h:503
#define AssertPendingSyncs_RelationCache()
Definition: relcache.h:135
void smgrdosyncall(SMgrRelation *rels, int nrels)
Definition: smgr.c:480
int wal_skip_threshold
Definition: storage.c:39
void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std)
Definition: xloginsert.c:1270

References Assert(), AssertPendingSyncs_RelationCache, PendingRelDelete::atCommit, BlockNumberIsValid(), CreateFakeRelcacheEntry(), FreeFakeRelcacheEntry(), GetCurrentTransactionNestLevel(), HASH_REMOVE, hash_search(), hash_seq_init(), hash_seq_search(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBlockNumber, PendingRelSync::is_truncated, RelFileLocatorBackend::locator, log_newpage_range(), MAX_FORKNUM, PendingRelDelete::next, palloc(), pendingDeletes, pendingSyncHash, pfree(), repalloc(), PendingRelDelete::rlocator, PendingRelSync::rlocator, SMgrRelationData::smgr_rlocator, smgrdosyncall(), smgrexists(), smgrnblocks(), smgropen(), and wal_skip_threshold.

Referenced by AbortTransaction(), CommitTransaction(), and PrepareTransaction().

◆ smgrGetPendingDeletes()

int smgrGetPendingDeletes ( bool  forCommit,
RelFileLocator **  ptr 
)

Definition at line 877 of file storage.c.

878{
879 int nestLevel = GetCurrentTransactionNestLevel();
880 int nrels;
881 RelFileLocator *rptr;
882 PendingRelDelete *pending;
883
884 nrels = 0;
885 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
886 {
887 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
888 && pending->procNumber == INVALID_PROC_NUMBER)
889 nrels++;
890 }
891 if (nrels == 0)
892 {
893 *ptr = NULL;
894 return 0;
895 }
896 rptr = (RelFileLocator *) palloc(nrels * sizeof(RelFileLocator));
897 *ptr = rptr;
898 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
899 {
900 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
901 && pending->procNumber == INVALID_PROC_NUMBER)
902 {
903 *rptr = pending->rlocator;
904 rptr++;
905 }
906 }
907 return nrels;
908}

References PendingRelDelete::atCommit, GetCurrentTransactionNestLevel(), INVALID_PROC_NUMBER, PendingRelDelete::nestLevel, PendingRelDelete::next, palloc(), pendingDeletes, PendingRelDelete::procNumber, and PendingRelDelete::rlocator.

Referenced by RecordTransactionAbort(), RecordTransactionCommit(), and StartPrepare().

Variable Documentation

◆ pendingDeletes

static PendingRelDelete *pendingDeletes = NULL

◆ pendingSyncHash

static HTAB *pendingSyncHash = NULL

◆ wal_skip_threshold

int wal_skip_threshold = 2048

Definition at line 39 of file storage.c.

Referenced by smgrDoPendingSyncs().