PostgreSQL Source Code  git master
storage.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * storage.c
4  * code to create and destroy physical storage for relations
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/catalog/storage.c
12  *
13  * NOTES
14  * Some of this code used to be in storage/smgr/smgr.c, and the
15  * function names still reflect that.
16  *
17  *-------------------------------------------------------------------------
18  */
19 
20 #include "postgres.h"
21 
22 #include "access/visibilitymap.h"
23 #include "access/xact.h"
24 #include "access/xlog.h"
25 #include "access/xloginsert.h"
26 #include "access/xlogutils.h"
27 #include "catalog/storage.h"
28 #include "catalog/storage_xlog.h"
29 #include "storage/freespace.h"
30 #include "storage/smgr.h"
31 #include "utils/memutils.h"
32 #include "utils/rel.h"
33 
34 /*
35  * We keep a list of all relations (represented as RelFileNode values)
36  * that have been created or deleted in the current transaction. When
37  * a relation is created, we create the physical file immediately, but
38  * remember it so that we can delete the file again if the current
39  * transaction is aborted. Conversely, a deletion request is NOT
40  * executed immediately, but is just entered in the list. When and if
41  * the transaction commits, we can delete the physical file.
42  *
43  * To handle subtransactions, every entry is marked with its transaction
44  * nesting level. At subtransaction commit, we reassign the subtransaction's
45  * entries to the parent nesting level. At subtransaction abort, we can
46  * immediately execute the abort-time actions for all entries of the current
47  * nesting level.
48  *
49  * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
50  * unbetimes. It'd probably be OK to keep it in TopTransactionContext,
51  * but I'm being paranoid.
52  */
53 
54 typedef struct PendingRelDelete
55 {
56  RelFileNode relnode; /* relation that may need to be deleted */
57  BackendId backend; /* InvalidBackendId if not a temp rel */
58  bool atCommit; /* T=delete at commit; F=delete at abort */
59  int nestLevel; /* xact nesting level of request */
60  struct PendingRelDelete *next; /* linked-list link */
62 
63 static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
64 
65 /*
66  * RelationCreateStorage
67  * Create physical storage for a relation.
68  *
69  * Create the underlying disk file storage for the relation. This only
70  * creates the main fork; additional forks are created lazily by the
71  * modules that need them.
72  *
73  * This function is transactional. The creation is WAL-logged, and if the
74  * transaction aborts later on, the storage will be destroyed.
75  */
76 void
78 {
79  PendingRelDelete *pending;
80  SMgrRelation srel;
82  bool needs_wal;
83 
84  switch (relpersistence)
85  {
86  case RELPERSISTENCE_TEMP:
87  backend = BackendIdForTempRelations();
88  needs_wal = false;
89  break;
90  case RELPERSISTENCE_UNLOGGED:
91  backend = InvalidBackendId;
92  needs_wal = false;
93  break;
94  case RELPERSISTENCE_PERMANENT:
95  backend = InvalidBackendId;
96  needs_wal = true;
97  break;
98  default:
99  elog(ERROR, "invalid relpersistence: %c", relpersistence);
100  return; /* placate compiler */
101  }
102 
103  srel = smgropen(rnode, backend);
104  smgrcreate(srel, MAIN_FORKNUM, false);
105 
106  if (needs_wal)
108 
109  /* Add the relation to the list of stuff to delete at abort */
110  pending = (PendingRelDelete *)
112  pending->relnode = rnode;
113  pending->backend = backend;
114  pending->atCommit = false; /* delete if abort */
116  pending->next = pendingDeletes;
117  pendingDeletes = pending;
118 }
119 
120 /*
121  * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
122  */
123 void
125 {
126  xl_smgr_create xlrec;
127 
128  /*
129  * Make an XLOG entry reporting the file creation.
130  */
131  xlrec.rnode = *rnode;
132  xlrec.forkNum = forkNum;
133 
134  XLogBeginInsert();
135  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
137 }
138 
139 /*
140  * RelationDropStorage
141  * Schedule unlinking of physical storage at transaction commit.
142  */
143 void
145 {
146  PendingRelDelete *pending;
147 
148  /* Add the relation to the list of stuff to delete at commit */
149  pending = (PendingRelDelete *)
151  pending->relnode = rel->rd_node;
152  pending->backend = rel->rd_backend;
153  pending->atCommit = true; /* delete if commit */
155  pending->next = pendingDeletes;
156  pendingDeletes = pending;
157 
158  /*
159  * NOTE: if the relation was created in this transaction, it will now be
160  * present in the pending-delete list twice, once with atCommit true and
161  * once with atCommit false. Hence, it will be physically deleted at end
162  * of xact in either case (and the other entry will be ignored by
163  * smgrDoPendingDeletes, so no error will occur). We could instead remove
164  * the existing list entry and delete the physical file immediately, but
165  * for now I'll keep the logic simple.
166  */
167 
168  RelationCloseSmgr(rel);
169 }
170 
171 /*
172  * RelationPreserveStorage
173  * Mark a relation as not to be deleted after all.
174  *
175  * We need this function because relation mapping changes are committed
176  * separately from commit of the whole transaction, so it's still possible
177  * for the transaction to abort after the mapping update is done.
178  * When a new physical relation is installed in the map, it would be
179  * scheduled for delete-on-abort, so we'd delete it, and be in trouble.
180  * The relation mapper fixes this by telling us to not delete such relations
181  * after all as part of its commit.
182  *
183  * We also use this to reuse an old build of an index during ALTER TABLE, this
184  * time removing the delete-at-commit entry.
185  *
186  * No-op if the relation is not among those scheduled for deletion.
187  */
188 void
190 {
191  PendingRelDelete *pending;
192  PendingRelDelete *prev;
194 
195  prev = NULL;
196  for (pending = pendingDeletes; pending != NULL; pending = next)
197  {
198  next = pending->next;
199  if (RelFileNodeEquals(rnode, pending->relnode)
200  && pending->atCommit == atCommit)
201  {
202  /* unlink and delete list entry */
203  if (prev)
204  prev->next = next;
205  else
206  pendingDeletes = next;
207  pfree(pending);
208  /* prev does not change */
209  }
210  else
211  {
212  /* unrelated entry, don't touch it */
213  prev = pending;
214  }
215  }
216 }
217 
218 /*
219  * RelationTruncate
220  * Physically truncate a relation to the specified number of blocks.
221  *
222  * This includes getting rid of any buffers for the blocks that are to be
223  * dropped.
224  */
225 void
227 {
228  bool fsm;
229  bool vm;
230 
231  /* Open it at the smgr level if not already done */
232  RelationOpenSmgr(rel);
233 
234  /*
235  * Make sure smgr_targblock etc aren't pointing somewhere past new end
236  */
240 
241  /* Truncate the FSM first if it exists */
242  fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
243  if (fsm)
244  FreeSpaceMapTruncateRel(rel, nblocks);
245 
246  /* Truncate the visibility map too if it exists. */
248  if (vm)
249  visibilitymap_truncate(rel, nblocks);
250 
251  /*
252  * We WAL-log the truncation before actually truncating, which means
253  * trouble if the truncation fails. If we then crash, the WAL replay
254  * likely isn't going to succeed in the truncation either, and cause a
255  * PANIC. It's tempting to put a critical section here, but that cure
256  * would be worse than the disease. It would turn a usually harmless
257  * failure to truncate, that might spell trouble at WAL replay, into a
258  * certain PANIC.
259  */
260  if (RelationNeedsWAL(rel))
261  {
262  /*
263  * Make an XLOG entry reporting the file truncation.
264  */
265  XLogRecPtr lsn;
266  xl_smgr_truncate xlrec;
267 
268  xlrec.blkno = nblocks;
269  xlrec.rnode = rel->rd_node;
270  xlrec.flags = SMGR_TRUNCATE_ALL;
271 
272  XLogBeginInsert();
273  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
274 
275  lsn = XLogInsert(RM_SMGR_ID,
277 
278  /*
279  * Flush, because otherwise the truncation of the main relation might
280  * hit the disk before the WAL record, and the truncation of the FSM
281  * or visibility map. If we crashed during that window, we'd be left
282  * with a truncated heap, but the FSM or visibility map would still
283  * contain entries for the non-existent heap pages.
284  */
285  if (fsm || vm)
286  XLogFlush(lsn);
287  }
288 
289  /* Do the real work */
290  smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
291 }
292 
293 /*
294  * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
295  *
296  * This also runs when aborting a subxact; we want to clean up a failed
297  * subxact immediately.
298  *
299  * Note: It's possible that we're being asked to remove a relation that has
300  * no physical storage in any fork. In particular, it's possible that we're
301  * cleaning up an old temporary relation for which RemovePgTempFiles has
302  * already recovered the physical storage.
303  */
304 void
305 smgrDoPendingDeletes(bool isCommit)
306 {
308  PendingRelDelete *pending;
309  PendingRelDelete *prev;
311  int nrels = 0,
312  i = 0,
313  maxrels = 0;
314  SMgrRelation *srels = NULL;
315 
316  prev = NULL;
317  for (pending = pendingDeletes; pending != NULL; pending = next)
318  {
319  next = pending->next;
320  if (pending->nestLevel < nestLevel)
321  {
322  /* outer-level entries should not be processed yet */
323  prev = pending;
324  }
325  else
326  {
327  /* unlink list entry first, so we don't retry on failure */
328  if (prev)
329  prev->next = next;
330  else
331  pendingDeletes = next;
332  /* do deletion if called for */
333  if (pending->atCommit == isCommit)
334  {
335  SMgrRelation srel;
336 
337  srel = smgropen(pending->relnode, pending->backend);
338 
339  /* allocate the initial array, or extend it, if needed */
340  if (maxrels == 0)
341  {
342  maxrels = 8;
343  srels = palloc(sizeof(SMgrRelation) * maxrels);
344  }
345  else if (maxrels <= nrels)
346  {
347  maxrels *= 2;
348  srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
349  }
350 
351  srels[nrels++] = srel;
352  }
353  /* must explicitly free the list entry */
354  pfree(pending);
355  /* prev does not change */
356  }
357  }
358 
359  if (nrels > 0)
360  {
361  smgrdounlinkall(srels, nrels, false);
362 
363  for (i = 0; i < nrels; i++)
364  smgrclose(srels[i]);
365 
366  pfree(srels);
367  }
368 }
369 
370 /*
371  * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
372  *
373  * The return value is the number of relations scheduled for termination.
374  * *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
375  * If there are no relations to be deleted, *ptr is set to NULL.
376  *
377  * Only non-temporary relations are included in the returned list. This is OK
378  * because the list is used only in contexts where temporary relations don't
379  * matter: we're either writing to the two-phase state file (and transactions
380  * that have touched temp tables can't be prepared) or we're writing to xlog
381  * (and all temporary files will be zapped if we restart anyway, so no need
382  * for redo to do it also).
383  *
384  * Note that the list does not include anything scheduled for termination
385  * by upper-level transactions.
386  */
387 int
388 smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
389 {
391  int nrels;
392  RelFileNode *rptr;
393  PendingRelDelete *pending;
394 
395  nrels = 0;
396  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
397  {
398  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
399  && pending->backend == InvalidBackendId)
400  nrels++;
401  }
402  if (nrels == 0)
403  {
404  *ptr = NULL;
405  return 0;
406  }
407  rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
408  *ptr = rptr;
409  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
410  {
411  if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
412  && pending->backend == InvalidBackendId)
413  {
414  *rptr = pending->relnode;
415  rptr++;
416  }
417  }
418  return nrels;
419 }
420 
421 /*
422  * PostPrepare_smgr -- Clean up after a successful PREPARE
423  *
424  * What we have to do here is throw away the in-memory state about pending
425  * relation deletes. It's all been recorded in the 2PC state file and
426  * it's no longer smgr's job to worry about it.
427  */
428 void
430 {
431  PendingRelDelete *pending;
433 
434  for (pending = pendingDeletes; pending != NULL; pending = next)
435  {
436  next = pending->next;
437  pendingDeletes = next;
438  /* must explicitly free the list entry */
439  pfree(pending);
440  }
441 }
442 
443 
444 /*
445  * AtSubCommit_smgr() --- Take care of subtransaction commit.
446  *
447  * Reassign all items in the pending-deletes list to the parent transaction.
448  */
449 void
451 {
453  PendingRelDelete *pending;
454 
455  for (pending = pendingDeletes; pending != NULL; pending = pending->next)
456  {
457  if (pending->nestLevel >= nestLevel)
458  pending->nestLevel = nestLevel - 1;
459  }
460 }
461 
/*
 * AtSubAbort_smgr() --- Take care of subtransaction abort.
 *
 * Delete created relations and forget about deleted relations.
 * We can execute these operations immediately because we know this
 * subtransaction will not commit.
 */
void
AtSubAbort_smgr(void)
{
	/* isCommit = false: carry out the delete-at-abort entries */
	smgrDoPendingDeletes(false);
}
474 
475 void
477 {
478  XLogRecPtr lsn = record->EndRecPtr;
479  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
480 
481  /* Backup blocks are not used in smgr records */
482  Assert(!XLogRecHasAnyBlockRefs(record));
483 
484  if (info == XLOG_SMGR_CREATE)
485  {
486  xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
487  SMgrRelation reln;
488 
489  reln = smgropen(xlrec->rnode, InvalidBackendId);
490  smgrcreate(reln, xlrec->forkNum, true);
491  }
492  else if (info == XLOG_SMGR_TRUNCATE)
493  {
494  xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
495  SMgrRelation reln;
496  Relation rel;
497 
498  reln = smgropen(xlrec->rnode, InvalidBackendId);
499 
500  /*
501  * Forcibly create relation if it doesn't exist (which suggests that
502  * it was dropped somewhere later in the WAL sequence). As in
503  * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
504  * log as best we can until the drop is seen.
505  */
506  smgrcreate(reln, MAIN_FORKNUM, true);
507 
508  /*
509  * Before we perform the truncation, update minimum recovery point to
510  * cover this WAL record. Once the relation is truncated, there's no
511  * going back. The buffer manager enforces the WAL-first rule for
512  * normal updates to relation files, so that the minimum recovery
513  * point is always updated before the corresponding change in the data
514  * file is flushed to disk. We have to do the same manually here.
515  *
516  * Doing this before the truncation means that if the truncation fails
517  * for some reason, you cannot start up the system even after restart,
518  * until you fix the underlying situation so that the truncation will
519  * succeed. Alternatively, we could update the minimum recovery point
520  * after truncation, but that would leave a small window where the
521  * WAL-first rule could be violated.
522  */
523  XLogFlush(lsn);
524 
525  if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
526  {
527  smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
528 
529  /* Also tell xlogutils.c about it */
530  XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
531  }
532 
533  /* Truncate FSM and VM too */
534  rel = CreateFakeRelcacheEntry(xlrec->rnode);
535 
536  if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
537  smgrexists(reln, FSM_FORKNUM))
538  FreeSpaceMapTruncateRel(rel, xlrec->blkno);
539  if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
541  visibilitymap_truncate(rel, xlrec->blkno);
542 
544  }
545  else
546  elog(PANIC, "smgr_redo: unknown op code %u", info);
547 }
void RelationPreserveStorage(RelFileNode rnode, bool atCommit)
Definition: storage.c:189
void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, BlockNumber nblocks)
Definition: xlogutils.c:636
BlockNumber smgr_vm_nblocks
Definition: smgr.h:57
void smgrDoPendingDeletes(bool isCommit)
Definition: storage.c:305
BackendId backend
Definition: storage.c:57
void smgrclose(SMgrRelation reln)
Definition: smgr.c:296
#define SMGR_TRUNCATE_HEAP
Definition: storage_xlog.h:40
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:376
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:71
struct PendingRelDelete PendingRelDelete
struct SMgrRelationData * rd_smgr
Definition: rel.h:57
unsigned char uint8
Definition: c.h:323
#define SMGR_TRUNCATE_FSM
Definition: storage_xlog.h:42
void AtSubCommit_smgr(void)
Definition: storage.c:450
void smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
Definition: smgr.c:684
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:471
#define RelationCloseSmgr(relation)
Definition: rel.h:477
uint32 BlockNumber
Definition: block.h:31
BlockNumber smgr_fsm_nblocks
Definition: smgr.h:56
RelFileNode rnode
Definition: storage_xlog.h:49
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:287
#define PANIC
Definition: elog.h:53
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2783
#define XLOG_SMGR_CREATE
Definition: storage_xlog.h:30
XLogRecPtr EndRecPtr
Definition: xlogreader.h:120
#define RelationOpenSmgr(relation)
Definition: rel.h:465
void pfree(void *pointer)
Definition: mcxt.c:1031
#define XLogRecGetData(decoder)
Definition: xlogreader.h:230
#define ERROR
Definition: elog.h:43
char relpersistence
Definition: pg_class.h:50
void smgr_redo(XLogReaderState *record)
Definition: storage.c:476
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
Definition: storage.c:388
Relation CreateFakeRelcacheEntry(RelFileNode rnode)
Definition: xlogutils.c:550
#define BackendIdForTempRelations()
Definition: backendid.h:34
void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
Definition: freespace.c:259
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:226
MemoryContext TopMemoryContext
Definition: mcxt.c:44
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:137
ForkNumber
Definition: relpath.h:40
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
void FreeFakeRelcacheEntry(Relation fakerel)
Definition: xlogutils.c:591
struct PendingRelDelete * next
Definition: storage.c:60
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define InvalidBackendId
Definition: backendid.h:23
void RelationDropStorage(Relation rel)
Definition: storage.c:144
void PostPrepare_smgr(void)
Definition: storage.c:429
int BackendId
Definition: backendid.h:21
RelFileNode node
Definition: relfilenode.h:74
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:753
RelFileNode rd_node
Definition: rel.h:55
ForkNumber forkNum
Definition: storage_xlog.h:36
uint64 XLogRecPtr
Definition: xlogdefs.h:21
BackendId rd_backend
Definition: rel.h:59
#define Assert(condition)
Definition: c.h:699
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
BlockNumber smgr_targblock
Definition: smgr.h:55
void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
void AtSubAbort_smgr(void)
Definition: storage.c:470
#define InvalidBlockNumber
Definition: block.h:33
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define RelationNeedsWAL(relation)
Definition: rel.h:510
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
static PendingRelDelete * pendingDeletes
Definition: storage.c:63
void * palloc(Size size)
Definition: mcxt.c:924
RelFileNode relnode
Definition: storage.c:56
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:771
int i
void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
Definition: storage.c:124
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:232
BlockNumber blkno
Definition: storage_xlog.h:48
#define elog
Definition: elog.h:219
#define SMGR_TRUNCATE_ALL
Definition: storage_xlog.h:43
void XLogBeginInsert(void)
Definition: xloginsert.c:120
void RelationCreateStorage(RelFileNode rnode, char relpersistence)
Definition: storage.c:77
RelFileNode rnode
Definition: storage_xlog.h:35
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:226