PostgreSQL Source Code  git master
smgr.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * smgr.c
4  * public interface routines to storage manager switch.
5  *
6  * All file system operations on relations dispatch through these routines.
7  * An SMgrRelation represents physical on-disk relation files that are open
8  * for reading and writing.
9  *
10  * When a relation is first accessed through the relation cache, the
11  * corresponding SMgrRelation entry is opened by calling smgropen(), and the
12  * reference is stored in the relation cache entry.
13  *
14  * Accesses that don't go through the relation cache open the SMgrRelation
15  * directly. That includes flushing buffers from the buffer cache, as well as
16  * all accesses in auxiliary processes like the checkpointer or the WAL redo
17  * in the startup process.
18  *
19  * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references
20  * independent of the relation cache. They need to prepare the physical files
21  * before updating the relation cache.
22  *
23  * There is a hash table that holds all the SMgrRelation entries in the
24  * backend. If you call smgropen() twice for the same rel locator, you get a
25  * reference to the same SMgrRelation. The reference is valid until the end of
26  * transaction. This makes repeated access to the same relation efficient,
27  * and allows caching things like the relation size in the SMgrRelation entry.
28  *
29  * At end of transaction, all SMgrRelation entries that haven't been pinned
30  * are removed. An SMgrRelation can hold kernel file system descriptors for
31  * the underlying files, and we'd like to close those reasonably soon if the
32  * file gets deleted. The SMgrRelation references held by the relcache are
33  * pinned to prevent them from being closed.
34  *
35  * There is another mechanism to close file descriptors early:
36  * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all
37  * file descriptors. Upon receiving that signal, the backend closes all file
38  * descriptors held open by SMgrRelations, but because it can happen in the
39  * middle of a transaction, we cannot destroy the SMgrRelation objects
40  * themselves, as there could be pointers to them in active use. See
41  * smgrrelease() and smgrreleaseall().
42  *
43  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
44  * Portions Copyright (c) 1994, Regents of the University of California
45  *
46  *
47  * IDENTIFICATION
48  * src/backend/storage/smgr/smgr.c
49  *
50  *-------------------------------------------------------------------------
51  */
52 #include "postgres.h"
53 
54 #include "access/xlogutils.h"
55 #include "lib/ilist.h"
56 #include "storage/bufmgr.h"
57 #include "storage/ipc.h"
58 #include "storage/md.h"
59 #include "storage/smgr.h"
60 #include "utils/hsearch.h"
61 #include "utils/inval.h"
62 
63 
64 /*
65  * This struct of function pointers defines the API between smgr.c and
66  * any individual storage manager module. Note that smgr subfunctions are
67  * generally expected to report problems via elog(ERROR). An exception is
68  * that smgr_unlink should use elog(WARNING), rather than erroring out,
69  * because we normally unlink relations during post-commit/abort cleanup,
70  * and so it's too late to raise an error. Also, various conditions that
71  * would normally be errors should be allowed during bootstrap and/or WAL
72  * recovery --- see comments in md.c for details.
73  */
74 typedef struct f_smgr
75 {
76  void (*smgr_init) (void); /* may be NULL */
77  void (*smgr_shutdown) (void); /* may be NULL */
78  void (*smgr_open) (SMgrRelation reln);
79  void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
80  void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
81  bool isRedo);
83  void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
84  bool isRedo);
85  void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
86  BlockNumber blocknum, const void *buffer, bool skipFsync);
87  void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
88  BlockNumber blocknum, int nblocks, bool skipFsync);
90  BlockNumber blocknum, int nblocks);
91  void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
92  BlockNumber blocknum,
93  void **buffers, BlockNumber nblocks);
94  void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
95  BlockNumber blocknum,
96  const void **buffers, BlockNumber nblocks,
97  bool skipFsync);
98  void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
99  BlockNumber blocknum, BlockNumber nblocks);
101  void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
102  BlockNumber nblocks);
103  void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
104  void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum);
106 
/*
 * Table of storage manager implementations. Callers in this file dispatch
 * through smgrsw[reln->smgr_which], so an SMgrRelation's smgr_which field is
 * the index into this array. The only entry is the md.c ("magnetic disk")
 * implementation; it provides no shutdown hook, so smgr_shutdown is NULL.
 */
107 static const f_smgr smgrsw[] = {
108  /* magnetic disk */
109  {
110  .smgr_init = mdinit,
111  .smgr_shutdown = NULL,
112  .smgr_open = mdopen,
113  .smgr_close = mdclose,
114  .smgr_create = mdcreate,
115  .smgr_exists = mdexists,
116  .smgr_unlink = mdunlink,
117  .smgr_extend = mdextend,
118  .smgr_zeroextend = mdzeroextend,
119  .smgr_prefetch = mdprefetch,
120  .smgr_readv = mdreadv,
121  .smgr_writev = mdwritev,
122  .smgr_writeback = mdwriteback,
123  .smgr_nblocks = mdnblocks,
124  .smgr_truncate = mdtruncate,
125  .smgr_immedsync = mdimmedsync,
126  .smgr_registersync = mdregistersync,
127  }
128 };
129 
130 static const int NSmgr = lengthof(smgrsw);
131 
132 /*
133  * Each backend has a hashtable that stores all extant SMgrRelation objects.
134  * In addition, "unpinned" SMgrRelation objects are chained together in a list.
135  */
136 static HTAB *SMgrRelationHash = NULL;
137 
139 
140 /* local function prototypes */
141 static void smgrshutdown(int code, Datum arg);
142 static void smgrdestroy(SMgrRelation reln);
143 
144 
145 /*
146  * smgrinit(), smgrshutdown() -- Initialize or shut down storage
147  * managers.
148  *
149  * Note: smgrinit is called during backend startup (normal or standalone
150  * case), *not* during postmaster start. Therefore, any resources created
151  * here or destroyed in smgrshutdown are backend-local.
152  */
153 void
154 smgrinit(void)
155 {
156  int i;
157 
158  for (i = 0; i < NSmgr; i++)
159  {
160  if (smgrsw[i].smgr_init)
161  smgrsw[i].smgr_init();
162  }
163 
164  /* register the shutdown proc */
166 }
167 
168 /*
169  * on_proc_exit hook for smgr cleanup during backend shutdown
170  */
171 static void
173 {
174  int i;
175 
176  for (i = 0; i < NSmgr; i++)
177  {
178  if (smgrsw[i].smgr_shutdown)
180  }
181 }
182 
183 /*
184  * smgropen() -- Return an SMgrRelation object, creating it if need be.
185  *
186  * In versions of PostgreSQL prior to 17, this function returned an object
187  * with no defined lifetime. Now, however, the object remains valid for the
188  * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
189  * called, making it much easier for callers to know for how long they can
190  * hold on to a pointer to the returned object. If this function is called
191  * outside of a transaction, the object remains valid until smgrdestroy() or
192  * smgrdestroyall() is called. Background processes that use smgr but not
193  * transactions typically do this once per checkpoint cycle.
194  *
195  * This does not attempt to actually open the underlying files.
196  */
199 {
200  RelFileLocatorBackend brlocator;
201  SMgrRelation reln;
202  bool found;
203 
205 
206  if (SMgrRelationHash == NULL)
207  {
208  /* First time through: initialize the hash table */
209  HASHCTL ctl;
210 
211  ctl.keysize = sizeof(RelFileLocatorBackend);
212  ctl.entrysize = sizeof(SMgrRelationData);
213  SMgrRelationHash = hash_create("smgr relation table", 400,
214  &ctl, HASH_ELEM | HASH_BLOBS);
216  }
217 
218  /* Look up or create an entry */
219  brlocator.locator = rlocator;
220  brlocator.backend = backend;
222  &brlocator,
223  HASH_ENTER, &found);
224 
225  /* Initialize it if not present before */
226  if (!found)
227  {
228  /* hash_search already filled in the lookup key */
230  for (int i = 0; i <= MAX_FORKNUM; ++i)
232  reln->smgr_which = 0; /* we only have md.c at present */
233 
234  /* implementation-specific initialization */
235  smgrsw[reln->smgr_which].smgr_open(reln);
236 
237  /* it is not pinned yet */
238  reln->pincount = 0;
240  }
241 
242  return reln;
243 }
244 
245 /*
246  * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of
247  * transaction
248  */
249 void
251 {
252  if (reln->pincount == 0)
253  dlist_delete(&reln->node);
254  reln->pincount++;
255 }
256 
257 /*
258  * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of
259  * transaction
260  *
261  * The object remains valid, but if there are no other pins on it, it is moved
262  * to the unpinned list where it will be destroyed by AtEOXact_SMgr().
263  */
264 void
266 {
267  Assert(reln->pincount > 0);
268  reln->pincount--;
269  if (reln->pincount == 0)
271 }
272 
273 /*
274  * smgrdestroy() -- Delete an SMgrRelation object.
275  */
276 static void
278 {
279  ForkNumber forknum;
280 
281  Assert(reln->pincount == 0);
282 
283  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
284  smgrsw[reln->smgr_which].smgr_close(reln, forknum);
285 
286  dlist_delete(&reln->node);
287 
289  &(reln->smgr_rlocator),
290  HASH_REMOVE, NULL) == NULL)
291  elog(ERROR, "SMgrRelation hashtable corrupted");
292 }
293 
294 /*
295  * smgrrelease() -- Release all resources used by this object.
296  *
297  * The object remains valid.
298  */
299 void
301 {
302  for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
303  {
304  smgrsw[reln->smgr_which].smgr_close(reln, forknum);
305  reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
306  }
308 }
309 
310 /*
311  * smgrclose() -- Close an SMgrRelation object.
312  *
313  * The SMgrRelation reference should not be used after this call. However,
314  * because we don't keep track of the references returned by smgropen(), we
315  * don't know if there are other references still pointing to the same object,
316  * so we cannot remove the SMgrRelation object yet. Therefore, this is just a
317  * synonym for smgrrelease() at the moment.
318  */
319 void
321 {
322  smgrrelease(reln);
323 }
324 
325 /*
326  * smgrdestroyall() -- Release resources used by all unpinned objects.
327  *
328  * It must be known that there are no pointers to SMgrRelations, other than
329  * those pinned with smgrpin().
330  */
331 void
333 {
334  dlist_mutable_iter iter;
335 
336  /*
337  * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
338  * each one from the list.
339  */
341  {
343  iter.cur);
344 
345  smgrdestroy(rel);
346  }
347 }
348 
349 /*
350  * smgrreleaseall() -- Release resources used by all objects.
351  */
352 void
354 {
355  HASH_SEQ_STATUS status;
356  SMgrRelation reln;
357 
358  /* Nothing to do if hashtable not set up */
359  if (SMgrRelationHash == NULL)
360  return;
361 
363 
364  while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
365  {
366  smgrrelease(reln);
367  }
368 }
369 
370 /*
371  * smgrreleaserellocator() -- Release resources for given RelFileLocator, if
372  * it's open.
373  *
374  * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids
375  * uselessly creating a hashtable entry only to drop it again when no
376  * such entry exists already.
377  */
378 void
380 {
381  SMgrRelation reln;
382 
383  /* Nothing to do if hashtable not set up */
384  if (SMgrRelationHash == NULL)
385  return;
386 
388  &rlocator,
389  HASH_FIND, NULL);
390  if (reln != NULL)
391  smgrrelease(reln);
392 }
393 
394 /*
395  * smgrexists() -- Does the underlying file for a fork exist?
396  */
397 bool
399 {
400  return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
401 }
402 
403 /*
404  * smgrcreate() -- Create a new relation.
405  *
406  * Given an already-created (but presumably unused) SMgrRelation,
407  * cause the underlying disk file or other storage for the fork
408  * to be created.
409  */
410 void
411 smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
412 {
413  smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
414 }
415 
416 /*
417  * smgrdosyncall() -- Immediately sync all forks of all given relations
418  *
419  * All forks of all given relations are synced out to the store.
420  *
421  * This is equivalent to FlushRelationBuffers() for each smgr relation,
422  * then calling smgrimmedsync() for all forks of each relation, but it's
423  * significantly quicker so should be preferred when possible.
424  */
425 void
426 smgrdosyncall(SMgrRelation *rels, int nrels)
427 {
428  int i = 0;
429  ForkNumber forknum;
430 
431  if (nrels == 0)
432  return;
433 
434  FlushRelationsAllBuffers(rels, nrels);
435 
436  /*
437  * Sync the physical file(s).
438  */
439  for (i = 0; i < nrels; i++)
440  {
441  int which = rels[i]->smgr_which;
442 
443  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
444  {
445  if (smgrsw[which].smgr_exists(rels[i], forknum))
446  smgrsw[which].smgr_immedsync(rels[i], forknum);
447  }
448  }
449 }
450 
451 /*
452  * smgrdounlinkall() -- Immediately unlink all forks of all given relations
453  *
454  * All forks of all given relations are removed from the store. This
455  * should not be used during transactional operations, since it can't be
456  * undone.
457  *
458  * If isRedo is true, it is okay for the underlying file(s) to be gone
459  * already.
460  */
461 void
462 smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
463 {
464  int i = 0;
465  RelFileLocatorBackend *rlocators;
466  ForkNumber forknum;
467 
468  if (nrels == 0)
469  return;
470 
471  /*
472  * Get rid of any remaining buffers for the relations. bufmgr will just
473  * drop them without bothering to write the contents.
474  */
475  DropRelationsAllBuffers(rels, nrels);
476 
477  /*
478  * create an array which contains all relations to be dropped, and close
479  * each relation's forks at the smgr level while at it
480  */
481  rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
482  for (i = 0; i < nrels; i++)
483  {
484  RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
485  int which = rels[i]->smgr_which;
486 
487  rlocators[i] = rlocator;
488 
489  /* Close the forks at smgr level */
490  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
491  smgrsw[which].smgr_close(rels[i], forknum);
492  }
493 
494  /*
495  * Send a shared-inval message to force other backends to close any
496  * dangling smgr references they may have for these rels. We should do
497  * this before starting the actual unlinking, in case we fail partway
498  * through that step. Note that the sinval messages will eventually come
499  * back to this backend, too, and thereby provide a backstop that we
500  * closed our own smgr rel.
501  */
502  for (i = 0; i < nrels; i++)
503  CacheInvalidateSmgr(rlocators[i]);
504 
505  /*
506  * Delete the physical file(s).
507  *
508  * Note: smgr_unlink must treat deletion failure as a WARNING, not an
509  * ERROR, because we've already decided to commit or abort the current
510  * xact.
511  */
512 
513  for (i = 0; i < nrels; i++)
514  {
515  int which = rels[i]->smgr_which;
516 
517  for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
518  smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
519  }
520 
521  pfree(rlocators);
522 }
523 
524 
525 /*
526  * smgrextend() -- Add a new block to a file.
527  *
528  * The semantics are nearly the same as smgrwrite(): write at the
529  * specified position. However, this is to be used for the case of
530  * extending a relation (i.e., blocknum is at or beyond the current
531  * EOF). Note that we assume writing a block beyond current EOF
532  * causes intervening file space to become filled with zeroes.
533  */
534 void
536  const void *buffer, bool skipFsync)
537 {
538  smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
539  buffer, skipFsync);
540 
541  /*
542  * Normally we expect this to increase nblocks by one, but if the cached
543  * value isn't as expected, just invalidate it so the next call asks the
544  * kernel.
545  */
546  if (reln->smgr_cached_nblocks[forknum] == blocknum)
547  reln->smgr_cached_nblocks[forknum] = blocknum + 1;
548  else
549  reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
550 }
551 
552 /*
553  * smgrzeroextend() -- Add new zeroed out blocks to a file.
554  *
555  * Similar to smgrextend(), except the relation can be extended by
556  * multiple blocks at once and the added blocks will be filled with
557  * zeroes.
558  */
559 void
561  int nblocks, bool skipFsync)
562 {
563  smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
564  nblocks, skipFsync);
565 
566  /*
567  * Normally we expect this to increase the fork size by nblocks, but if
568  * the cached value isn't as expected, just invalidate it so the next call
569  * asks the kernel.
570  */
571  if (reln->smgr_cached_nblocks[forknum] == blocknum)
572  reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
573  else
574  reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
575 }
576 
577 /*
578  * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
579  *
580  * In recovery only, this can return false to indicate that a file
581  * doesn't exist (presumably it has been dropped by a later WAL
582  * record).
583  */
584 bool
586  int nblocks)
587 {
588  return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
589 }
590 
591 /*
592  * smgrreadv() -- read a particular block range from a relation into the
593  * supplied buffers.
594  *
595  * This routine is called from the buffer manager in order to
596  * instantiate pages in the shared buffer cache. All storage managers
597  * return pages in the format that POSTGRES expects.
598  */
599 void
601  void **buffers, BlockNumber nblocks)
602 {
603  smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
604  nblocks);
605 }
606 
607 /*
608  * smgrwritev() -- Write the supplied buffers out.
609  *
610  * This is to be used only for updating already-existing blocks of a
611  * relation (ie, those before the current EOF). To extend a relation,
612  * use smgrextend().
613  *
614  * This is not a synchronous write -- the block is not necessarily
615  * on disk at return, only dumped out to the kernel. However,
616  * provisions will be made to fsync the write before the next checkpoint.
617  *
618  * NB: The mechanism to ensure fsync at next checkpoint assumes that there is
619  * something that prevents a concurrent checkpoint from "racing ahead" of the
620  * write. One way to prevent that is by holding a lock on the buffer; the
621  * buffer manager's writes are protected by that. The bulk writer facility
622  * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a
623  * checkpoint happened; that relies on the fact that no other backend can be
624  * concurrently modifying the page.
625  *
626  * skipFsync indicates that the caller will make other provisions to
627  * fsync the relation, so we needn't bother. Temporary relations also
628  * do not require fsync.
629  */
630 void
632  const void **buffers, BlockNumber nblocks, bool skipFsync)
633 {
634  smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
635  buffers, nblocks, skipFsync);
636 }
637 
638 /*
639  * smgrwriteback() -- Trigger kernel writeback for the supplied range of
640  * blocks.
641  */
642 void
644  BlockNumber nblocks)
645 {
646  smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
647  nblocks);
648 }
649 
650 /*
651  * smgrnblocks() -- Calculate the number of blocks in the
652  * supplied relation.
653  */
656 {
657  BlockNumber result;
658 
659  /* Check and return if we get the cached value for the number of blocks. */
660  result = smgrnblocks_cached(reln, forknum);
661  if (result != InvalidBlockNumber)
662  return result;
663 
664  result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
665 
666  reln->smgr_cached_nblocks[forknum] = result;
667 
668  return result;
669 }
670 
671 /*
672  * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
673  * relation.
674  *
675  * Returns an InvalidBlockNumber when not in recovery and when the relation
676  * fork size is not cached.
677  */
680 {
681  /*
682  * For now, this function uses cached values only in recovery due to lack
683  * of a shared invalidation mechanism for changes in file size. Code
684  * elsewhere reads smgr_cached_nblocks and copes with stale data.
685  */
686  if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
687  return reln->smgr_cached_nblocks[forknum];
688 
689  return InvalidBlockNumber;
690 }
691 
692 /*
693  * smgrtruncate() -- Truncate the given forks of the supplied relation to
694  * the number of blocks specified for each fork
695  *
696  * The truncation is done immediately, so this can't be rolled back.
697  *
698  * The caller must hold AccessExclusiveLock on the relation, to ensure that
699  * other backends receive the smgr invalidation event that this function sends
700  * before they access any forks of the relation again.
701  */
702 void
703 smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
704 {
705  int i;
706 
707  /*
708  * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
709  * just drop them without bothering to write the contents.
710  */
711  DropRelationBuffers(reln, forknum, nforks, nblocks);
712 
713  /*
714  * Send a shared-inval message to force other backends to close any smgr
715  * references they may have for this rel. This is useful because they
716  * might have open file pointers to segments that got removed, and/or
717  * smgr_targblock variables pointing past the new rel end. (The inval
718  * message will come back to our backend, too, causing a
719  * probably-unnecessary local smgr flush. But we don't expect that this
720  * is a performance-critical path.) As in the unlink code, we want to be
721  * sure the message is sent before we start changing things on-disk.
722  */
724 
725  /* Do the truncation */
726  for (i = 0; i < nforks; i++)
727  {
728  /* Make sure the cached size is invalid if we encounter an error. */
729  reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
730 
731  smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
732 
733  /*
734  * We might as well update the local smgr_cached_nblocks values. The
735  * smgr cache inval message that this function sent will cause other
736  * backends to invalidate their copies of smgr_fsm_nblocks and
737  * smgr_vm_nblocks, and these ones too at the next command boundary.
738  * But these ensure they aren't outright wrong until then.
739  */
740  reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
741  }
742 }
743 
744 /*
745  * smgrregistersync() -- Request a relation to be sync'd at next checkpoint
746  *
747  * This can be used after calling smgrwrite() or smgrextend() with skipFsync =
748  * true, to register the fsyncs that were skipped earlier.
749  *
750  * Note: be mindful that a checkpoint could already have happened between the
751  * smgrwrite or smgrextend calls and this! In that case, the checkpoint
752  * already missed fsyncing this relation, and you should use smgrimmedsync
753  * instead. Most callers should use the bulk loading facility in bulk_write.c
754  * which handles all that.
755  */
756 void
758 {
759  smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
760 }
761 
762 /*
763  * smgrimmedsync() -- Force the specified relation to stable storage.
764  *
765  * Synchronously force all previous writes to the specified relation
766  * down to disk.
767  *
768  * This is useful for building completely new relations (eg, new
769  * indexes). Instead of incrementally WAL-logging the index build
770  * steps, we can just write completed index pages to disk with smgrwrite
771  * or smgrextend, and then fsync the completed index file before
772  * committing the transaction. (This is sufficient for purposes of
773  * crash recovery, since it effectively duplicates forcing a checkpoint
774  * for the completed index. But it is *not* sufficient if one wishes
775  * to use the WAL log for PITR or replication purposes: in that case
776  * we have to make WAL entries as well.)
777  *
778  * The preceding writes should specify skipFsync = true to avoid
779  * duplicative fsyncs.
780  *
781  * Note that you need to do FlushRelationBuffers() first if there is
782  * any possibility that there are dirty buffers for the relation;
783  * otherwise the sync is not very meaningful.
784  *
785  * Most callers should use the bulk loading facility in bulk_write.c
786  * instead of calling this directly.
787  */
788 void
790 {
791  smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
792 }
793 
794 /*
795  * AtEOXact_SMgr
796  *
797  * This routine is called during transaction commit or abort (it doesn't
798  * particularly care which). All unpinned SMgrRelation objects are destroyed.
799  *
800  * We do this as a compromise between wanting transient SMgrRelations to
801  * live awhile (to amortize the costs of blind writes of multiple blocks)
802  * and needing them to not live forever (since we're probably holding open
803  * a kernel file descriptor for the underlying file, and we need to ensure
804  * that gets closed reasonably soon if the file gets deleted).
805  */
806 void
808 {
809  smgrdestroyall();
810 }
811 
812 /*
813  * This routine is called when we are ordered to release all open files by a
814  * ProcSignalBarrier.
815  */
816 bool
818 {
819  smgrreleaseall();
820  return true;
821 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
void FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
Definition: bufmgr.c:4533
void DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition: bufmgr.c:3974
void DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
Definition: bufmgr.c:4097
#define Assert(condition)
Definition: c.h:858
unsigned char bool
Definition: c.h:456
#define lengthof(array)
Definition: c.h:788
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1395
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
static void dlist_init(dlist_head *head)
Definition: ilist.h:314
static void dlist_delete(dlist_node *node)
Definition: ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:640
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition: ilist.h:364
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
void CacheInvalidateSmgr(RelFileLocatorBackend rlocator)
Definition: inval.c:1459
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:309
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:307
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: md.c:928
bool mdexists(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:171
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: md.c:810
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1089
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: md.c:190
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:460
void mdinit(void)
Definition: md.c:158
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:691
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: md.c:525
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: md.c:1030
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: md.c:714
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1240
void mdopen(SMgrRelation reln)
Definition: md.c:680
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
Definition: md.c:1146
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1291
void * arg
uintptr_t Datum
Definition: postgres.h:64
int ProcNumber
Definition: procnumber.h:24
tree ctl
Definition: radixtree.h:1847
struct RelFileLocatorBackend RelFileLocatorBackend
ForkNumber
Definition: relpath.h:48
#define MAX_FORKNUM
Definition: relpath.h:62
#define RelFileNumberIsValid(relnumber)
Definition: relpath.h:27
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:655
static HTAB * SMgrRelationHash
Definition: smgr.c:136
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:643
void smgrrelease(SMgrRelation reln)
Definition: smgr.c:300
static void smgrdestroy(SMgrRelation reln)
Definition: smgr.c:277
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:198
void smgrinit(void)
Definition: smgr.c:154
void smgrdestroyall(void)
Definition: smgr.c:332
void smgrreleaseall(void)
Definition: smgr.c:353
static dlist_head unpinned_relns
Definition: smgr.c:138
void smgrpin(SMgrRelation reln)
Definition: smgr.c:250
void smgrunpin(SMgrRelation reln)
Definition: smgr.c:265
void smgrdosyncall(SMgrRelation *rels, int nrels)
Definition: smgr.c:426
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:789
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:411
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:679
void smgrclose(SMgrRelation reln)
Definition: smgr.c:320
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:560
static const int NSmgr
Definition: smgr.c:130
bool ProcessBarrierSmgrRelease(void)
Definition: smgr.c:817
void AtEOXact_SMgr(void)
Definition: smgr.c:807
static void smgrshutdown(int code, Datum arg)
Definition: smgr.c:172
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:535
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
Definition: smgr.c:703
void smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: smgr.c:631
void smgrreleaserellocator(RelFileLocatorBackend rlocator)
Definition: smgr.c:379
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:398
void smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:600
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:462
void smgrregistersync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:757
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:585
static const f_smgr smgrsw[]
Definition: smgr.c:107
struct f_smgr f_smgr
struct SMgrRelationData SMgrRelationData
SMgrRelationData * SMgrRelation
Definition: smgr.h:71
Definition: dynahash.c:220
RelFileLocator locator
RelFileNumber relNumber
BlockNumber smgr_targblock
Definition: smgr.h:45
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37
dlist_node node
Definition: smgr.h:68
int smgr_which
Definition: smgr.h:54
int pincount
Definition: smgr.h:67
dlist_node * cur
Definition: ilist.h:200
Definition: smgr.c:75
bool(* smgr_prefetch)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:89
void(* smgr_writeback)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:98
void(* smgr_extend)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:85
void(* smgr_create)(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:80
BlockNumber(* smgr_nblocks)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:100
void(* smgr_registersync)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:104
void(* smgr_immedsync)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:103
void(* smgr_zeroextend)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:87
void(* smgr_readv)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:91
void(* smgr_unlink)(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: smgr.c:83
void(* smgr_open)(SMgrRelation reln)
Definition: smgr.c:78
void(* smgr_shutdown)(void)
Definition: smgr.c:77
void(* smgr_writev)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: smgr.c:94
void(* smgr_init)(void)
Definition: smgr.c:76
bool(* smgr_exists)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:82
void(* smgr_truncate)(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
Definition: smgr.c:101
void(* smgr_close)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:79
bool InRecovery
Definition: xlogutils.c:50