PostgreSQL Source Code git master
smgr.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * smgr.c
4 * public interface routines to storage manager switch.
5 *
6 * All file system operations on relations dispatch through these routines.
7 * An SMgrRelation represents physical on-disk relation files that are open
8 * for reading and writing.
9 *
10 * When a relation is first accessed through the relation cache, the
11 * corresponding SMgrRelation entry is opened by calling smgropen(), and the
12 * reference is stored in the relation cache entry.
13 *
14 * Accesses that don't go through the relation cache open the SMgrRelation
15 * directly. That includes flushing buffers from the buffer cache, as well as
16 * all accesses in auxiliary processes like the checkpointer or the WAL redo
17 * in the startup process.
18 *
19 * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references
20 * independent of the relation cache. They need to prepare the physical files
21 * before updating the relation cache.
22 *
23 * There is a hash table that holds all the SMgrRelation entries in the
24 * backend. If you call smgropen() twice for the same rel locator, you get a
25 * reference to the same SMgrRelation. The reference is valid until the end of
26 * transaction. This makes repeated access to the same relation efficient,
27 * and allows caching things like the relation size in the SMgrRelation entry.
28 *
29 * At end of transaction, all SMgrRelation entries that haven't been pinned
30 * are removed. An SMgrRelation can hold kernel file system descriptors for
31 * the underlying files, and we'd like to close those reasonably soon if the
32 * file gets deleted. The SMgrRelation references held by the relcache are
33 * pinned to prevent them from being closed.
34 *
35 * There is another mechanism to close file descriptors early:
36 * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all
37 * file descriptors. Upon receiving that signal, the backend closes all file
38 * descriptors held open by SMgrRelations, but because it can happen in the
39 * middle of a transaction, we cannot destroy the SMgrRelation objects
40 * themselves, as there could be pointers to them in active use. See
41 * smgrrelease() and smgrreleaseall().
42 *
43 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
44 * Portions Copyright (c) 1994, Regents of the University of California
45 *
46 *
47 * IDENTIFICATION
48 * src/backend/storage/smgr/smgr.c
49 *
50 *-------------------------------------------------------------------------
51 */
52#include "postgres.h"
53
54#include "access/xlogutils.h"
55#include "lib/ilist.h"
56#include "storage/bufmgr.h"
57#include "storage/ipc.h"
58#include "storage/md.h"
59#include "storage/smgr.h"
60#include "utils/hsearch.h"
61#include "utils/inval.h"
62
63
64/*
65 * This struct of function pointers defines the API between smgr.c and
66 * any individual storage manager module. Note that smgr subfunctions are
67 * generally expected to report problems via elog(ERROR). An exception is
68 * that smgr_unlink should use elog(WARNING), rather than erroring out,
69 * because we normally unlink relations during post-commit/abort cleanup,
70 * and so it's too late to raise an error. Also, various conditions that
71 * would normally be errors should be allowed during bootstrap and/or WAL
72 * recovery --- see comments in md.c for details.
73 */
74typedef struct f_smgr
75{
76 void (*smgr_init) (void); /* may be NULL */
77 void (*smgr_shutdown) (void); /* may be NULL */
78 void (*smgr_open) (SMgrRelation reln);
79 void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
80 void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
81 bool isRedo);
82 bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
83 void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
84 bool isRedo);
85 void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
86 BlockNumber blocknum, const void *buffer, bool skipFsync);
87 void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
88 BlockNumber blocknum, int nblocks, bool skipFsync);
89 bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
90 BlockNumber blocknum, int nblocks);
92 BlockNumber blocknum);
93 void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
94 BlockNumber blocknum,
95 void **buffers, BlockNumber nblocks);
96 void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
97 BlockNumber blocknum,
98 const void **buffers, BlockNumber nblocks,
99 bool skipFsync);
100 void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
101 BlockNumber blocknum, BlockNumber nblocks);
103 void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
104 BlockNumber old_blocks, BlockNumber nblocks);
105 void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
108
109static const f_smgr smgrsw[] = {
110 /* magnetic disk: the md.c routines; this is entry 0, the only entry */
111 {
112 .smgr_init = mdinit,
113 .smgr_shutdown = NULL, /* optional hook, unused by md.c */
114 .smgr_open = mdopen,
115 .smgr_close = mdclose,
116 .smgr_create = mdcreate,
117 .smgr_exists = mdexists,
118 .smgr_unlink = mdunlink,
119 .smgr_extend = mdextend,
120 .smgr_zeroextend = mdzeroextend,
121 .smgr_prefetch = mdprefetch,
122 .smgr_maxcombine = mdmaxcombine,
123 .smgr_readv = mdreadv,
124 .smgr_writev = mdwritev,
125 .smgr_writeback = mdwriteback,
126 .smgr_nblocks = mdnblocks,
127 .smgr_truncate = mdtruncate,
128 .smgr_immedsync = mdimmedsync,
129 .smgr_registersync = mdregistersync,
130 }
131};
132
133static const int NSmgr = lengthof(smgrsw); /* number of entries in smgrsw[] */
134
135/*
136 * Each backend has a hashtable that stores all extant SMgrRelation objects.
137 * In addition, "unpinned" SMgrRelation objects are chained together in a list.
138 */
139static HTAB *SMgrRelationHash = NULL;
140
142
143/* local function prototypes (both are private to this file) */
144static void smgrshutdown(int code, Datum arg); /* on_proc_exit hook */
145static void smgrdestroy(SMgrRelation reln); /* delete one SMgrRelation */
146
147
148/*
149 * smgrinit(), smgrshutdown() -- Initialize or shut down storage
150 * managers.
151 *
152 * Note: smgrinit is called during backend startup (normal or standalone
153 * case), *not* during postmaster start. Therefore, any resources created
154 * here or destroyed in smgrshutdown are backend-local.
155 */
156void
158{
159 int i;
160
161 for (i = 0; i < NSmgr; i++)
162 {
163 if (smgrsw[i].smgr_init)
164 smgrsw[i].smgr_init();
165 }
166
167 /* register the shutdown proc */
169}
170
171/*
172 * on_proc_exit hook for smgr cleanup during backend shutdown
173 */
174static void
176{
177 int i;
178
179 for (i = 0; i < NSmgr; i++)
180 {
181 if (smgrsw[i].smgr_shutdown)
183 }
184}
185
186/*
187 * smgropen() -- Return an SMgrRelation object, creating it if need be.
188 *
189 * In versions of PostgreSQL prior to 17, this function returned an object
190 * with no defined lifetime. Now, however, the object remains valid for the
191 * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
192 * called, making it much easier for callers to know for how long they can
193 * hold on to a pointer to the returned object. If this function is called
194 * outside of a transaction, the object remains valid until smgrdestroy() or
195 * smgrdestroyall() is called. Background processes that use smgr but not
196 * transactions typically do this once per checkpoint cycle.
197 *
198 * This does not attempt to actually open the underlying files.
199 */
202{
203 RelFileLocatorBackend brlocator;
204 SMgrRelation reln;
205 bool found;
206
208
209 if (SMgrRelationHash == NULL)
210 {
211 /* First time through: initialize the hash table */
212 HASHCTL ctl;
213
214 ctl.keysize = sizeof(RelFileLocatorBackend);
215 ctl.entrysize = sizeof(SMgrRelationData);
216 SMgrRelationHash = hash_create("smgr relation table", 400,
219 }
220
221 /* Look up or create an entry */
222 brlocator.locator = rlocator;
223 brlocator.backend = backend;
225 &brlocator,
226 HASH_ENTER, &found);
227
228 /* Initialize it if not present before */
229 if (!found)
230 {
231 /* hash_search already filled in the lookup key */
233 for (int i = 0; i <= MAX_FORKNUM; ++i)
235 reln->smgr_which = 0; /* we only have md.c at present */
236
237 /* implementation-specific initialization */
238 smgrsw[reln->smgr_which].smgr_open(reln);
239
240 /* it is not pinned yet */
241 reln->pincount = 0;
243 }
244
245 return reln;
246}
247
248/*
249 * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of
250 * transaction
251 */
252void
254{
255 if (reln->pincount == 0)
256 dlist_delete(&reln->node);
257 reln->pincount++;
258}
259
260/*
261 * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of
262 * transaction
263 *
264 * The object remains valid, but if there are no other pins on it, it is moved
265 * to the unpinned list where it will be destroyed by AtEOXact_SMgr().
266 */
267void
269{
270 Assert(reln->pincount > 0);
271 reln->pincount--;
272 if (reln->pincount == 0)
274}
275
276/*
277 * smgrdestroy() -- Delete an SMgrRelation object.
278 */
279static void
281{
282 ForkNumber forknum;
283
284 Assert(reln->pincount == 0);
285
286 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
287 smgrsw[reln->smgr_which].smgr_close(reln, forknum);
288
289 dlist_delete(&reln->node);
290
292 &(reln->smgr_rlocator),
293 HASH_REMOVE, NULL) == NULL)
294 elog(ERROR, "SMgrRelation hashtable corrupted");
295}
296
297/*
298 * smgrrelease() -- Release all resources used by this object.
299 *
300 * The object remains valid.
301 */
302void
304{
305 for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
306 {
307 smgrsw[reln->smgr_which].smgr_close(reln, forknum);
309 }
311}
312
313/*
314 * smgrclose() -- Close an SMgrRelation object.
315 *
316 * The SMgrRelation reference should not be used after this call. However,
317 * because we don't keep track of the references returned by smgropen(), we
318 * don't know if there are other references still pointing to the same object,
319 * so we cannot remove the SMgrRelation object yet. Therefore, this is just a
320 * synonym for smgrrelease() at the moment.
321 */
322void
324{
325 smgrrelease(reln);
326}
327
328/*
329 * smgrdestroyall() -- Release resources used by all unpinned objects.
330 *
331 * It must be known that there are no pointers to SMgrRelations, other than
332 * those pinned with smgrpin().
333 */
334void
336{
338
339 /*
340 * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
341 * each one from the list.
342 */
344 {
346 iter.cur);
347
348 smgrdestroy(rel);
349 }
350}
351
352/*
353 * smgrreleaseall() -- Release resources used by all objects.
354 */
355void
357{
358 HASH_SEQ_STATUS status;
359 SMgrRelation reln;
360
361 /* Nothing to do if hashtable not set up */
362 if (SMgrRelationHash == NULL)
363 return;
364
366
367 while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
368 {
369 smgrrelease(reln);
370 }
371}
372
373/*
374 * smgrreleaserellocator() -- Release resources for given RelFileLocator, if
375 * it's open.
376 *
377 * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids
378 * uselessly creating a hashtable entry only to drop it again when no
379 * such entry exists already.
380 */
381void
383{
384 SMgrRelation reln;
385
386 /* Nothing to do if hashtable not set up */
387 if (SMgrRelationHash == NULL)
388 return;
389
391 &rlocator,
392 HASH_FIND, NULL);
393 if (reln != NULL)
394 smgrrelease(reln);
395}
396
397/*
398 * smgrexists() -- Does the underlying file for a fork exist?
399 */
400bool
402{
403 return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
404}
405
406/*
407 * smgrcreate() -- Create a new relation.
408 *
409 * Given an already-created (but presumably unused) SMgrRelation,
410 * cause the underlying disk file or other storage for the fork
411 * to be created.
412 */
413void
414smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
415{
416 smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
417}
418
419/*
420 * smgrdosyncall() -- Immediately sync all forks of all given relations
421 *
422 * All forks of all given relations are synced out to the store.
423 *
424 * This is equivalent to FlushRelationBuffers() for each smgr relation,
425 * then calling smgrimmedsync() for all forks of each relation, but it's
426 * significantly quicker so should be preferred when possible.
427 */
428void
430{
431 int i = 0;
432 ForkNumber forknum;
433
434 if (nrels == 0)
435 return;
436
437 FlushRelationsAllBuffers(rels, nrels);
438
439 /*
440 * Sync the physical file(s).
441 */
442 for (i = 0; i < nrels; i++)
443 {
444 int which = rels[i]->smgr_which;
445
446 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
447 {
448 if (smgrsw[which].smgr_exists(rels[i], forknum))
449 smgrsw[which].smgr_immedsync(rels[i], forknum);
450 }
451 }
452}
453
454/*
455 * smgrdounlinkall() -- Immediately unlink all forks of all given relations
456 *
457 * All forks of all given relations are removed from the store. This
458 * should not be used during transactional operations, since it can't be
459 * undone.
460 *
461 * If isRedo is true, it is okay for the underlying file(s) to be gone
462 * already.
463 */
464void
465smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
466{
467 int i = 0;
468 RelFileLocatorBackend *rlocators;
469 ForkNumber forknum;
470
471 if (nrels == 0)
472 return;
473
474 /*
475 * Get rid of any remaining buffers for the relations. bufmgr will just
476 * drop them without bothering to write the contents.
477 */
478 DropRelationsAllBuffers(rels, nrels);
479
480 /*
481 * create an array which contains all relations to be dropped, and close
482 * each relation's forks at the smgr level while at it
483 */
484 rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
485 for (i = 0; i < nrels; i++)
486 {
487 RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
488 int which = rels[i]->smgr_which;
489
490 rlocators[i] = rlocator;
491
492 /* Close the forks at smgr level */
493 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
494 smgrsw[which].smgr_close(rels[i], forknum);
495 }
496
497 /*
498 * Send a shared-inval message to force other backends to close any
499 * dangling smgr references they may have for these rels. We should do
500 * this before starting the actual unlinking, in case we fail partway
501 * through that step. Note that the sinval messages will eventually come
502 * back to this backend, too, and thereby provide a backstop that we
503 * closed our own smgr rel.
504 */
505 for (i = 0; i < nrels; i++)
506 CacheInvalidateSmgr(rlocators[i]);
507
508 /*
509 * Delete the physical file(s).
510 *
511 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
512 * ERROR, because we've already decided to commit or abort the current
513 * xact.
514 */
515
516 for (i = 0; i < nrels; i++)
517 {
518 int which = rels[i]->smgr_which;
519
520 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
521 smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
522 }
523
524 pfree(rlocators);
525}
526
527
528/*
529 * smgrextend() -- Add a new block to a file.
530 *
531 * The semantics are nearly the same as smgrwrite(): write at the
532 * specified position. However, this is to be used for the case of
533 * extending a relation (i.e., blocknum is at or beyond the current
534 * EOF). Note that we assume writing a block beyond current EOF
535 * causes intervening file space to become filled with zeroes.
536 */
537void
539 const void *buffer, bool skipFsync)
540{
541 smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
542 buffer, skipFsync);
543
544 /*
545 * Normally we expect this to increase nblocks by one, but if the cached
546 * value isn't as expected, just invalidate it so the next call asks the
547 * kernel.
548 */
549 if (reln->smgr_cached_nblocks[forknum] == blocknum)
550 reln->smgr_cached_nblocks[forknum] = blocknum + 1;
551 else
553}
554
555/*
556 * smgrzeroextend() -- Add new zeroed out blocks to a file.
557 *
558 * Similar to smgrextend(), except the relation can be extended by
559 * multiple blocks at once and the added blocks will be filled with
560 * zeroes.
561 */
562void
564 int nblocks, bool skipFsync)
565{
566 smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
567 nblocks, skipFsync);
568
569 /*
570 * Normally we expect this to increase the fork size by nblocks, but if
571 * the cached value isn't as expected, just invalidate it so the next call
572 * asks the kernel.
573 */
574 if (reln->smgr_cached_nblocks[forknum] == blocknum)
575 reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
576 else
578}
579
580/*
581 * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
582 *
583 * In recovery only, this can return false to indicate that a file
584 * doesn't exist (presumably it has been dropped by a later WAL
585 * record).
586 */
587bool
589 int nblocks)
590{
591 return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
592}
593
594/*
595 * smgrmaxcombine() - Return the maximum number of total blocks that can be
596 * combined with an IO starting at blocknum.
597 *
598 * The returned value includes the IO for blocknum itself.
599 */
600uint32
602 BlockNumber blocknum)
603{
604 return smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum);
605}
606
607/*
608 * smgrreadv() -- read a particular block range from a relation into the
609 * supplied buffers.
610 *
611 * This routine is called from the buffer manager in order to
612 * instantiate pages in the shared buffer cache. All storage managers
613 * return pages in the format that POSTGRES expects.
614 *
615 * If more than one block is intended to be read, callers need to use
616 * smgrmaxcombine() to check how many blocks can be combined into one IO.
617 */
618void
620 void **buffers, BlockNumber nblocks)
621{
622 smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
623 nblocks);
624}
625
626/*
627 * smgrwritev() -- Write the supplied buffers out.
628 *
629 * This is to be used only for updating already-existing blocks of a
630 * relation (ie, those before the current EOF). To extend a relation,
631 * use smgrextend().
632 *
633 * This is not a synchronous write -- the block is not necessarily
634 * on disk at return, only dumped out to the kernel. However,
635 * provisions will be made to fsync the write before the next checkpoint.
636 *
637 * NB: The mechanism to ensure fsync at next checkpoint assumes that there is
638 * something that prevents a concurrent checkpoint from "racing ahead" of the
639 * write. One way to prevent that is by holding a lock on the buffer; the
640 * buffer manager's writes are protected by that. The bulk writer facility
641 * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a
642 * checkpoint happened; that relies on the fact that no other backend can be
643 * concurrently modifying the page.
644 *
645 * skipFsync indicates that the caller will make other provisions to
646 * fsync the relation, so we needn't bother. Temporary relations also
647 * do not require fsync.
648 *
649 * If more than one block is intended to be written, callers need to use
650 * smgrmaxcombine() to check how many blocks can be combined into one IO.
651 */
652void
654 const void **buffers, BlockNumber nblocks, bool skipFsync)
655{
656 smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
657 buffers, nblocks, skipFsync);
658}
659
660/*
661 * smgrwriteback() -- Trigger kernel writeback for the supplied range of
662 * blocks.
663 */
664void
666 BlockNumber nblocks)
667{
668 smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
669 nblocks);
670}
671
672/*
673 * smgrnblocks() -- Calculate the number of blocks in the
674 * supplied relation.
675 */
678{
679 BlockNumber result;
680
681 /* Check and return if we get the cached value for the number of blocks. */
682 result = smgrnblocks_cached(reln, forknum);
683 if (result != InvalidBlockNumber)
684 return result;
685
686 result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
687
688 reln->smgr_cached_nblocks[forknum] = result;
689
690 return result;
691}
692
693/*
694 * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
695 * relation.
696 *
697 * Returns InvalidBlockNumber when not in recovery, or when the relation
698 * fork size is not cached.
699 */
702{
703 /*
704 * For now, this function uses cached values only in recovery due to lack
705 * of a shared invalidation mechanism for changes in file size. Code
706 * elsewhere reads smgr_cached_nblocks and copes with stale data.
707 */
708 if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
709 return reln->smgr_cached_nblocks[forknum];
710
711 return InvalidBlockNumber;
712}
713
714/*
715 * smgrtruncate() -- Truncate the given forks of supplied relation to
716 * each specified numbers of blocks
717 *
718 * The truncation is done immediately, so this can't be rolled back.
719 *
720 * The caller must hold AccessExclusiveLock on the relation, to ensure that
721 * other backends receive the smgr invalidation event that this function sends
722 * before they access any forks of the relation again. The current size of
723 * the forks should be provided in old_nblocks. This function should normally
724 * be called in a critical section, but the current size must be checked
725 * outside the critical section, and no interrupts or smgr functions relating
726 * to this relation should be called in between.
727 */
728void
729smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
730 BlockNumber *old_nblocks, BlockNumber *nblocks)
731{
732 int i;
733
734 /*
735 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
736 * just drop them without bothering to write the contents.
737 */
738 DropRelationBuffers(reln, forknum, nforks, nblocks);
739
740 /*
741 * Send a shared-inval message to force other backends to close any smgr
742 * references they may have for this rel. This is useful because they
743 * might have open file pointers to segments that got removed, and/or
744 * smgr_targblock variables pointing past the new rel end. (The inval
745 * message will come back to our backend, too, causing a
746 * probably-unnecessary local smgr flush. But we don't expect that this
747 * is a performance-critical path.) As in the unlink code, we want to be
748 * sure the message is sent before we start changing things on-disk.
749 */
751
752 /* Do the truncation */
753 for (i = 0; i < nforks; i++)
754 {
755 /* Make the cached size is invalid if we encounter an error. */
756 reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
757
758 smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i],
759 old_nblocks[i], nblocks[i]);
760
761 /*
762 * We might as well update the local smgr_cached_nblocks values. The
763 * smgr cache inval message that this function sent will cause other
764 * backends to invalidate their copies of smgr_cached_nblocks, and
765 * these ones too at the next command boundary. But ensure they aren't
766 * outright wrong until then.
767 */
768 reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
769 }
770}
771
772/*
773 * smgrregistersync() -- Request a relation to be sync'd at next checkpoint
774 *
775 * This can be used after calling smgrwrite() or smgrextend() with skipFsync =
776 * true, to register the fsyncs that were skipped earlier.
777 *
778 * Note: be mindful that a checkpoint could already have happened between the
779 * smgrwrite or smgrextend calls and this! In that case, the checkpoint
780 * already missed fsyncing this relation, and you should use smgrimmedsync
781 * instead. Most callers should use the bulk loading facility in bulk_write.c
782 * which handles all that.
783 */
784void
786{
787 smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
788}
789
790/*
791 * smgrimmedsync() -- Force the specified relation to stable storage.
792 *
793 * Synchronously force all previous writes to the specified relation
794 * down to disk.
795 *
796 * This is useful for building completely new relations (eg, new
797 * indexes). Instead of incrementally WAL-logging the index build
798 * steps, we can just write completed index pages to disk with smgrwrite
799 * or smgrextend, and then fsync the completed index file before
800 * committing the transaction. (This is sufficient for purposes of
801 * crash recovery, since it effectively duplicates forcing a checkpoint
802 * for the completed index. But it is *not* sufficient if one wishes
803 * to use the WAL log for PITR or replication purposes: in that case
804 * we have to make WAL entries as well.)
805 *
806 * The preceding writes should specify skipFsync = true to avoid
807 * duplicative fsyncs.
808 *
809 * Note that you need to do FlushRelationBuffers() first if there is
810 * any possibility that there are dirty buffers for the relation;
811 * otherwise the sync is not very meaningful.
812 *
813 * Most callers should use the bulk loading facility in bulk_write.c
814 * instead of calling this directly.
815 */
816void
818{
819 smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
820}
821
822/*
823 * AtEOXact_SMgr
824 *
825 * This routine is called during transaction commit or abort (it doesn't
826 * particularly care which). All unpinned SMgrRelation objects are destroyed.
827 *
828 * We do this as a compromise between wanting transient SMgrRelations to
829 * live awhile (to amortize the costs of blind writes of multiple blocks)
830 * and needing them to not live forever (since we're probably holding open
831 * a kernel file descriptor for the underlying file, and we need to ensure
832 * that gets closed reasonably soon if the file gets deleted).
833 */
834void
836{
838}
839
840/*
841 * This routine is called when we are ordered to release all open files by a
842 * ProcSignalBarrier.
843 */
844bool
846{
848 return true;
849}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
void FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
Definition: bufmgr.c:4532
void DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition: bufmgr.c:4031
void DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)
Definition: bufmgr.c:4154
#define Assert(condition)
Definition: c.h:815
uint32_t uint32
Definition: c.h:488
#define lengthof(array)
Definition: c.h:745
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
static void dlist_init(dlist_head *head)
Definition: ilist.h:314
static void dlist_delete(dlist_node *node)
Definition: ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition: ilist.h:640
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition: ilist.h:364
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
void CacheInvalidateSmgr(RelFileLocatorBackend rlocator)
Definition: inval.c:1650
void on_proc_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:309
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: md.c:299
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
Definition: md.c:1166
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: md.c:938
bool mdexists(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:163
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: md.c:817
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1102
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: md.c:182
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: md.c:452
void mdinit(void)
Definition: md.c:150
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:683
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: md.c:517
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: md.c:1043
uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: md.c:803
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: md.c:706
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1255
void mdopen(SMgrRelation reln)
Definition: md.c:672
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: md.c:1306
void * arg
uintptr_t Datum
Definition: postgres.h:69
int ProcNumber
Definition: procnumber.h:24
tree ctl
Definition: radixtree.h:1838
struct RelFileLocatorBackend RelFileLocatorBackend
ForkNumber
Definition: relpath.h:56
#define MAX_FORKNUM
Definition: relpath.h:70
#define RelFileNumberIsValid(relnumber)
Definition: relpath.h:27
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:677
static HTAB * SMgrRelationHash
Definition: smgr.c:139
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:665
void smgrrelease(SMgrRelation reln)
Definition: smgr.c:303
static void smgrdestroy(SMgrRelation reln)
Definition: smgr.c:280
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:201
void smgrinit(void)
Definition: smgr.c:157
void smgrdestroyall(void)
Definition: smgr.c:335
void smgrreleaseall(void)
Definition: smgr.c:356
static dlist_head unpinned_relns
Definition: smgr.c:141
void smgrpin(SMgrRelation reln)
Definition: smgr.c:253
void smgrunpin(SMgrRelation reln)
Definition: smgr.c:268
void smgrdosyncall(SMgrRelation *rels, int nrels)
Definition: smgr.c:429
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:817
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:414
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:701
void smgrclose(SMgrRelation reln)
Definition: smgr.c:323
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: smgr.c:601
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:563
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *old_nblocks, BlockNumber *nblocks)
Definition: smgr.c:729
static const int NSmgr
Definition: smgr.c:133
bool ProcessBarrierSmgrRelease(void)
Definition: smgr.c:845
void AtEOXact_SMgr(void)
Definition: smgr.c:835
static void smgrshutdown(int code, Datum arg)
Definition: smgr.c:175
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:538
void smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: smgr.c:653
void smgrreleaserellocator(RelFileLocatorBackend rlocator)
Definition: smgr.c:382
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:401
void smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:619
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition: smgr.c:465
void smgrregistersync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:785
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:588
static const f_smgr smgrsw[]
Definition: smgr.c:109
struct f_smgr f_smgr
struct SMgrRelationData SMgrRelationData
SMgrRelationData * SMgrRelation
Definition: smgr.h:71
Definition: dynahash.c:220
RelFileLocator locator
RelFileNumber relNumber
BlockNumber smgr_targblock
Definition: smgr.h:45
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37
dlist_node node
Definition: smgr.h:68
int smgr_which
Definition: smgr.h:54
int pincount
Definition: smgr.h:67
dlist_node * cur
Definition: ilist.h:200
Definition: smgr.c:75
bool(* smgr_prefetch)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:89
void(* smgr_writeback)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:100
void(* smgr_extend)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:85
void(* smgr_create)(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:80
BlockNumber(* smgr_nblocks)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:102
void(* smgr_truncate)(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
Definition: smgr.c:103
uint32(* smgr_maxcombine)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: smgr.c:91
void(* smgr_registersync)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:106
void(* smgr_immedsync)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:105
void(* smgr_zeroextend)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:87
void(* smgr_readv)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:93
void(* smgr_unlink)(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition: smgr.c:83
void(* smgr_open)(SMgrRelation reln)
Definition: smgr.c:78
void(* smgr_shutdown)(void)
Definition: smgr.c:77
void(* smgr_writev)(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
Definition: smgr.c:96
void(* smgr_init)(void)
Definition: smgr.c:76
bool(* smgr_exists)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:82
void(* smgr_close)(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:79
bool InRecovery
Definition: xlogutils.c:50