PostgreSQL Source Code git master
Loading...
Searching...
No Matches
freespace.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * freespace.c
4 * POSTGRES free space map for quickly finding free space in relations
5 *
6 *
7 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/storage/freespace/freespace.c
12 *
13 *
14 * NOTES:
15 *
16 * Free Space Map keeps track of the amount of free space on pages, and
17 * allows quickly searching for a page with enough free space. The FSM is
18 * stored in a dedicated relation fork of all heap relations, and those
19 * index access methods that need it (see also indexfsm.c). See README for
20 * more information.
21 *
22 *-------------------------------------------------------------------------
23 */
24#include "postgres.h"
25
26#include "access/htup_details.h"
27#include "access/xloginsert.h"
28#include "access/xlogutils.h"
29#include "miscadmin.h"
30#include "storage/freespace.h"
32#include "storage/smgr.h"
33#include "utils/rel.h"
34
35
36/*
37 * We use just one byte to store the amount of free space on a page, so we
38 * divide the amount of free space a page can have into 256 different
39 * categories. The highest category, 255, represents a page with at least
40 * MaxFSMRequestSize bytes of free space, and the second highest category
41 * represents the range from 254 * FSM_CAT_STEP, inclusive, to
42 * MaxFSMRequestSize, exclusive.
43 *
44 * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
45 * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
46 * categories look like this:
47 *
48 *
49 * Range Category
50 * 0 - 31 0
51 * 32 - 63 1
52 * ... ... ...
53 * 8096 - 8127 253
54 * 8128 - 8163 254
55 * 8164 - 8192 255
56 *
57 * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
58 * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
59 * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
60 * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
61 * completely empty page, that would mean that we could never satisfy a
62 * request of exactly MaxFSMRequestSize bytes.
63 */
64#define FSM_CATEGORIES 256
65#define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
66#define MaxFSMRequestSize MaxHeapTupleSize
67
68/*
69 * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
70 * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
71 * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
72 * this means that 4096 bytes is the smallest BLCKSZ that we can get away
73 * with a 3-level tree, and 512 is the smallest we support.
74 */
75#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
76
77#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
78#define FSM_BOTTOM_LEVEL 0
79
80/*
81 * The internal FSM routines work on a logical addressing scheme. Each
82 * level of the tree can be thought of as a separately addressable file.
83 */
84typedef struct
85{
86 int level; /* level */
87 int logpageno; /* page number within the level */
89
90/* Address of the root page. */
92
93/* functions to navigate the tree */
94static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
95static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
99
100static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
102
103/* functions to convert amount of free space to a FSM category */
104static uint8 fsm_space_avail_to_cat(Size avail);
107
108/* workhorse functions for various operations */
109static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
114 bool *eof_p);
116
117
118/******** Public API ********/
119
120/*
121 * GetPageWithFreeSpace - try to find a page in the given relation with
122 * at least the specified amount of free space.
123 *
124 * If successful, return the block number; if not, return InvalidBlockNumber.
125 *
126 * The caller must be prepared for the possibility that the returned page
127 * will turn out to have too little space available by the time the caller
128 * gets a lock on it. In that case, the caller should report the actual
129 * amount of free space available on that page and then try again (see
130 * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
131 * extend the relation.
132 *
133 * This can trigger FSM updates if any FSM entry is found to point to a block
134 * past the end of the relation.
135 */
143
144/*
145 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
146 *
147 * We provide this combo form to save some locking overhead, compared to
148 * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
149 * also some effort to return a page close to the old page; if there's a
150 * page with enough free space on the same FSM page where the old one page
151 * is located, it is preferred.
152 */
156{
159 FSMAddress addr;
160 uint16 slot;
161 int search_slot;
162
163 /* Get the location of the FSM byte representing the heap block */
164 addr = fsm_get_location(oldPage, &slot);
165
167
168 /*
169 * If fsm_set_and_search found a suitable new block, return that.
170 * Otherwise, search as usual.
171 */
172 if (search_slot != -1)
173 {
175
176 /*
177 * Check that the blknum is actually in the relation. Don't try to
178 * update the FSM in that case, just fall back to the other case
179 */
181 return blknum;
182 }
183 return fsm_search(rel, search_cat);
184}
185
186/*
187 * RecordPageWithFreeSpace - update info about a page.
188 *
189 * Note that if the new spaceAvail value is higher than the old value stored
190 * in the FSM, the space might not become visible to searchers until the next
191 * FreeSpaceMapVacuum call, which updates the upper level pages.
192 */
193void
195{
197 FSMAddress addr;
198 uint16 slot;
199
200 /* Get the location of the FSM byte representing the heap block */
201 addr = fsm_get_location(heapBlk, &slot);
202
203 fsm_set_and_search(rel, addr, slot, new_cat, 0);
204}
205
206/*
207 * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
208 * WAL replay
209 */
210void
213{
215 FSMAddress addr;
216 uint16 slot;
217 BlockNumber blkno;
218 Buffer buf;
219 Page page;
220
221 /* Get the location of the FSM byte representing the heap block */
222 addr = fsm_get_location(heapBlk, &slot);
223 blkno = fsm_logical_to_physical(addr);
224
225 /* If the page doesn't exist already, extend */
226 buf = XLogReadBufferExtended(rlocator, FSM_FORKNUM, blkno,
229
230 page = BufferGetPage(buf);
231 if (PageIsNew(page))
232 PageInit(page, BLCKSZ, 0);
233
234 /*
235 * Changes to FSM are usually marked as changed using MarkBufferDirtyHint;
236 * however, during recovery, it does nothing if checksums are enabled. It
237 * is assumed that the page should not be dirtied during recovery while
238 * modifying hints to prevent torn pages, since no new WAL data can be
239 * generated at this point to store FPI. This is not relevant to the FSM
240 * case, as its blocks are zeroed when a checksum mismatch occurs. So, we
241 * need to use regular MarkBufferDirty here to mark the FSM block as
242 * modified during recovery, otherwise changes to the FSM may be lost.
243 */
244 if (fsm_set_avail(page, slot, new_cat))
247}
248
249/*
250 * GetRecordedFreeSpace - return the amount of free space on a particular page,
251 * according to the FSM.
252 */
253Size
255{
256 FSMAddress addr;
257 uint16 slot;
258 Buffer buf;
259 uint8 cat;
260
261 /* Get the location of the FSM byte representing the heap block */
262 addr = fsm_get_location(heapBlk, &slot);
263
264 buf = fsm_readbuf(rel, addr, false);
265 if (!BufferIsValid(buf))
266 return 0;
267 cat = fsm_get_avail(BufferGetPage(buf), slot);
269
270 return fsm_space_cat_to_avail(cat);
271}
272
273/*
274 * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
275 *
276 * nblocks is the new size of the heap.
277 *
278 * Return the number of blocks of new FSM.
279 * If it's InvalidBlockNumber, there is nothing to truncate;
280 * otherwise the caller is responsible for calling smgrtruncate()
281 * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
282 * to update upper-level pages in the FSM.
283 */
286{
290 Buffer buf;
291
292 /*
293 * If no FSM has been created yet for this relation, there's nothing to
294 * truncate.
295 */
297 return InvalidBlockNumber;
298
299 /* Get the location in the FSM of the first removed heap block */
301
302 /*
303 * Zero out the tail of the last remaining FSM page. If the slot
304 * representing the first removed heap block is at a page boundary, as the
305 * first slot on the FSM page that first_removed_address points to, we can
306 * just truncate that page altogether.
307 */
308 if (first_removed_slot > 0)
309 {
311 if (!BufferIsValid(buf))
312 return InvalidBlockNumber; /* nothing to do; the FSM was already
313 * smaller */
315
316 /* NO EREPORT(ERROR) from here till changes are logged */
318
320
321 /*
322 * This change is non-critical, because fsm_does_block_exist() would
323 * stop us from returning a truncated-away block. However, since this
324 * may remove up to SlotsPerFSMPage slots, it's nice to avoid the cost
325 * of that many fsm_does_block_exist() rejections. Use a full
326 * MarkBufferDirty(), not MarkBufferDirtyHint().
327 */
329
330 /*
331 * WAL-log like MarkBufferDirtyHint() might have done, just to avoid
332 * differing from the rest of the file in this respect. This is
333 * optional; see README mention of full page images. XXX consider
334 * XLogSaveBufferForHint() for even closer similarity.
335 *
336 * A higher-level operation calls us at WAL replay. If we crash
337 * before the XLOG_SMGR_TRUNCATE flushes to disk, main fork length has
338 * not changed, and our fork remains valid. If we crash after that
339 * flush, redo will return here.
340 */
342 log_newpage_buffer(buf, false);
343
345
347
349 }
350 else
351 {
354 return InvalidBlockNumber; /* nothing to do; the FSM was already
355 * smaller */
356 }
357
358 return new_nfsmblocks;
359}
360
361/*
362 * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
363 *
364 * We assume that the bottom-level pages have already been updated with
365 * new free-space information.
366 */
367void
369{
370 bool dummy;
371
372 /* Recursively scan the tree, starting at the root */
375 &dummy);
376}
377
378/*
379 * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
380 *
381 * As above, but assume that only heap pages between start and end-1 inclusive
382 * have new free-space information, so update only the upper-level slots
383 * covering that block range. end == InvalidBlockNumber is equivalent to
384 * "all the rest of the relation".
385 */
386void
388{
389 bool dummy;
390
391 /* Recursively scan the tree, starting at the root */
392 if (end > start)
393 (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
394}
395
396/******** Internal routines ********/
397
398/*
399 * Return category corresponding x bytes of free space
400 */
401static uint8
403{
404 int cat;
405
406 Assert(avail < BLCKSZ);
407
408 if (avail >= MaxFSMRequestSize)
409 return 255;
410
411 cat = avail / FSM_CAT_STEP;
412
413 /*
414 * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
415 * more.
416 */
417 if (cat > 254)
418 cat = 254;
419
420 return (uint8) cat;
421}
422
423/*
424 * Return the lower bound of the range of free space represented by given
425 * category.
426 */
427static Size
429{
430 /* The highest category represents exactly MaxFSMRequestSize bytes. */
431 if (cat == 255)
432 return MaxFSMRequestSize;
433 else
434 return cat * FSM_CAT_STEP;
435}
436
437/*
438 * Which category does a page need to have, to accommodate x bytes of data?
439 * While fsm_space_avail_to_cat() rounds down, this needs to round up.
440 */
441static uint8
443{
444 int cat;
445
446 /* Can't ask for more space than the highest category represents */
448 elog(ERROR, "invalid FSM request size %zu", needed);
449
450 if (needed == 0)
451 return 1;
452
453 cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
454
455 if (cat > 255)
456 cat = 255;
457
458 return (uint8) cat;
459}
460
461/*
462 * Returns the physical block number of a FSM page
463 */
464static BlockNumber
466{
467 BlockNumber pages;
468 int leafno;
469 int l;
470
471 /*
472 * Calculate the logical page number of the first leaf page below the
473 * given page.
474 */
475 leafno = addr.logpageno;
476 for (l = 0; l < addr.level; l++)
478
479 /* Count upper level nodes required to address the leaf page */
480 pages = 0;
481 for (l = 0; l < FSM_TREE_DEPTH; l++)
482 {
483 pages += leafno + 1;
485 }
486
487 /*
488 * If the page we were asked for wasn't at the bottom level, subtract the
489 * additional lower level pages we counted above.
490 */
491 pages -= addr.level;
492
493 /* Turn the page count into 0-based block number */
494 return pages - 1;
495}
496
497/*
498 * Return the FSM location corresponding to given heap block.
499 */
500static FSMAddress
502{
503 FSMAddress addr;
504
505 addr.level = FSM_BOTTOM_LEVEL;
507 *slot = heapblk % SlotsPerFSMPage;
508
509 return addr;
510}
511
512/*
513 * Return the heap block number corresponding to given location in the FSM.
514 */
515static BlockNumber
517{
519 return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
520}
521
522/*
523 * Given a logical address of a child page, get the logical page number of
524 * the parent, and the slot within the parent corresponding to the child.
525 */
526static FSMAddress
528{
529 FSMAddress parent;
530
531 Assert(child.level < FSM_ROOT_LEVEL);
532
533 parent.level = child.level + 1;
534 parent.logpageno = child.logpageno / SlotsPerFSMPage;
535 *slot = child.logpageno % SlotsPerFSMPage;
536
537 return parent;
538}
539
540/*
541 * Given a logical address of a parent page and a slot number, get the
542 * logical address of the corresponding child page.
543 */
544static FSMAddress
546{
547 FSMAddress child;
548
549 Assert(parent.level > FSM_BOTTOM_LEVEL);
550
551 child.level = parent.level - 1;
552 child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
553
554 return child;
555}
556
557/*
558 * Read a FSM page.
559 *
560 * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
561 * true, the FSM file is extended.
562 */
563static Buffer
565{
567 Buffer buf;
569
570 /*
571 * If we haven't cached the size of the FSM yet, check it first. Also
572 * recheck if the requested block seems to be past end, since our cached
573 * value might be stale. (We send smgr inval messages on truncation, but
574 * not on extension.)
575 */
576 if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
577 blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
578 {
579 /* Invalidate the cache so smgrnblocks asks the kernel. */
580 reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
583 else
584 reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
585 }
586
587 /*
588 * For reading we use ZERO_ON_ERROR mode, and initialize the page if
589 * necessary. The FSM information is not accurate anyway, so it's better
590 * to clear corrupt pages than error out. Since the FSM changes are not
591 * WAL-logged, the so-called torn page problem on crash can lead to pages
592 * with corrupt headers, for example.
593 *
594 * We use the same path below to initialize pages when extending the
595 * relation, as a concurrent extension can end up with vm_extend()
596 * returning an already-initialized page.
597 */
598 if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
599 {
600 if (extend)
601 buf = fsm_extend(rel, blkno + 1);
602 else
603 return InvalidBuffer;
604 }
605 else
607
608 /*
609 * Initializing the page when needed is trickier than it looks, because of
610 * the possibility of multiple backends doing this concurrently, and our
611 * desire to not uselessly take the buffer lock in the normal path where
612 * the page is OK. We must take the lock to initialize the page, so
613 * recheck page newness after we have the lock, in case someone else
614 * already did it. Also, because we initially check PageIsNew with no
615 * lock, it's possible to fall through and return the buffer while someone
616 * else is still initializing the page (i.e., we might see pd_upper as set
617 * but other page header fields are still zeroes). This is harmless for
618 * callers that will take a buffer lock themselves, but some callers
619 * inspect the page without any lock at all. The latter is OK only so
620 * long as it doesn't depend on the page header having correct contents.
621 * Current usage is safe because PageGetContents() does not require that.
622 */
624 {
629 }
630 return buf;
631}
632
633/*
634 * Ensure that the FSM fork is at least fsm_nblocks long, extending
635 * it if necessary with empty pages. And by empty, I mean pages filled
636 * with zeros, meaning there's no free space.
637 */
638static Buffer
647
648/*
649 * Set value in given FSM page and slot.
650 *
651 * If minValue > 0, the updated page is also searched for a page with at
652 * least minValue of free space. If one is found, its slot number is
653 * returned, -1 otherwise.
654 */
655static int
658{
659 Buffer buf;
660 Page page;
661 int newslot = -1;
662
663 buf = fsm_readbuf(rel, addr, true);
665
666 page = BufferGetPage(buf);
667
668 if (fsm_set_avail(page, slot, newValue))
669 MarkBufferDirtyHint(buf, false);
670
671 if (minValue != 0)
672 {
673 /* Search while we still hold the lock */
675 addr.level == FSM_BOTTOM_LEVEL,
676 true);
677 }
678
680
681 return newslot;
682}
683
684/*
685 * Search the tree for a heap page with at least min_cat of free space
686 */
687static BlockNumber
689{
690 int restarts = 0;
692
693 for (;;)
694 {
695 int slot;
696 Buffer buf;
697 uint8 max_avail = 0;
698
699 /* Read the FSM page. */
700 buf = fsm_readbuf(rel, addr, false);
701
702 /* Search within the page */
703 if (BufferIsValid(buf))
704 {
707 (addr.level == FSM_BOTTOM_LEVEL),
708 false);
709 if (slot == -1)
710 {
711 max_avail = fsm_get_max_avail(BufferGetPage(buf));
713 }
714 else
715 {
716 /* Keep the pin for possible update below */
718 }
719 }
720 else
721 slot = -1;
722
723 if (slot != -1)
724 {
725 /*
726 * Descend the tree, or return the found block if we're at the
727 * bottom.
728 */
729 if (addr.level == FSM_BOTTOM_LEVEL)
730 {
731 BlockNumber blkno = fsm_get_heap_blk(addr, slot);
732 Page page;
733
734 if (fsm_does_block_exist(rel, blkno))
735 {
737 return blkno;
738 }
739
740 /*
741 * Block is past the end of the relation. Update FSM, and
742 * restart from root. The usual "advancenext" behavior is
743 * pessimal for this rare scenario, since every later slot is
744 * unusable in the same way. We could zero all affected slots
745 * on the same FSM page, but don't bet on the benefits of that
746 * optimization justifying its compiled code bulk.
747 */
748 page = BufferGetPage(buf);
750 fsm_set_avail(page, slot, 0);
751 MarkBufferDirtyHint(buf, false);
753 if (restarts++ > 10000) /* same rationale as below */
754 return InvalidBlockNumber;
755 addr = FSM_ROOT_ADDRESS;
756 }
757 else
758 {
760 }
761 addr = fsm_get_child(addr, slot);
762 }
763 else if (addr.level == FSM_ROOT_LEVEL)
764 {
765 /*
766 * At the root, failure means there's no page with enough free
767 * space in the FSM. Give up.
768 */
769 return InvalidBlockNumber;
770 }
771 else
772 {
774 FSMAddress parent;
775
776 /*
777 * At lower level, failure can happen if the value in the upper-
778 * level node didn't reflect the value on the lower page. Update
779 * the upper node, to avoid falling into the same trap again, and
780 * start over.
781 *
782 * There's a race condition here, if another backend updates this
783 * page right after we release it, and gets the lock on the parent
784 * page before us. We'll then update the parent page with the now
785 * stale information we had. It's OK, because it should happen
786 * rarely, and will be fixed by the next vacuum.
787 */
788 parent = fsm_get_parent(addr, &parentslot);
789 fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
790
791 /*
792 * If the upper pages are badly out of date, we might need to loop
793 * quite a few times, updating them as we go. Any inconsistencies
794 * should eventually be corrected and the loop should end. Looping
795 * indefinitely is nevertheless scary, so provide an emergency
796 * valve.
797 */
798 if (restarts++ > 10000)
799 return InvalidBlockNumber;
800
801 /* Start search all over from the root */
802 addr = FSM_ROOT_ADDRESS;
803 }
804 }
805}
806
807
808/*
809 * Recursive guts of FreeSpaceMapVacuum
810 *
811 * Examine the FSM page indicated by addr, as well as its children, updating
812 * upper-level nodes that cover the heap block range from start to end-1.
813 * (It's okay if end is beyond the actual end of the map.)
814 * Return the maximum freespace value on this page.
815 *
816 * If addr is past the end of the FSM, set *eof_p to true and return 0.
817 *
818 * This traverses the tree in depth-first order. The tree is stored
819 * physically in depth-first order, so this should be pretty I/O efficient.
820 */
821static uint8
824 bool *eof_p)
825{
826 Buffer buf;
827 Page page;
828 uint8 max_avail;
829
830 /* Read the page if it exists, or return EOF */
831 buf = fsm_readbuf(rel, addr, false);
832 if (!BufferIsValid(buf))
833 {
834 *eof_p = true;
835 return 0;
836 }
837 else
838 *eof_p = false;
839
840 page = BufferGetPage(buf);
841
842 /*
843 * If we're above the bottom level, recurse into children, and fix the
844 * information stored about them at this level.
845 */
846 if (addr.level > FSM_BOTTOM_LEVEL)
847 {
849 fsm_end;
852 int slot,
854 end_slot;
855 bool eof = false;
856
857 /*
858 * Compute the range of slots we need to update on this page, given
859 * the requested range of heap blocks to consider. The first slot to
860 * update is the one covering the "start" block, and the last slot is
861 * the one covering "end - 1". (Some of this work will be duplicated
862 * in each recursive call, but it's cheap enough to not worry about.)
863 */
866
867 while (fsm_start.level < addr.level)
868 {
871 }
872 Assert(fsm_start.level == addr.level);
873
874 if (fsm_start.logpageno == addr.logpageno)
876 else if (fsm_start.logpageno > addr.logpageno)
877 start_slot = SlotsPerFSMPage; /* shouldn't get here... */
878 else
879 start_slot = 0;
880
881 if (fsm_end.logpageno == addr.logpageno)
883 else if (fsm_end.logpageno > addr.logpageno)
885 else
886 end_slot = -1; /* shouldn't get here... */
887
888 for (slot = start_slot; slot <= end_slot; slot++)
889 {
890 int child_avail;
891
893
894 /* After we hit end-of-file, just clear the rest of the slots */
895 if (!eof)
896 child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
897 start, end,
898 &eof);
899 else
900 child_avail = 0;
901
902 /* Update information about the child */
903 if (fsm_get_avail(page, slot) != child_avail)
904 {
906 fsm_set_avail(page, slot, child_avail);
907 MarkBufferDirtyHint(buf, false);
909 }
910 }
911 }
912
913 /* Now get the maximum value on the page, to return to caller */
914 max_avail = fsm_get_max_avail(page);
915
916 /*
917 * Try to reset the next slot pointer. This encourages the use of
918 * low-numbered pages, increasing the chances that a later vacuum can
919 * truncate the relation. We don't bother with marking the page dirty if
920 * it wasn't already, since this is just a hint.
921 */
924 {
925 ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
926 BufferFinishSetHintBits(buf, false, false);
927 }
928
930
931 return max_avail;
932}
933
934
935/*
936 * Check whether a block number is past the end of the relation. This can
937 * happen after WAL replay, if the FSM reached disk but newly-extended pages
938 * it refers to did not.
939 */
940static bool
942{
943 SMgrRelation smgr = RelationGetSmgr(rel);
944
945 /*
946 * If below the cached nblocks, the block surely exists. Otherwise, we
947 * face a trade-off. We opt to compare to a fresh nblocks, incurring
948 * lseek() overhead. The alternative would be to assume the block does
949 * not exist, but that would cause FSM to set zero space available for
950 * blocks that main fork extension just recorded.
951 */
953 blknumber < smgr->smgr_cached_nblocks[MAIN_FORKNUM]) ||
955}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
void BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std)
Definition bufmgr.c:7079
Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
Definition bufmgr.c:1031
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5595
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5612
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3156
bool BufferBeginSetHintBits(Buffer buffer)
Definition bufmgr.c:7051
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5830
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:926
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:309
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:468
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:212
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:222
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:207
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:334
@ EB_CLEAR_SIZE_CACHE
Definition bufmgr.h:90
@ EB_CREATE_FORK_IF_NEEDED
Definition bufmgr.h:84
@ RBM_ZERO_ON_ERROR
Definition bufmgr.h:51
#define BMR_REL(p_rel)
Definition bufmgr.h:114
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:419
void PageInit(Page page, Size pageSize, Size specialSize)
Definition bufpage.c:42
static bool PageIsNew(const PageData *page)
Definition bufpage.h:258
static char * PageGetContents(Page page)
Definition bufpage.h:282
PageData * Page
Definition bufpage.h:81
uint8_t uint8
Definition c.h:622
#define Assert(condition)
Definition c.h:943
uint16_t uint16
Definition c.h:623
size_t Size
Definition c.h:689
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define MaxFSMRequestSize
Definition freespace.c:66
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, BlockNumber start, BlockNumber end, bool *eof_p)
Definition freespace.c:822
static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot)
Definition freespace.c:545
static const FSMAddress FSM_ROOT_ADDRESS
Definition freespace.c:91
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition freespace.c:387
static bool fsm_does_block_exist(Relation rel, BlockNumber blknumber)
Definition freespace.c:941
static uint8 fsm_space_needed_to_cat(Size needed)
Definition freespace.c:442
#define FSM_BOTTOM_LEVEL
Definition freespace.c:78
static uint8 fsm_space_avail_to_cat(Size avail)
Definition freespace.c:402
static Size fsm_space_cat_to_avail(uint8 cat)
Definition freespace.c:428
static BlockNumber fsm_logical_to_physical(FSMAddress addr)
Definition freespace.c:465
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot)
Definition freespace.c:527
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition freespace.c:154
static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot)
Definition freespace.c:501
static Buffer fsm_extend(Relation rel, BlockNumber fsm_nblocks)
Definition freespace.c:639
static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
Definition freespace.c:564
void FreeSpaceMapVacuum(Relation rel)
Definition freespace.c:368
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition freespace.c:254
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, uint8 newValue, uint8 minValue)
Definition freespace.c:656
#define FSM_ROOT_LEVEL
Definition freespace.c:77
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:194
static BlockNumber fsm_search(Relation rel, uint8 min_cat)
Definition freespace.c:688
void XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:211
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition freespace.c:137
static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot)
Definition freespace.c:516
#define FSM_CAT_STEP
Definition freespace.c:65
BlockNumber FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
Definition freespace.c:285
#define FSM_TREE_DEPTH
Definition freespace.c:75
FSMPageData * FSMPage
#define SlotsPerFSMPage
uint8 fsm_get_avail(Page page, int slot)
Definition fsmpage.c:122
uint8 fsm_get_max_avail(Page page)
Definition fsmpage.c:138
bool fsm_set_avail(Page page, int slot, uint8 value)
Definition fsmpage.c:63
bool fsm_truncate_avail(Page page, int nslots)
Definition fsmpage.c:322
int fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext, bool exclusive_lock_held)
Definition fsmpage.c:158
return str start
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define END_CRIT_SECTION()
Definition miscadmin.h:154
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int fb(int x)
static SMgrRelation RelationGetSmgr(Relation rel)
Definition rel.h:578
#define RelationNeedsWAL(relation)
Definition rel.h:639
@ FSM_FORKNUM
Definition relpath.h:59
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
int logpageno
Definition freespace.c:87
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition smgr.h:47
#define XLogHintBitIsNeeded()
Definition xlog.h:123
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Buffer XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode, Buffer recent_buffer)
Definition xlogutils.c:460
bool InRecovery
Definition xlogutils.c:50