freespace.c
1/*-------------------------------------------------------------------------
2 *
3 * freespace.c
4 * POSTGRES free space map for quickly finding free space in relations
5 *
6 *
7 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/storage/freespace/freespace.c
12 *
13 *
14 * NOTES:
15 *
16 * Free Space Map keeps track of the amount of free space on pages, and
17 * allows quickly searching for a page with enough free space. The FSM is
18 * stored in a dedicated relation fork of all heap relations, and those
19 * index access methods that need it (see also indexfsm.c). See README for
20 * more information.
21 *
22 *-------------------------------------------------------------------------
23 */
24#include "postgres.h"
25
26#include "access/htup_details.h"
27#include "access/xloginsert.h"
28#include "access/xlogutils.h"
29#include "miscadmin.h"
30#include "storage/freespace.h"
31#include "storage/fsm_internals.h"
32#include "storage/smgr.h"
33#include "utils/rel.h"
34
35
36/*
37 * We use just one byte to store the amount of free space on a page, so we
38 * divide the amount of free space a page can have into 256 different
39 * categories. The highest category, 255, represents a page with at least
40 * MaxFSMRequestSize bytes of free space, and the second highest category
41 * represents the range from 254 * FSM_CAT_STEP, inclusive, to
42 * MaxFSMRequestSize, exclusive.
43 *
44 * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
45 * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
46 * categories look like this:
47 *
48 *
49 * Range Category
50 * 0 - 31 0
51 * 32 - 63 1
52 * ... ... ...
53 * 8096 - 8127 253
54 * 8128 - 8163 254
55 * 8164 - 8192 255
56 *
57 * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
58 * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
59 * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
60 * bytes. Since a completely empty page has no more than MaxFSMRequestSize
61 * bytes of free space, we could then never satisfy a request of exactly
62 * MaxFSMRequestSize bytes.
63 */
64#define FSM_CATEGORIES 256
65#define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
66#define MaxFSMRequestSize MaxHeapTupleSize
67
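
As a worked illustration of the mapping above (a standalone sketch, not part of freespace.c; it hard-codes the default 8 kB BLCKSZ, so FSM_CAT_STEP is 32 and MaxFSMRequestSize is 8164):

#include <assert.h>

/* Standalone mirror of the fsm_space_avail_to_cat() arithmetic for BLCKSZ = 8192. */
static unsigned char
avail_to_cat(unsigned avail)
{
	if (avail >= 8164)			/* MaxFSMRequestSize */
		return 255;
	return (unsigned char) (avail / 32 > 254 ? 254 : avail / 32);
}

int
main(void)
{
	assert(avail_to_cat(0) == 0);		/* 0 - 31      -> 0   */
	assert(avail_to_cat(31) == 0);
	assert(avail_to_cat(32) == 1);		/* 32 - 63     -> 1   */
	assert(avail_to_cat(8127) == 253);	/* 8096 - 8127 -> 253 */
	assert(avail_to_cat(8163) == 254);	/* 8128 - 8163 -> 254 */
	assert(avail_to_cat(8164) == 255);	/* >= MaxFSMRequestSize -> 255 */
	return 0;
}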
68/*
69 * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
70 * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
71 * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
72 * this means that 4096 bytes is the smallest BLCKSZ that we can get away
73 * with a 3-level tree, and 512 is the smallest we support.
74 */
75#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
76
77#define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
78#define FSM_BOTTOM_LEVEL 0
79
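
The 1626 and 256 thresholds quoted in the comment above can be checked directly; a small standalone sketch (not part of freespace.c):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t target = UINT64_C(4294967295);	/* 2^32 - 1 blocks */

	/* 1626 is the smallest X with X^3 >= 2^32-1 ... */
	printf("1625^3 = %llu\n", (unsigned long long) (1625ULL * 1625 * 1625));	/* 4291015625, too small */
	printf("1626^3 = %llu\n", (unsigned long long) (1626ULL * 1626 * 1626));	/* 4298942376, enough */

	/* ... and 256 is the smallest X with X^4 >= 2^32-1. */
	printf("255^4  = %llu\n", (unsigned long long) (255ULL * 255 * 255 * 255));	/* 4228250625, too small */
	printf("256^4  = %llu\n", (unsigned long long) (256ULL * 256 * 256 * 256));	/* 4294967296, enough */

	printf("target = %llu\n", (unsigned long long) target);
	return 0;
}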
80/*
81 * The internal FSM routines work on a logical addressing scheme. Each
82 * level of the tree can be thought of as a separately addressable file.
83 */
84typedef struct
85{
86 int level; /* level */
87 int logpageno; /* page number within the level */
 88} FSMAddress;
 89
 90/* Address of the root page. */
 91static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
 92
93/* functions to navigate the tree */
94static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
95static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
96static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
 97static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
 98static BlockNumber fsm_logical_to_physical(FSMAddress addr);
 99
100static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
101static Buffer fsm_extend(Relation rel, BlockNumber fsm_nblocks);
102
103/* functions to convert amount of free space to a FSM category */
104static uint8 fsm_space_avail_to_cat(Size avail);
105static uint8 fsm_space_needed_to_cat(Size needed);
 106static Size fsm_space_cat_to_avail(uint8 cat);
 107
108/* workhorse functions for various operations */
109static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
110 uint8 newValue, uint8 minValue);
111static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 112static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
 113 BlockNumber start, BlockNumber end,
 114 bool *eof_p);
115static bool fsm_does_block_exist(Relation rel, BlockNumber blknumber);
116
117
118/******** Public API ********/
119
120/*
121 * GetPageWithFreeSpace - try to find a page in the given relation with
122 * at least the specified amount of free space.
123 *
124 * If successful, return the block number; if not, return InvalidBlockNumber.
125 *
126 * The caller must be prepared for the possibility that the returned page
127 * will turn out to have too little space available by the time the caller
128 * gets a lock on it. In that case, the caller should report the actual
129 * amount of free space available on that page and then try again (see
130 * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
131 * extend the relation.
132 *
133 * This can trigger FSM updates if any FSM entry is found to point to a block
134 * past the end of the relation.
135 */
 136BlockNumber
 137GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
 138{
139 uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
140
141 return fsm_search(rel, min_cat);
142}
143
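
The caller contract described in the comment above looks roughly like the following loop (a sketch loosely modeled on what hio.c does; it is not code from this file, and error handling is omitted):

static Buffer
get_page_for_len_sketch(Relation rel, Size len)
{
	BlockNumber targetBlock = GetPageWithFreeSpace(rel, len);

	while (targetBlock != InvalidBlockNumber)
	{
		Buffer		buf = ReadBuffer(rel, targetBlock);
		Size		freespace;

		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		freespace = PageGetHeapFreeSpace(BufferGetPage(buf));
		if (freespace >= len)
			return buf;			/* caller keeps the exclusive lock and pin */

		/* Page filled up since the FSM was updated: record reality and retry */
		UnlockReleaseBuffer(buf);
		targetBlock = RecordAndGetPageWithFreeSpace(rel, targetBlock,
													freespace, len);
	}

	/* The FSM knows of no suitable page; the caller must extend the relation */
	return InvalidBuffer;
}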
144/*
145 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
146 *
147 * We provide this combo form to save some locking overhead, compared to
148 * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
149 * also some effort to return a page close to the old page; if there's a
 150 * page with enough free space on the same FSM page where the old page
151 * is located, it is preferred.
152 */
 153BlockNumber
 154RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
 155 Size oldSpaceAvail, Size spaceNeeded)
156{
157 int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
158 int search_cat = fsm_space_needed_to_cat(spaceNeeded);
159 FSMAddress addr;
160 uint16 slot;
161 int search_slot;
162
163 /* Get the location of the FSM byte representing the heap block */
164 addr = fsm_get_location(oldPage, &slot);
165
166 search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
167
168 /*
169 * If fsm_set_and_search found a suitable new block, return that.
170 * Otherwise, search as usual.
171 */
172 if (search_slot != -1)
173 {
174 BlockNumber blknum = fsm_get_heap_blk(addr, search_slot);
175
176 /*
177 * Check that the blknum is actually in the relation. Don't try to
178 * update the FSM in that case, just fall back to the other case
179 */
180 if (fsm_does_block_exist(rel, blknum))
181 return blknum;
182 }
183 return fsm_search(rel, search_cat);
184}
185
186/*
187 * RecordPageWithFreeSpace - update info about a page.
188 *
189 * Note that if the new spaceAvail value is higher than the old value stored
190 * in the FSM, the space might not become visible to searchers until the next
191 * FreeSpaceMapVacuum call, which updates the upper level pages.
192 */
193void
 194RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 195{
196 int new_cat = fsm_space_avail_to_cat(spaceAvail);
197 FSMAddress addr;
198 uint16 slot;
199
200 /* Get the location of the FSM byte representing the heap block */
201 addr = fsm_get_location(heapBlk, &slot);
202
203 fsm_set_and_search(rel, addr, slot, new_cat, 0);
204}
205
206/*
207 * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
208 * WAL replay
209 */
210void
 211XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk,
 212 Size spaceAvail)
213{
214 int new_cat = fsm_space_avail_to_cat(spaceAvail);
215 FSMAddress addr;
216 uint16 slot;
217 BlockNumber blkno;
218 Buffer buf;
219 Page page;
220
221 /* Get the location of the FSM byte representing the heap block */
222 addr = fsm_get_location(heapBlk, &slot);
223 blkno = fsm_logical_to_physical(addr);
224
225 /* If the page doesn't exist already, extend */
226 buf = XLogReadBufferExtended(rlocator, FSM_FORKNUM, blkno,
 227 RBM_ZERO_ON_ERROR, InvalidBuffer);
 228 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 229
230 page = BufferGetPage(buf);
231 if (PageIsNew(page))
232 PageInit(page, BLCKSZ, 0);
233
234 if (fsm_set_avail(page, slot, new_cat))
235 MarkBufferDirtyHint(buf, false);
 236 UnlockReleaseBuffer(buf);
 237}
238
239/*
240 * GetRecordedFreeSpace - return the amount of free space on a particular page,
241 * according to the FSM.
242 */
243Size
 244GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
 245{
246 FSMAddress addr;
247 uint16 slot;
248 Buffer buf;
249 uint8 cat;
250
251 /* Get the location of the FSM byte representing the heap block */
252 addr = fsm_get_location(heapBlk, &slot);
253
254 buf = fsm_readbuf(rel, addr, false);
255 if (!BufferIsValid(buf))
256 return 0;
257 cat = fsm_get_avail(BufferGetPage(buf), slot);
 258 ReleaseBuffer(buf);
 259
260 return fsm_space_cat_to_avail(cat);
261}
262
263/*
264 * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
265 *
266 * nblocks is the new size of the heap.
267 *
268 * Return the number of blocks of new FSM.
269 * If it's InvalidBlockNumber, there is nothing to truncate;
270 * otherwise the caller is responsible for calling smgrtruncate()
271 * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
272 * to update upper-level pages in the FSM.
273 */
 274BlockNumber
 275FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
 276{
277 BlockNumber new_nfsmblocks;
278 FSMAddress first_removed_address;
279 uint16 first_removed_slot;
280 Buffer buf;
281
282 /*
283 * If no FSM has been created yet for this relation, there's nothing to
284 * truncate.
285 */
 286 if (!smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
 287 return InvalidBlockNumber;
288
289 /* Get the location in the FSM of the first removed heap block */
290 first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
291
292 /*
293 * Zero out the tail of the last remaining FSM page. If the slot
 294 * representing the first removed heap block is at a page boundary, i.e. it is
 295 * the first slot on the FSM page that first_removed_address points to, we can
 296 * just truncate that page altogether.
297 */
298 if (first_removed_slot > 0)
299 {
300 buf = fsm_readbuf(rel, first_removed_address, false);
301 if (!BufferIsValid(buf))
302 return InvalidBlockNumber; /* nothing to do; the FSM was already
303 * smaller */
 304 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 305
306 /* NO EREPORT(ERROR) from here till changes are logged */
 307 START_CRIT_SECTION();
 308
309 fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
310
311 /*
312 * This change is non-critical, because fsm_does_block_exist() would
313 * stop us from returning a truncated-away block. However, since this
314 * may remove up to SlotsPerFSMPage slots, it's nice to avoid the cost
315 * of that many fsm_does_block_exist() rejections. Use a full
316 * MarkBufferDirty(), not MarkBufferDirtyHint().
317 */
 318 MarkBufferDirty(buf);
 319
320 /*
321 * WAL-log like MarkBufferDirtyHint() might have done, just to avoid
322 * differing from the rest of the file in this respect. This is
323 * optional; see README mention of full page images. XXX consider
324 * XLogSaveBufferForHint() for even closer similarity.
325 *
326 * A higher-level operation calls us at WAL replay. If we crash
327 * before the XLOG_SMGR_TRUNCATE flushes to disk, main fork length has
328 * not changed, and our fork remains valid. If we crash after that
329 * flush, redo will return here.
330 */
 331 if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
 332 log_newpage_buffer(buf, false);
333
 334 END_CRIT_SECTION();
 335
 336 UnlockReleaseBuffer(buf);
 337
338 new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
339 }
340 else
341 {
342 new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
343 if (smgrnblocks(RelationGetSmgr(rel), FSM_FORKNUM) <= new_nfsmblocks)
344 return InvalidBlockNumber; /* nothing to do; the FSM was already
345 * smaller */
346 }
347
348 return new_nfsmblocks;
349}
350
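
For reference, the caller-side sequence that the comment above describes is roughly the following (a sketch of what RelationTruncate() in storage.c arranges, not the actual code):

static void
truncate_rel_fsm_sketch(Relation rel, BlockNumber nblocks)
{
	BlockNumber fsm_nblocks;

	/* 1. Compute the new length of the FSM fork. */
	fsm_nblocks = FreeSpaceMapPrepareTruncateRel(rel, nblocks);

	if (BlockNumberIsValid(fsm_nblocks))
	{
		/* 2. Physically truncate the FSM fork to fsm_nblocks here
		 *    (the smgrtruncate() call is omitted in this sketch). */
	}

	/* 3. Afterwards, repair the upper-level FSM pages for the removed range. */
	FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}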
351/*
352 * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
353 *
354 * We assume that the bottom-level pages have already been updated with
355 * new free-space information.
356 */
357void
 358FreeSpaceMapVacuum(Relation rel)
 359{
360 bool dummy;
361
362 /* Recursively scan the tree, starting at the root */
 363 (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
 364 0, InvalidBlockNumber,
 365 &dummy);
366}
367
368/*
369 * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
370 *
371 * As above, but assume that only heap pages between start and end-1 inclusive
372 * have new free-space information, so update only the upper-level slots
373 * covering that block range. end == InvalidBlockNumber is equivalent to
374 * "all the rest of the relation".
375 */
376void
 377FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
 378{
379 bool dummy;
380
381 /* Recursively scan the tree, starting at the root */
382 if (end > start)
383 (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
384}
385
386/******** Internal routines ********/
387
388/*
 389 * Return the category corresponding to x bytes of free space
390 */
391static uint8
 392fsm_space_avail_to_cat(Size avail)
 393{
394 int cat;
395
396 Assert(avail < BLCKSZ);
397
398 if (avail >= MaxFSMRequestSize)
399 return 255;
400
401 cat = avail / FSM_CAT_STEP;
402
403 /*
404 * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
405 * more.
406 */
407 if (cat > 254)
408 cat = 254;
409
410 return (uint8) cat;
411}
412
413/*
414 * Return the lower bound of the range of free space represented by given
415 * category.
416 */
417static Size
 418fsm_space_cat_to_avail(uint8 cat)
 419{
420 /* The highest category represents exactly MaxFSMRequestSize bytes. */
421 if (cat == 255)
422 return MaxFSMRequestSize;
423 else
424 return cat * FSM_CAT_STEP;
425}
426
427/*
428 * Which category does a page need to have, to accommodate x bytes of data?
429 * While fsm_space_avail_to_cat() rounds down, this needs to round up.
430 */
431static uint8
 432fsm_space_needed_to_cat(Size needed)
 433{
434 int cat;
435
436 /* Can't ask for more space than the highest category represents */
437 if (needed > MaxFSMRequestSize)
438 elog(ERROR, "invalid FSM request size %zu", needed);
439
440 if (needed == 0)
441 return 1;
442
443 cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
444
445 if (cat > 255)
446 cat = 255;
447
448 return (uint8) cat;
449}
450
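
The asymmetry between the two conversions matters: available space is rounded down to a category, while a request is rounded up, so a page is only returned when it certainly has enough room. A small sketch (not part of freespace.c, again assuming FSM_CAT_STEP = 32):

#include <assert.h>

int
main(void)
{
	/* A page with 33 bytes free is advertised as category 1 (range 32..63). */
	unsigned	page_cat = 33 / 32;				/* rounds down -> 1 */

	/* A request for 33 bytes needs category 2, i.e. at least 64 bytes free. */
	unsigned	need_cat = (33 + 32 - 1) / 32;	/* rounds up -> 2 */

	assert(page_cat == 1 && need_cat == 2);
	/* So a 33-byte page does not satisfy a 33-byte request; only pages of
	 * category >= 2 (at least 64 bytes free) are considered. */
	return 0;
}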
451/*
452 * Returns the physical block number of a FSM page
453 */
454static BlockNumber
 455fsm_logical_to_physical(FSMAddress addr)
 456{
457 BlockNumber pages;
458 int leafno;
459 int l;
460
461 /*
462 * Calculate the logical page number of the first leaf page below the
463 * given page.
464 */
465 leafno = addr.logpageno;
466 for (l = 0; l < addr.level; l++)
467 leafno *= SlotsPerFSMPage;
468
469 /* Count upper level nodes required to address the leaf page */
470 pages = 0;
471 for (l = 0; l < FSM_TREE_DEPTH; l++)
472 {
473 pages += leafno + 1;
474 leafno /= SlotsPerFSMPage;
475 }
476
477 /*
478 * If the page we were asked for wasn't at the bottom level, subtract the
479 * additional lower level pages we counted above.
480 */
481 pages -= addr.level;
482
483 /* Turn the page count into 0-based block number */
484 return pages - 1;
485}
486
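
To make the depth-first layout concrete, here is a toy version of the computation above with 3 slots per page and a 3-level tree (a sketch, not part of freespace.c). It prints the root at block 0, the first middle page at block 1, its three leaves at blocks 2-4, the second middle page at block 5, and so on:

#include <stdio.h>

#define TOY_SLOTS 3
#define TOY_DEPTH 3

static unsigned
toy_logical_to_physical(int level, unsigned logpageno)
{
	unsigned	leafno = logpageno;
	unsigned	pages = 0;
	int			l;

	for (l = 0; l < level; l++)
		leafno *= TOY_SLOTS;
	for (l = 0; l < TOY_DEPTH; l++)
	{
		pages += leafno + 1;
		leafno /= TOY_SLOTS;
	}
	pages -= level;
	return pages - 1;
}

int
main(void)
{
	unsigned	p;

	printf("root (level 2, page 0) -> block %u\n", toy_logical_to_physical(2, 0));
	printf("mid  (level 1, page 0) -> block %u\n", toy_logical_to_physical(1, 0));
	printf("mid  (level 1, page 1) -> block %u\n", toy_logical_to_physical(1, 1));
	for (p = 0; p < 6; p++)
		printf("leaf (level 0, page %u) -> block %u\n", p, toy_logical_to_physical(0, p));
	return 0;
}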
487/*
488 * Return the FSM location corresponding to given heap block.
489 */
490static FSMAddress
 491fsm_get_location(BlockNumber heapblk, uint16 *slot)
 492{
493 FSMAddress addr;
494
495 addr.level = FSM_BOTTOM_LEVEL;
496 addr.logpageno = heapblk / SlotsPerFSMPage;
497 *slot = heapblk % SlotsPerFSMPage;
498
499 return addr;
500}
501
502/*
503 * Return the heap block number corresponding to given location in the FSM.
504 */
505static BlockNumber
 506fsm_get_heap_blk(FSMAddress addr, uint16 slot)
 507{
 508 Assert(addr.level == FSM_BOTTOM_LEVEL);
509 return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
510}
511
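
A short sketch of the round trip between a heap block number and its bottom-level (page, slot) address, as computed by fsm_get_location() and inverted by fsm_get_heap_blk() (not part of freespace.c; SLOTS is a hypothetical stand-in for SlotsPerFSMPage, whose exact value depends on BLCKSZ):

#include <assert.h>
#include <stdint.h>

#define SLOTS 4000u				/* hypothetical stand-in for SlotsPerFSMPage */

int
main(void)
{
	uint32_t	heapblk = 123456789;
	uint32_t	logpageno = heapblk / SLOTS;	/* bottom-level FSM page */
	uint32_t	slot = heapblk % SLOTS;			/* slot within that page */

	/* fsm_get_heap_blk() undoes fsm_get_location() */
	assert(logpageno * SLOTS + slot == heapblk);
	return 0;
}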
512/*
513 * Given a logical address of a child page, get the logical page number of
514 * the parent, and the slot within the parent corresponding to the child.
515 */
516static FSMAddress
 517fsm_get_parent(FSMAddress child, uint16 *slot)
 518{
519 FSMAddress parent;
520
521 Assert(child.level < FSM_ROOT_LEVEL);
522
523 parent.level = child.level + 1;
524 parent.logpageno = child.logpageno / SlotsPerFSMPage;
525 *slot = child.logpageno % SlotsPerFSMPage;
526
527 return parent;
528}
529
530/*
531 * Given a logical address of a parent page and a slot number, get the
532 * logical address of the corresponding child page.
533 */
534static FSMAddress
 535fsm_get_child(FSMAddress parent, uint16 slot)
 536{
537 FSMAddress child;
538
539 Assert(parent.level > FSM_BOTTOM_LEVEL);
540
541 child.level = parent.level - 1;
542 child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
543
544 return child;
545}
546
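
Similarly, fsm_get_parent() and fsm_get_child() are inverses of each other at the level of logical page numbers; a quick sketch (not part of freespace.c, using the same hypothetical SLOTS stand-in):

#include <assert.h>
#include <stdint.h>

#define SLOTS 4000u				/* hypothetical stand-in for SlotsPerFSMPage */

int
main(void)
{
	uint32_t	child_pageno = 98765;

	/* fsm_get_parent(): which parent page and slot cover this child page */
	uint32_t	parent_pageno = child_pageno / SLOTS;
	uint32_t	slot = child_pageno % SLOTS;

	/* fsm_get_child(): back down to the same child page */
	assert(parent_pageno * SLOTS + slot == child_pageno);
	return 0;
}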
547/*
548 * Read a FSM page.
549 *
550 * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
551 * true, the FSM file is extended.
552 */
553static Buffer
554fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
555{
 556 BlockNumber blkno = fsm_logical_to_physical(addr);
 557 Buffer buf;
558 SMgrRelation reln = RelationGetSmgr(rel);
559
560 /*
561 * If we haven't cached the size of the FSM yet, check it first. Also
562 * recheck if the requested block seems to be past end, since our cached
563 * value might be stale. (We send smgr inval messages on truncation, but
564 * not on extension.)
565 */
 566 if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
 567 blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
568 {
569 /* Invalidate the cache so smgrnblocks asks the kernel. */
 570 reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
 571 if (smgrexists(reln, FSM_FORKNUM))
 572 smgrnblocks(reln, FSM_FORKNUM);
 573 else
 574 reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
 575 }
576
577 /*
578 * For reading we use ZERO_ON_ERROR mode, and initialize the page if
579 * necessary. The FSM information is not accurate anyway, so it's better
580 * to clear corrupt pages than error out. Since the FSM changes are not
581 * WAL-logged, the so-called torn page problem on crash can lead to pages
582 * with corrupt headers, for example.
583 *
584 * We use the same path below to initialize pages when extending the
 585 * relation, as a concurrent extension can end up with fsm_extend()
586 * returning an already-initialized page.
587 */
588 if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
589 {
590 if (extend)
591 buf = fsm_extend(rel, blkno + 1);
592 else
593 return InvalidBuffer;
594 }
595 else
 596 buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
 597
598 /*
599 * Initializing the page when needed is trickier than it looks, because of
600 * the possibility of multiple backends doing this concurrently, and our
601 * desire to not uselessly take the buffer lock in the normal path where
602 * the page is OK. We must take the lock to initialize the page, so
603 * recheck page newness after we have the lock, in case someone else
604 * already did it. Also, because we initially check PageIsNew with no
605 * lock, it's possible to fall through and return the buffer while someone
606 * else is still initializing the page (i.e., we might see pd_upper as set
607 * but other page header fields are still zeroes). This is harmless for
608 * callers that will take a buffer lock themselves, but some callers
609 * inspect the page without any lock at all. The latter is OK only so
610 * long as it doesn't depend on the page header having correct contents.
611 * Current usage is safe because PageGetContents() does not require that.
612 */
 613 if (PageIsNew(BufferGetPage(buf)))
 614 {
 615 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 616 if (PageIsNew(BufferGetPage(buf)))
 617 PageInit(BufferGetPage(buf), BLCKSZ, 0);
 618 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 619 }
620 return buf;
621}
622
623/*
624 * Ensure that the FSM fork is at least fsm_nblocks long, extending
625 * it if necessary with empty pages. And by empty, I mean pages filled
626 * with zeros, meaning there's no free space.
627 */
628static Buffer
 629fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 630{
 631 return ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL,
 632 EB_CREATE_FORK_IF_NEEDED |
 633 EB_CLEAR_SIZE_CACHE,
 634 fsm_nblocks,
 635 RBM_ZERO_ON_ERROR);
 636}
637
638/*
639 * Set value in given FSM page and slot.
640 *
641 * If minValue > 0, the updated page is also searched for a page with at
642 * least minValue of free space. If one is found, its slot number is
643 * returned, -1 otherwise.
644 */
645static int
 646fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 647 uint8 newValue, uint8 minValue)
648{
649 Buffer buf;
650 Page page;
651 int newslot = -1;
652
653 buf = fsm_readbuf(rel, addr, true);
 654 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 655
656 page = BufferGetPage(buf);
657
658 if (fsm_set_avail(page, slot, newValue))
659 MarkBufferDirtyHint(buf, false);
660
661 if (minValue != 0)
662 {
663 /* Search while we still hold the lock */
664 newslot = fsm_search_avail(buf, minValue,
665 addr.level == FSM_BOTTOM_LEVEL,
666 true);
667 }
668
 669 UnlockReleaseBuffer(buf);
 670
671 return newslot;
672}
673
674/*
675 * Search the tree for a heap page with at least min_cat of free space
676 */
677static BlockNumber
 678fsm_search(Relation rel, uint8 min_cat)
 679{
680 int restarts = 0;
 681 FSMAddress addr = FSM_ROOT_ADDRESS;
 682
683 for (;;)
684 {
685 int slot;
686 Buffer buf;
687 uint8 max_avail = 0;
688
689 /* Read the FSM page. */
690 buf = fsm_readbuf(rel, addr, false);
691
692 /* Search within the page */
693 if (BufferIsValid(buf))
694 {
 695 LockBuffer(buf, BUFFER_LOCK_SHARE);
 696 slot = fsm_search_avail(buf, min_cat,
697 (addr.level == FSM_BOTTOM_LEVEL),
698 false);
699 if (slot == -1)
700 {
701 max_avail = fsm_get_max_avail(BufferGetPage(buf));
 702 UnlockReleaseBuffer(buf);
 703 }
704 else
705 {
706 /* Keep the pin for possible update below */
 707 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 708 }
709 }
710 else
711 slot = -1;
712
713 if (slot != -1)
714 {
715 /*
716 * Descend the tree, or return the found block if we're at the
717 * bottom.
718 */
719 if (addr.level == FSM_BOTTOM_LEVEL)
720 {
721 BlockNumber blkno = fsm_get_heap_blk(addr, slot);
722 Page page;
723
724 if (fsm_does_block_exist(rel, blkno))
725 {
 726 ReleaseBuffer(buf);
 727 return blkno;
728 }
729
730 /*
731 * Block is past the end of the relation. Update FSM, and
732 * restart from root. The usual "advancenext" behavior is
733 * pessimal for this rare scenario, since every later slot is
734 * unusable in the same way. We could zero all affected slots
735 * on the same FSM page, but don't bet on the benefits of that
736 * optimization justifying its compiled code bulk.
737 */
738 page = BufferGetPage(buf);
 739 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 740 fsm_set_avail(page, slot, 0);
741 MarkBufferDirtyHint(buf, false);
 742 UnlockReleaseBuffer(buf);
 743 if (restarts++ > 10000) /* same rationale as below */
744 return InvalidBlockNumber;
745 addr = FSM_ROOT_ADDRESS;
746 }
747 else
748 {
750 }
751 addr = fsm_get_child(addr, slot);
752 }
753 else if (addr.level == FSM_ROOT_LEVEL)
754 {
755 /*
756 * At the root, failure means there's no page with enough free
757 * space in the FSM. Give up.
758 */
759 return InvalidBlockNumber;
760 }
761 else
762 {
763 uint16 parentslot;
764 FSMAddress parent;
765
766 /*
767 * At lower level, failure can happen if the value in the upper-
768 * level node didn't reflect the value on the lower page. Update
769 * the upper node, to avoid falling into the same trap again, and
770 * start over.
771 *
772 * There's a race condition here, if another backend updates this
773 * page right after we release it, and gets the lock on the parent
774 * page before us. We'll then update the parent page with the now
775 * stale information we had. It's OK, because it should happen
776 * rarely, and will be fixed by the next vacuum.
777 */
778 parent = fsm_get_parent(addr, &parentslot);
779 fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
780
781 /*
782 * If the upper pages are badly out of date, we might need to loop
783 * quite a few times, updating them as we go. Any inconsistencies
784 * should eventually be corrected and the loop should end. Looping
785 * indefinitely is nevertheless scary, so provide an emergency
786 * valve.
787 */
788 if (restarts++ > 10000)
789 return InvalidBlockNumber;
790
791 /* Start search all over from the root */
792 addr = FSM_ROOT_ADDRESS;
793 }
794 }
795}
796
797
798/*
799 * Recursive guts of FreeSpaceMapVacuum
800 *
801 * Examine the FSM page indicated by addr, as well as its children, updating
802 * upper-level nodes that cover the heap block range from start to end-1.
803 * (It's okay if end is beyond the actual end of the map.)
804 * Return the maximum freespace value on this page.
805 *
806 * If addr is past the end of the FSM, set *eof_p to true and return 0.
807 *
808 * This traverses the tree in depth-first order. The tree is stored
809 * physically in depth-first order, so this should be pretty I/O efficient.
810 */
811static uint8
 812fsm_vacuum_page(Relation rel, FSMAddress addr,
 813 BlockNumber start, BlockNumber end,
 814 bool *eof_p)
815{
816 Buffer buf;
817 Page page;
818 uint8 max_avail;
819
820 /* Read the page if it exists, or return EOF */
821 buf = fsm_readbuf(rel, addr, false);
822 if (!BufferIsValid(buf))
823 {
824 *eof_p = true;
825 return 0;
826 }
827 else
828 *eof_p = false;
829
830 page = BufferGetPage(buf);
831
832 /*
833 * If we're above the bottom level, recurse into children, and fix the
834 * information stored about them at this level.
835 */
836 if (addr.level > FSM_BOTTOM_LEVEL)
837 {
838 FSMAddress fsm_start,
839 fsm_end;
840 uint16 fsm_start_slot,
841 fsm_end_slot;
842 int slot,
843 start_slot,
844 end_slot;
845 bool eof = false;
846
847 /*
848 * Compute the range of slots we need to update on this page, given
849 * the requested range of heap blocks to consider. The first slot to
850 * update is the one covering the "start" block, and the last slot is
851 * the one covering "end - 1". (Some of this work will be duplicated
852 * in each recursive call, but it's cheap enough to not worry about.)
853 */
854 fsm_start = fsm_get_location(start, &fsm_start_slot);
855 fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
856
857 while (fsm_start.level < addr.level)
858 {
859 fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
860 fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
861 }
862 Assert(fsm_start.level == addr.level);
863
864 if (fsm_start.logpageno == addr.logpageno)
865 start_slot = fsm_start_slot;
866 else if (fsm_start.logpageno > addr.logpageno)
867 start_slot = SlotsPerFSMPage; /* shouldn't get here... */
868 else
869 start_slot = 0;
870
871 if (fsm_end.logpageno == addr.logpageno)
872 end_slot = fsm_end_slot;
873 else if (fsm_end.logpageno > addr.logpageno)
874 end_slot = SlotsPerFSMPage - 1;
875 else
876 end_slot = -1; /* shouldn't get here... */
877
878 for (slot = start_slot; slot <= end_slot; slot++)
879 {
880 int child_avail;
881
 882 CHECK_FOR_INTERRUPTS();
 883
884 /* After we hit end-of-file, just clear the rest of the slots */
885 if (!eof)
886 child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
887 start, end,
888 &eof);
889 else
890 child_avail = 0;
891
892 /* Update information about the child */
893 if (fsm_get_avail(page, slot) != child_avail)
894 {
 895 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 896 fsm_set_avail(page, slot, child_avail);
897 MarkBufferDirtyHint(buf, false);
 898 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 899 }
900 }
901 }
902
903 /* Now get the maximum value on the page, to return to caller */
904 max_avail = fsm_get_max_avail(page);
905
906 /*
907 * Reset the next slot pointer. This encourages the use of low-numbered
908 * pages, increasing the chances that a later vacuum can truncate the
909 * relation. We don't bother with a lock here, nor with marking the page
910 * dirty if it wasn't already, since this is just a hint.
911 */
912 ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
913
915
916 return max_avail;
917}
918
919
920/*
921 * Check whether a block number is past the end of the relation. This can
922 * happen after WAL replay, if the FSM reached disk but newly-extended pages
923 * it refers to did not.
924 */
925static bool
 926fsm_does_block_exist(Relation rel, BlockNumber blknumber)
 927{
928 SMgrRelation smgr = RelationGetSmgr(rel);
929
930 /*
931 * If below the cached nblocks, the block surely exists. Otherwise, we
932 * face a trade-off. We opt to compare to a fresh nblocks, incurring
933 * lseek() overhead. The alternative would be to assume the block does
934 * not exist, but that would cause FSM to set zero space available for
935 * blocks that main fork extension just recorded.
936 */
 937 return ((BlockNumberIsValid(smgr->smgr_cached_nblocks[MAIN_FORKNUM]) &&
 938 blknumber < smgr->smgr_cached_nblocks[MAIN_FORKNUM]) ||
939 blknumber < RelationGetNumberOfBlocks(rel));
940}