PostgreSQL Source Code  git master
freespace.c
1 /*-------------------------------------------------------------------------
2  *
3  * freespace.c
4  * POSTGRES free space map for quickly finding free space in relations
5  *
6  *
7  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/storage/freespace/freespace.c
12  *
13  *
14  * NOTES:
15  *
16  * Free Space Map keeps track of the amount of free space on pages, and
17  * allows quickly searching for a page with enough free space. The FSM is
18  * stored in a dedicated relation fork of all heap relations, and those
19  * index access methods that need it (see also indexfsm.c). See README for
20  * more information.
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "access/xloginsert.h"
28 #include "access/xlogutils.h"
29 #include "miscadmin.h"
30 #include "storage/freespace.h"
31 #include "storage/fsm_internals.h"
32 #include "storage/lmgr.h"
33 #include "storage/smgr.h"
34 
35 
36 /*
37  * We use just one byte to store the amount of free space on a page, so we
38  * divide the amount of free space a page can have into 256 different
39  * categories. The highest category, 255, represents a page with at least
40  * MaxFSMRequestSize bytes of free space, and the second highest category
41  * represents the range from 254 * FSM_CAT_STEP, inclusive, to
42  * MaxFSMRequestSize, exclusive.
43  *
44  * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
45  * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
46  * categories look like this:
47  *
48  *
49  * Range Category
50  * 0 - 31 0
51  * 32 - 63 1
52  * ... ... ...
53  * 8096 - 8127 253
54  * 8128 - 8163 254
55  * 8164 - 8192 255
56  *
57  * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
58  * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
59  * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
60  * bytes. Since a completely empty page never has more than MaxFSMRequestSize
61  * bytes of free space, we could then never satisfy a request of exactly
62  * MaxFSMRequestSize bytes.
63  */
64 #define FSM_CATEGORIES 256
65 #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
66 #define MaxFSMRequestSize MaxHeapTupleSize
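/*
 * Illustrative sketch, not part of the original file: with the default
 * BLCKSZ of 8192, FSM_CAT_STEP is 8192 / 256 = 32 bytes.  A page with 8000
 * bytes free therefore maps to category 8000 / 32 = 250, covering the range
 * 8000..8031, while a request for 8001 bytes rounds up to category
 * (8001 + 31) / 32 = 251.  The helper below mirrors that arithmetic; its
 * name is an assumption and it is compiled out by default.
 */
#ifdef FSM_CATEGORY_EXAMPLE
static uint8
fsm_example_avail_to_cat(Size avail)
{
	/* pages with MaxFSMRequestSize or more always get the top category */
	if (avail >= MaxFSMRequestSize)
		return 255;
	/* otherwise round down, but never into the reserved top category */
	return (uint8) Min(avail / FSM_CAT_STEP, 254);
}
#endif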
67 
68 /*
69  * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
70  * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
71  * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
72  * this means that 4096 bytes is the smallest BLCKSZ that lets us get away
73  * with a 3-level tree, and 512 is the smallest we support.
74  */
75 #define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
76 
77 #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
78 #define FSM_BOTTOM_LEVEL 0
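/*
 * Illustrative arithmetic, not part of the original file: with the default
 * 8K BLCKSZ each FSM page holds roughly 4000 leaf slots, so a three-level
 * tree can address on the order of 4000^3 = 6.4e10 heap blocks, comfortably
 * above the 2^32 - 1 = 4294967295 block limit.  With very small block sizes
 * SlotsPerFSMPage drops below 1626 and a fourth level becomes necessary,
 * which is exactly what the FSM_TREE_DEPTH definition above encodes.
 */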
79 
80 /*
81  * The internal FSM routines work on a logical addressing scheme. Each
82  * level of the tree can be thought of as a separately addressable file.
83  */
84 typedef struct
85 {
86  int level; /* level */
87  int logpageno; /* page number within the level */
88 } FSMAddress;
89 
90 /* Address of the root page. */
91 static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
92 
93 /* functions to navigate the tree */
94 static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
95 static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
96 static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
97 static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
98 static BlockNumber fsm_logical_to_physical(FSMAddress addr);
99 
100 static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
101 static void fsm_extend(Relation rel, BlockNumber fsm_nblocks);
102 
103 /* functions to convert amount of free space to a FSM category */
104 static uint8 fsm_space_avail_to_cat(Size avail);
105 static uint8 fsm_space_needed_to_cat(Size needed);
106 static Size fsm_space_cat_to_avail(uint8 cat);
107 
108 /* workhorse functions for various operations */
109 static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
110  uint8 newValue, uint8 minValue);
111 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
112 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
113  BlockNumber start, BlockNumber end,
114  bool *eof_p);
115 
116 
117 /******** Public API ********/
118 
119 /*
120  * GetPageWithFreeSpace - try to find a page in the given relation with
121  * at least the specified amount of free space.
122  *
123  * If successful, return the block number; if not, return InvalidBlockNumber.
124  *
125  * The caller must be prepared for the possibility that the returned page
126  * will turn out to have too little space available by the time the caller
127  * gets a lock on it. In that case, the caller should report the actual
128  * amount of free space available on that page and then try again (see
129  * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
130  * extend the relation.
131  */
132 BlockNumber
133 GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
134 {
135  uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
136 
137  return fsm_search(rel, min_cat);
138 }
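/*
 * Illustrative sketch, not part of the original file: a simplified caller
 * following the retry protocol described above.  Real callers (see hio.c)
 * add visibility-map handling and relation extension; the helper name and
 * the use of PageGetHeapFreeSpace() here are illustrative assumptions, not
 * part of this module's API.
 */
#ifdef FSM_CALLER_EXAMPLE
static Buffer
example_get_page_for_tuple(Relation rel, Size len)
{
	BlockNumber targetBlock = GetPageWithFreeSpace(rel, len);

	while (targetBlock != InvalidBlockNumber)
	{
		Buffer		buf = ReadBuffer(rel, targetBlock);
		Size		freespace;

		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		freespace = PageGetHeapFreeSpace(BufferGetPage(buf));
		if (len <= freespace)
			return buf;			/* caller keeps the lock and inserts */

		/* Page was fuller than the FSM thought: record and retry. */
		UnlockReleaseBuffer(buf);
		targetBlock = RecordAndGetPageWithFreeSpace(rel, targetBlock,
													 freespace, len);
	}
	return InvalidBuffer;		/* caller must extend the relation */
}
#endif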
139 
140 /*
141  * RecordAndGetPageWithFreeSpace - update info about a page and try again.
142  *
143  * We provide this combo form to save some locking overhead, compared to
144  * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
145  * also some effort to return a page close to the old page; if there's a
146  * page with enough free space on the same FSM page where the old page
147  * is located, it is preferred.
148  */
149 BlockNumber
150 RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
151  Size oldSpaceAvail, Size spaceNeeded)
152 {
153  int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
154  int search_cat = fsm_space_needed_to_cat(spaceNeeded);
155  FSMAddress addr;
156  uint16 slot;
157  int search_slot;
158 
159  /* Get the location of the FSM byte representing the heap block */
160  addr = fsm_get_location(oldPage, &slot);
161 
162  search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
163 
164  /*
165  * If fsm_set_and_search found a suitable new block, return that.
166  * Otherwise, search as usual.
167  */
168  if (search_slot != -1)
169  return fsm_get_heap_blk(addr, search_slot);
170  else
171  return fsm_search(rel, search_cat);
172 }
173 
174 /*
175  * RecordPageWithFreeSpace - update info about a page.
176  *
177  * Note that if the new spaceAvail value is higher than the old value stored
178  * in the FSM, the space might not become visible to searchers until the next
179  * FreeSpaceMapVacuum call, which updates the upper level pages.
180  */
181 void
182 RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
183 {
184  int new_cat = fsm_space_avail_to_cat(spaceAvail);
185  FSMAddress addr;
186  uint16 slot;
187 
188  /* Get the location of the FSM byte representing the heap block */
189  addr = fsm_get_location(heapBlk, &slot);
190 
191  fsm_set_and_search(rel, addr, slot, new_cat, 0);
192 }
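/*
 * Illustrative sketch, not part of the original file: a bulk-update pass in
 * the style of vacuum.  Each page's free space is recorded individually, and
 * a single FreeSpaceMapVacuumRange() call afterwards propagates the new
 * values into the upper FSM levels so searches can see them.  The helper
 * name and the GetFreeSpaceOnPage() call are hypothetical placeholders.
 */
#ifdef FSM_BULK_UPDATE_EXAMPLE
static void
example_record_range(Relation rel, BlockNumber start, BlockNumber end)
{
	BlockNumber blk;

	for (blk = start; blk < end; blk++)
		RecordPageWithFreeSpace(rel, blk, GetFreeSpaceOnPage(rel, blk));

	/* make the recorded values visible to GetPageWithFreeSpace() searches */
	FreeSpaceMapVacuumRange(rel, start, end);
}
#endif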
193 
194 /*
195  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
196  * WAL replay
197  */
198 void
199 XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk,
200  Size spaceAvail)
201 {
202  int new_cat = fsm_space_avail_to_cat(spaceAvail);
203  FSMAddress addr;
204  uint16 slot;
205  BlockNumber blkno;
206  Buffer buf;
207  Page page;
208 
209  /* Get the location of the FSM byte representing the heap block */
210  addr = fsm_get_location(heapBlk, &slot);
211  blkno = fsm_logical_to_physical(addr);
212 
213  /* If the page doesn't exist already, extend */
214  buf = XLogReadBufferExtended(rlocator, FSM_FORKNUM, blkno,
215  RBM_ZERO_ON_ERROR, InvalidBuffer);
216  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
217 
218  page = BufferGetPage(buf);
219  if (PageIsNew(page))
220  PageInit(page, BLCKSZ, 0);
221 
222  if (fsm_set_avail(page, slot, new_cat))
223  MarkBufferDirtyHint(buf, false);
224  UnlockReleaseBuffer(buf);
225 }
226 
227 /*
228  * GetRecordedFreeSpace - return the amount of free space on a particular page,
229  * according to the FSM.
230  */
231 Size
232 GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
233 {
234  FSMAddress addr;
235  uint16 slot;
236  Buffer buf;
237  uint8 cat;
238 
239  /* Get the location of the FSM byte representing the heap block */
240  addr = fsm_get_location(heapBlk, &slot);
241 
242  buf = fsm_readbuf(rel, addr, false);
243  if (!BufferIsValid(buf))
244  return 0;
245  cat = fsm_get_avail(BufferGetPage(buf), slot);
246  ReleaseBuffer(buf);
247 
248  return fsm_space_cat_to_avail(cat);
249 }
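/*
 * Illustrative sketch, not part of the original file: summing the recorded
 * free space over a whole relation, similar in spirit to what the
 * pg_freespacemap contrib module exposes per block.  The helper name is an
 * assumption; note the result only reflects what the FSM has recorded, not
 * the actual page contents.
 */
#ifdef FSM_REPORT_EXAMPLE
static Size
example_total_recorded_free_space(Relation rel)
{
	Size		total = 0;
	BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
	BlockNumber blk;

	for (blk = 0; blk < nblocks; blk++)
		total += GetRecordedFreeSpace(rel, blk);

	return total;
}
#endif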
250 
251 /*
252  * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
253  *
254  * nblocks is the new size of the heap.
255  *
256  * Return the number of blocks of the new FSM.
257  * If it's InvalidBlockNumber, there is nothing to truncate;
258  * otherwise the caller is responsible for calling smgrtruncate()
259  * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
260  * to update upper-level pages in the FSM.
261  */
262 BlockNumber
263 FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
264 {
265  BlockNumber new_nfsmblocks;
266  FSMAddress first_removed_address;
267  uint16 first_removed_slot;
268  Buffer buf;
269 
270  /*
271  * If no FSM has been created yet for this relation, there's nothing to
272  * truncate.
273  */
274  if (!smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
275  return InvalidBlockNumber;
276 
277  /* Get the location in the FSM of the first removed heap block */
278  first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
279 
280  /*
281  * Zero out the tail of the last remaining FSM page. If the slot
282  * representing the first removed heap block is at a page boundary, as the
283  * first slot on the FSM page that first_removed_address points to, we can
284  * just truncate that page altogether.
285  */
286  if (first_removed_slot > 0)
287  {
288  buf = fsm_readbuf(rel, first_removed_address, false);
289  if (!BufferIsValid(buf))
290  return InvalidBlockNumber; /* nothing to do; the FSM was already
291  * smaller */
292  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
293 
294  /* NO EREPORT(ERROR) from here till changes are logged */
295  START_CRIT_SECTION();
296 
297  fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
298 
299  /*
300  * Truncation of a relation is WAL-logged at a higher-level, and we
301  * will be called at WAL replay. But if checksums are enabled, we need
302  * to still write a WAL record to protect against a torn page, if the
303  * page is flushed to disk before the truncation WAL record. We cannot
304  * use MarkBufferDirtyHint here, because that will not dirty the page
305  * during recovery.
306  */
307  MarkBufferDirty(buf);
308  if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
309  log_newpage_buffer(buf, false);
310 
311  END_CRIT_SECTION();
312 
313  UnlockReleaseBuffer(buf);
314 
315  new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
316  }
317  else
318  {
319  new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
320  if (smgrnblocks(RelationGetSmgr(rel), FSM_FORKNUM) <= new_nfsmblocks)
321  return InvalidBlockNumber; /* nothing to do; the FSM was already
322  * smaller */
323  }
324 
325  return new_nfsmblocks;
326 }
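/*
 * Illustrative sketch, not part of the original file: the caller-side
 * sequence described above, loosely modeled on RelationTruncate() in
 * storage.c.  Error handling, WAL logging and the main-fork truncation are
 * omitted; only the FSM-related steps are shown, and the helper name is an
 * assumption.
 */
#ifdef FSM_TRUNCATE_EXAMPLE
static void
example_truncate_fsm(Relation rel, BlockNumber new_heap_blocks)
{
	BlockNumber new_fsm_blocks;
	ForkNumber	fork = FSM_FORKNUM;

	new_fsm_blocks = FreeSpaceMapPrepareTruncateRel(rel, new_heap_blocks);
	if (new_fsm_blocks != InvalidBlockNumber)
	{
		/* physically shorten the FSM fork ... */
		smgrtruncate(RelationGetSmgr(rel), &fork, 1, &new_fsm_blocks);
		/* ... and fix the upper-level slots covering the removed range */
		FreeSpaceMapVacuumRange(rel, new_heap_blocks, InvalidBlockNumber);
	}
}
#endif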
327 
328 /*
329  * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
330  *
331  * We assume that the bottom-level pages have already been updated with
332  * new free-space information.
333  */
334 void
335 FreeSpaceMapVacuum(Relation rel)
336 {
337  bool dummy;
338 
339  /* Recursively scan the tree, starting at the root */
340  (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
341  (BlockNumber) 0, InvalidBlockNumber,
342  &dummy);
343 }
344 
345 /*
346  * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
347  *
348  * As above, but assume that only heap pages between start and end-1 inclusive
349  * have new free-space information, so update only the upper-level slots
350  * covering that block range. end == InvalidBlockNumber is equivalent to
351  * "all the rest of the relation".
352  */
353 void
354 FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
355 {
356  bool dummy;
357 
358  /* Recursively scan the tree, starting at the root */
359  if (end > start)
360  (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
361 }
362 
363 /******** Internal routines ********/
364 
365 /*
366  * Return the category corresponding to x bytes of free space
367  */
368 static uint8
369 fsm_space_avail_to_cat(Size avail)
370 {
371  int cat;
372 
373  Assert(avail < BLCKSZ);
374 
375  if (avail >= MaxFSMRequestSize)
376  return 255;
377 
378  cat = avail / FSM_CAT_STEP;
379 
380  /*
381  * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
382  * more.
383  */
384  if (cat > 254)
385  cat = 254;
386 
387  return (uint8) cat;
388 }
389 
390 /*
391  * Return the lower bound of the range of free space represented by given
392  * category.
393  */
394 static Size
395 fsm_space_cat_to_avail(uint8 cat)
396 {
397  /* The highest category represents exactly MaxFSMRequestSize bytes. */
398  if (cat == 255)
399  return MaxFSMRequestSize;
400  else
401  return cat * FSM_CAT_STEP;
402 }
403 
404 /*
405  * Which category does a page need to have, to accommodate x bytes of data?
406  * While fsm_space_avail_to_cat() rounds down, this needs to round up.
407  */
408 static uint8
409 fsm_space_needed_to_cat(Size needed)
410 {
411  int cat;
412 
413  /* Can't ask for more space than the highest category represents */
414  if (needed > MaxFSMRequestSize)
415  elog(ERROR, "invalid FSM request size %zu", needed);
416 
417  if (needed == 0)
418  return 1;
419 
420  cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
421 
422  if (cat > 255)
423  cat = 255;
424 
425  return (uint8) cat;
426 }
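/*
 * Worked example, not part of the original file: with FSM_CAT_STEP = 32,
 * a request for 1..32 bytes rounds up to category 1 and one for 33 bytes to
 * category 2, i.e. (33 + 31) / 32 = 2.  A request of 0 bytes is also bumped
 * to category 1, so that completely-full pages (category 0) are never
 * returned even for zero-sized requests.
 */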
427 
428 /*
429  * Returns the physical block number of a FSM page
430  */
431 static BlockNumber
432 fsm_logical_to_physical(FSMAddress addr)
433 {
434  BlockNumber pages;
435  int leafno;
436  int l;
437 
438  /*
439  * Calculate the logical page number of the first leaf page below the
440  * given page.
441  */
442  leafno = addr.logpageno;
443  for (l = 0; l < addr.level; l++)
444  leafno *= SlotsPerFSMPage;
445 
446  /* Count upper level nodes required to address the leaf page */
447  pages = 0;
448  for (l = 0; l < FSM_TREE_DEPTH; l++)
449  {
450  pages += leafno + 1;
451  leafno /= SlotsPerFSMPage;
452  }
453 
454  /*
455  * If the page we were asked for wasn't at the bottom level, subtract the
456  * additional lower level pages we counted above.
457  */
458  pages -= addr.level;
459 
460  /* Turn the page count into 0-based block number */
461  return pages - 1;
462 }
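/*
 * Illustrative example, not part of the original file, assuming a
 * three-level tree: the root (level 2, logical page 0) maps to physical
 * block 0, its first level-1 child to block 1, and the first bottom-level
 * page to block 2, because every page is preceded on disk by its ancestors
 * in depth-first order.  The second bottom-level page then lands on block 3,
 * and a new level-1 subtree starts only after its predecessor subtree is
 * complete.
 */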
463 
464 /*
465  * Return the FSM location corresponding to given heap block.
466  */
467 static FSMAddress
468 fsm_get_location(BlockNumber heapblk, uint16 *slot)
469 {
470  FSMAddress addr;
471 
472  addr.level = FSM_BOTTOM_LEVEL;
473  addr.logpageno = heapblk / SlotsPerFSMPage;
474  *slot = heapblk % SlotsPerFSMPage;
475 
476  return addr;
477 }
478 
479 /*
480  * Return the heap block number corresponding to given location in the FSM.
481  */
482 static BlockNumber
483 fsm_get_heap_blk(FSMAddress addr, uint16 slot)
484 {
485  Assert(addr.level == FSM_BOTTOM_LEVEL);
486  return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
487 }
488 
489 /*
490  * Given a logical address of a child page, get the logical page number of
491  * the parent, and the slot within the parent corresponding to the child.
492  */
493 static FSMAddress
494 fsm_get_parent(FSMAddress child, uint16 *slot)
495 {
496  FSMAddress parent;
497 
498  Assert(child.level < FSM_ROOT_LEVEL);
499 
500  parent.level = child.level + 1;
501  parent.logpageno = child.logpageno / SlotsPerFSMPage;
502  *slot = child.logpageno % SlotsPerFSMPage;
503 
504  return parent;
505 }
506 
507 /*
508  * Given a logical address of a parent page and a slot number, get the
509  * logical address of the corresponding child page.
510  */
511 static FSMAddress
512 fsm_get_child(FSMAddress parent, uint16 slot)
513 {
514  FSMAddress child;
515 
516  Assert(parent.level > FSM_BOTTOM_LEVEL);
517 
518  child.level = parent.level - 1;
519  child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
520 
521  return child;
522 }
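/*
 * Illustrative sketch, not part of the original file: a round trip through
 * the addressing helpers above.  For any heap block, descending from the
 * slot returned by fsm_get_location() via fsm_get_parent()/fsm_get_child()
 * and back through fsm_get_heap_blk() yields the original block number.
 * The function name is an assumption, for demonstration only.
 */
#ifdef FSM_ADDRESS_EXAMPLE
static void
example_address_round_trip(BlockNumber heapblk)
{
	uint16		slot;
	uint16		parentslot;
	FSMAddress	leaf = fsm_get_location(heapblk, &slot);
	FSMAddress	parent = fsm_get_parent(leaf, &parentslot);
	FSMAddress	child = fsm_get_child(parent, parentslot);

	/* the child recomputed from the parent is the leaf we started from */
	Assert(child.level == leaf.level && child.logpageno == leaf.logpageno);
	Assert(fsm_get_heap_blk(leaf, slot) == heapblk);
}
#endif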
523 
524 /*
525  * Read a FSM page.
526  *
527  * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
528  * true, the FSM file is extended.
529  */
530 static Buffer
531 fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
532 {
533  BlockNumber blkno = fsm_logical_to_physical(addr);
534  Buffer buf;
535  SMgrRelation reln;
536 
537  /*
538  * Caution: re-using this smgr pointer could fail if the relcache entry
539  * gets closed. It's safe as long as we only do smgr-level operations
540  * between here and the last use of the pointer.
541  */
542  reln = RelationGetSmgr(rel);
543 
544  /*
545  * If we haven't cached the size of the FSM yet, check it first. Also
546  * recheck if the requested block seems to be past end, since our cached
547  * value might be stale. (We send smgr inval messages on truncation, but
548  * not on extension.)
549  */
550  if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
551  blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
552  {
553  /* Invalidate the cache so smgrnblocks asks the kernel. */
554  reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
555  if (smgrexists(reln, FSM_FORKNUM))
556  smgrnblocks(reln, FSM_FORKNUM);
557  else
558  reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
559  }
560 
561  /* Handle requests beyond EOF */
562  if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
563  {
564  if (extend)
565  fsm_extend(rel, blkno + 1);
566  else
567  return InvalidBuffer;
568  }
569 
570  /*
571  * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM
572  * information is not accurate anyway, so it's better to clear corrupt
573  * pages than error out. Since the FSM changes are not WAL-logged, the
574  * so-called torn page problem on crash can lead to pages with corrupt
575  * headers, for example.
576  *
577  * The initialize-the-page part is trickier than it looks, because of the
578  * possibility of multiple backends doing this concurrently, and our
579  * desire to not uselessly take the buffer lock in the normal path where
580  * the page is OK. We must take the lock to initialize the page, so
581  * recheck page newness after we have the lock, in case someone else
582  * already did it. Also, because we initially check PageIsNew with no
583  * lock, it's possible to fall through and return the buffer while someone
584  * else is still initializing the page (i.e., we might see pd_upper as set
585  * but other page header fields are still zeroes). This is harmless for
586  * callers that will take a buffer lock themselves, but some callers
587  * inspect the page without any lock at all. The latter is OK only so
588  * long as it doesn't depend on the page header having correct contents.
589  * Current usage is safe because PageGetContents() does not require that.
590  */
591  buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
592  if (PageIsNew(BufferGetPage(buf)))
593  {
594  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
595  if (PageIsNew(BufferGetPage(buf)))
596  PageInit(BufferGetPage(buf), BLCKSZ, 0);
597  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
598  }
599  return buf;
600 }
601 
602 /*
603  * Ensure that the FSM fork is at least fsm_nblocks long, extending
604  * it if necessary with empty pages. And by empty, I mean pages filled
605  * with zeros, meaning there's no free space.
606  */
607 static void
608 fsm_extend(Relation rel, BlockNumber fsm_nblocks)
609 {
610  BlockNumber fsm_nblocks_now;
611  PGAlignedBlock pg;
612  SMgrRelation reln;
613 
614  PageInit((Page) pg.data, BLCKSZ, 0);
615 
616  /*
617  * We use the relation extension lock to lock out other backends trying to
618  * extend the FSM at the same time. It also locks out extension of the
619  * main fork, unnecessarily, but extending the FSM happens seldom enough
620  * that it doesn't seem worthwhile to have a separate lock tag type for
621  * it.
622  *
623  * Note that another backend might have extended or created the relation
624  * by the time we get the lock.
625  */
626  LockRelationForExtension(rel, ExclusiveLock);
627 
628  /*
629  * Caution: re-using this smgr pointer could fail if the relcache entry
630  * gets closed. It's safe as long as we only do smgr-level operations
631  * between here and the last use of the pointer.
632  */
633  reln = RelationGetSmgr(rel);
634 
635  /*
636  * Create the FSM file first if it doesn't exist. If
637  * smgr_cached_nblocks[FSM_FORKNUM] is positive then it must exist, no
638  * need for an smgrexists call.
639  */
640  if ((reln->smgr_cached_nblocks[FSM_FORKNUM] == 0 ||
641  reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber) &&
642  !smgrexists(reln, FSM_FORKNUM))
643  smgrcreate(reln, FSM_FORKNUM, false);
644 
645  /* Invalidate cache so that smgrnblocks() asks the kernel. */
646  reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
647  fsm_nblocks_now = smgrnblocks(reln, FSM_FORKNUM);
648 
649  /* Extend as needed. */
650  while (fsm_nblocks_now < fsm_nblocks)
651  {
652  PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now);
653 
654  smgrextend(reln, FSM_FORKNUM, fsm_nblocks_now,
655  pg.data, false);
656  fsm_nblocks_now++;
657  }
658 
659  UnlockRelationForExtension(rel, ExclusiveLock);
660 }
661 
662 /*
663  * Set value in given FSM page and slot.
664  *
665  * If minValue > 0, the updated page is also searched for a page with at
666  * least minValue of free space. If one is found, its slot number is
667  * returned, -1 otherwise.
668  */
669 static int
670 fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
671  uint8 newValue, uint8 minValue)
672 {
673  Buffer buf;
674  Page page;
675  int newslot = -1;
676 
677  buf = fsm_readbuf(rel, addr, true);
678  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
679 
680  page = BufferGetPage(buf);
681 
682  if (fsm_set_avail(page, slot, newValue))
683  MarkBufferDirtyHint(buf, false);
684 
685  if (minValue != 0)
686  {
687  /* Search while we still hold the lock */
688  newslot = fsm_search_avail(buf, minValue,
689  addr.level == FSM_BOTTOM_LEVEL,
690  true);
691  }
692 
693  UnlockReleaseBuffer(buf);
694 
695  return newslot;
696 }
697 
698 /*
699  * Search the tree for a heap page with at least min_cat of free space
700  */
701 static BlockNumber
702 fsm_search(Relation rel, uint8 min_cat)
703 {
704  int restarts = 0;
705  FSMAddress addr = FSM_ROOT_ADDRESS;
706 
707  for (;;)
708  {
709  int slot;
710  Buffer buf;
711  uint8 max_avail = 0;
712 
713  /* Read the FSM page. */
714  buf = fsm_readbuf(rel, addr, false);
715 
716  /* Search within the page */
717  if (BufferIsValid(buf))
718  {
719  LockBuffer(buf, BUFFER_LOCK_SHARE);
720  slot = fsm_search_avail(buf, min_cat,
721  (addr.level == FSM_BOTTOM_LEVEL),
722  false);
723  if (slot == -1)
724  max_avail = fsm_get_max_avail(BufferGetPage(buf));
725  UnlockReleaseBuffer(buf);
726  }
727  else
728  slot = -1;
729 
730  if (slot != -1)
731  {
732  /*
733  * Descend the tree, or return the found block if we're at the
734  * bottom.
735  */
736  if (addr.level == FSM_BOTTOM_LEVEL)
737  return fsm_get_heap_blk(addr, slot);
738 
739  addr = fsm_get_child(addr, slot);
740  }
741  else if (addr.level == FSM_ROOT_LEVEL)
742  {
743  /*
744  * At the root, failure means there's no page with enough free
745  * space in the FSM. Give up.
746  */
747  return InvalidBlockNumber;
748  }
749  else
750  {
751  uint16 parentslot;
752  FSMAddress parent;
753 
754  /*
755  * At lower level, failure can happen if the value in the upper-
756  * level node didn't reflect the value on the lower page. Update
757  * the upper node, to avoid falling into the same trap again, and
758  * start over.
759  *
760  * There's a race condition here, if another backend updates this
761  * page right after we release it, and gets the lock on the parent
762  * page before us. We'll then update the parent page with the now
763  * stale information we had. It's OK, because it should happen
764  * rarely, and will be fixed by the next vacuum.
765  */
766  parent = fsm_get_parent(addr, &parentslot);
767  fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
768 
769  /*
770  * If the upper pages are badly out of date, we might need to loop
771  * quite a few times, updating them as we go. Any inconsistencies
772  * should eventually be corrected and the loop should end. Looping
773  * indefinitely is nevertheless scary, so provide an emergency
774  * valve.
775  */
776  if (restarts++ > 10000)
777  return InvalidBlockNumber;
778 
779  /* Start search all over from the root */
780  addr = FSM_ROOT_ADDRESS;
781  }
782  }
783 }
784 
785 
786 /*
787  * Recursive guts of FreeSpaceMapVacuum
788  *
789  * Examine the FSM page indicated by addr, as well as its children, updating
790  * upper-level nodes that cover the heap block range from start to end-1.
791  * (It's okay if end is beyond the actual end of the map.)
792  * Return the maximum freespace value on this page.
793  *
794  * If addr is past the end of the FSM, set *eof_p to true and return 0.
795  *
796  * This traverses the tree in depth-first order. The tree is stored
797  * physically in depth-first order, so this should be pretty I/O efficient.
798  */
799 static uint8
800 fsm_vacuum_page(Relation rel, FSMAddress addr,
801  BlockNumber start, BlockNumber end,
802  bool *eof_p)
803 {
804  Buffer buf;
805  Page page;
806  uint8 max_avail;
807 
808  /* Read the page if it exists, or return EOF */
809  buf = fsm_readbuf(rel, addr, false);
810  if (!BufferIsValid(buf))
811  {
812  *eof_p = true;
813  return 0;
814  }
815  else
816  *eof_p = false;
817 
818  page = BufferGetPage(buf);
819 
820  /*
821  * If we're above the bottom level, recurse into children, and fix the
822  * information stored about them at this level.
823  */
824  if (addr.level > FSM_BOTTOM_LEVEL)
825  {
826  FSMAddress fsm_start,
827  fsm_end;
828  uint16 fsm_start_slot,
829  fsm_end_slot;
830  int slot,
831  start_slot,
832  end_slot;
833  bool eof = false;
834 
835  /*
836  * Compute the range of slots we need to update on this page, given
837  * the requested range of heap blocks to consider. The first slot to
838  * update is the one covering the "start" block, and the last slot is
839  * the one covering "end - 1". (Some of this work will be duplicated
840  * in each recursive call, but it's cheap enough to not worry about.)
841  */
842  fsm_start = fsm_get_location(start, &fsm_start_slot);
843  fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
844 
845  while (fsm_start.level < addr.level)
846  {
847  fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
848  fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
849  }
850  Assert(fsm_start.level == addr.level);
851 
852  if (fsm_start.logpageno == addr.logpageno)
853  start_slot = fsm_start_slot;
854  else if (fsm_start.logpageno > addr.logpageno)
855  start_slot = SlotsPerFSMPage; /* shouldn't get here... */
856  else
857  start_slot = 0;
858 
859  if (fsm_end.logpageno == addr.logpageno)
860  end_slot = fsm_end_slot;
861  else if (fsm_end.logpageno > addr.logpageno)
862  end_slot = SlotsPerFSMPage - 1;
863  else
864  end_slot = -1; /* shouldn't get here... */
865 
866  for (slot = start_slot; slot <= end_slot; slot++)
867  {
868  int child_avail;
869 
870  CHECK_FOR_INTERRUPTS();
871 
872  /* After we hit end-of-file, just clear the rest of the slots */
873  if (!eof)
874  child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
875  start, end,
876  &eof);
877  else
878  child_avail = 0;
879 
880  /* Update information about the child */
881  if (fsm_get_avail(page, slot) != child_avail)
882  {
883  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
884  fsm_set_avail(page, slot, child_avail);
885  MarkBufferDirtyHint(buf, false);
886  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
887  }
888  }
889  }
890 
891  /* Now get the maximum value on the page, to return to caller */
892  max_avail = fsm_get_max_avail(page);
893 
894  /*
895  * Reset the next slot pointer. This encourages the use of low-numbered
896  * pages, increasing the chances that a later vacuum can truncate the
897  * relation. We don't bother with a lock here, nor with marking the page
898  * dirty if it wasn't already, since this is just a hint.
899  */
900  ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
901 
902  ReleaseBuffer(buf);
903 
904  return max_avail;
905 }