PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
freespace.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * freespace.c
4  * POSTGRES free space map for quickly finding free space in relations
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/storage/freespace/freespace.c
12  *
13  *
14  * NOTES:
15  *
16  * Free Space Map keeps track of the amount of free space on pages, and
17  * allows quickly searching for a page with enough free space. The FSM is
18  * stored in a dedicated relation fork of all heap relations, and those
19  * index access methods that need it (see also indexfsm.c). See README for
20  * more information.
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "access/xlogutils.h"
28 #include "miscadmin.h"
29 #include "storage/freespace.h"
30 #include "storage/fsm_internals.h"
31 #include "storage/lmgr.h"
32 #include "storage/smgr.h"
33 
34 
35 /*
36  * We use just one byte to store the amount of free space on a page, so we
37  * divide the amount of free space a page can have into 256 different
38  * categories. The highest category, 255, represents a page with at least
39  * MaxFSMRequestSize bytes of free space, and the second highest category
40  * represents the range from 254 * FSM_CAT_STEP, inclusive, to
41  * MaxFSMRequestSize, exclusive.
42  *
43  * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
44  * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
45  * categories look like this:
46  *
47  *
48  * Range Category
49  * 0 - 31 0
50  * 32 - 63 1
51  * ... ... ...
52  * 8096 - 8127 253
53  * 8128 - 8163 254
54  * 8164 - 8192 255
55  *
56  * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
57  * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
58  * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
59  * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
60  * completely empty page, that would mean that we could never satisfy a
61  * request of exactly MaxFSMRequestSize bytes.
62  */
63 #define FSM_CATEGORIES 256
64 #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
65 #define MaxFSMRequestSize MaxHeapTupleSize
66 
67 /*
68  * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
69  * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
70  * 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
71  * this means that 4096 bytes is the smallest BLCKSZ that we can get away
72  * with a 3-level tree, and 512 is the smallest we support.
73  */
74 #define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
75 
76 #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
77 #define FSM_BOTTOM_LEVEL 0
78 
79 /*
80  * The internal FSM routines work on a logical addressing scheme. Each
81  * level of the tree can be thought of as a separately addressable file.
82  */
83 typedef struct
84 {
85  int level; /* level */
86  int logpageno; /* page number within the level */
87 } FSMAddress;
88 
89 /* Address of the root page. */
91 
92 /* functions to navigate the tree */
93 static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
94 static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
95 static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
98 
99 static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
100 static void fsm_extend(Relation rel, BlockNumber fsm_nblocks);
101 
102 /* functions to convert amount of free space to a FSM category */
103 static uint8 fsm_space_avail_to_cat(Size avail);
104 static uint8 fsm_space_needed_to_cat(Size needed);
105 static Size fsm_space_cat_to_avail(uint8 cat);
106 
107 /* workhorse functions for various operations */
108 static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
109  uint8 newValue, uint8 minValue);
110 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
111 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
113 static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
114 
115 
116 /******** Public API ********/
117 
118 /*
119  * GetPageWithFreeSpace - try to find a page in the given relation with
120  * at least the specified amount of free space.
121  *
122  * If successful, return the block number; if not, return InvalidBlockNumber.
123  *
124  * The caller must be prepared for the possibility that the returned page
125  * will turn out to have too little space available by the time the caller
126  * gets a lock on it. In that case, the caller should report the actual
127  * amount of free space available on that page and then try again (see
128  * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
129  * extend the relation.
130  */
133 {
134  uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
135 
136  return fsm_search(rel, min_cat);
137 }
138 
139 /*
140  * RecordAndGetPageWithFreeSpace - update info about a page and try again.
141  *
142  * We provide this combo form to save some locking overhead, compared to
143  * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
144  * also some effort to return a page close to the old page; if there's a
145  * page with enough free space on the same FSM page where the old one page
146  * is located, it is preferred.
147  */
150  Size oldSpaceAvail, Size spaceNeeded)
151 {
152  int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
153  int search_cat = fsm_space_needed_to_cat(spaceNeeded);
154  FSMAddress addr;
155  uint16 slot;
156  int search_slot;
157 
158  /* Get the location of the FSM byte representing the heap block */
159  addr = fsm_get_location(oldPage, &slot);
160 
161  search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
162 
163  /*
164  * If fsm_set_and_search found a suitable new block, return that.
165  * Otherwise, search as usual.
166  */
167  if (search_slot != -1)
168  return fsm_get_heap_blk(addr, search_slot);
169  else
170  return fsm_search(rel, search_cat);
171 }
172 
173 /*
174  * RecordPageWithFreeSpace - update info about a page.
175  *
176  * Note that if the new spaceAvail value is higher than the old value stored
177  * in the FSM, the space might not become visible to searchers until the next
178  * FreeSpaceMapVacuum call, which updates the upper level pages.
179  */
180 void
182 {
183  int new_cat = fsm_space_avail_to_cat(spaceAvail);
184  FSMAddress addr;
185  uint16 slot;
186 
187  /* Get the location of the FSM byte representing the heap block */
188  addr = fsm_get_location(heapBlk, &slot);
189 
190  fsm_set_and_search(rel, addr, slot, new_cat, 0);
191 }
192 
193 /*
194  * Update the upper levels of the free space map all the way up to the root
195  * to make sure we don't lose track of new blocks we just inserted. This is
196  * intended to be used after adding many new blocks to the relation; we judge
197  * it not worth updating the upper levels of the tree every time data for
198  * a single page changes, but for a bulk-extend it's worth it.
199  */
200 void
202  BlockNumber endBlkNum, Size freespace)
203 {
204  int new_cat = fsm_space_avail_to_cat(freespace);
205  FSMAddress addr;
206  uint16 slot;
207  BlockNumber blockNum;
208  BlockNumber lastBlkOnPage;
209 
210  blockNum = startBlkNum;
211 
212  while (blockNum <= endBlkNum)
213  {
214  /*
215  * Find FSM address for this block; update tree all the way to the
216  * root.
217  */
218  addr = fsm_get_location(blockNum, &slot);
219  fsm_update_recursive(rel, addr, new_cat);
220 
221  /*
222  * Get the last block number on this FSM page. If that's greater than
223  * or equal to our endBlkNum, we're done. Otherwise, advance to the
224  * first block on the next page.
225  */
226  lastBlkOnPage = fsm_get_lastblckno(rel, addr);
227  if (lastBlkOnPage >= endBlkNum)
228  break;
229  blockNum = lastBlkOnPage + 1;
230  }
231 }
232 
233 /*
234  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
235  * WAL replay
236  */
237 void
239  Size spaceAvail)
240 {
241  int new_cat = fsm_space_avail_to_cat(spaceAvail);
242  FSMAddress addr;
243  uint16 slot;
244  BlockNumber blkno;
245  Buffer buf;
246  Page page;
247 
248  /* Get the location of the FSM byte representing the heap block */
249  addr = fsm_get_location(heapBlk, &slot);
250  blkno = fsm_logical_to_physical(addr);
251 
252  /* If the page doesn't exist already, extend */
255 
256  page = BufferGetPage(buf);
257  if (PageIsNew(page))
258  PageInit(page, BLCKSZ, 0);
259 
260  if (fsm_set_avail(page, slot, new_cat))
261  MarkBufferDirtyHint(buf, false);
262  UnlockReleaseBuffer(buf);
263 }
264 
265 /*
266  * GetRecordedFreePage - return the amount of free space on a particular page,
267  * according to the FSM.
268  */
269 Size
271 {
272  FSMAddress addr;
273  uint16 slot;
274  Buffer buf;
275  uint8 cat;
276 
277  /* Get the location of the FSM byte representing the heap block */
278  addr = fsm_get_location(heapBlk, &slot);
279 
280  buf = fsm_readbuf(rel, addr, false);
281  if (!BufferIsValid(buf))
282  return 0;
283  cat = fsm_get_avail(BufferGetPage(buf), slot);
284  ReleaseBuffer(buf);
285 
286  return fsm_space_cat_to_avail(cat);
287 }
288 
289 /*
290  * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
291  *
292  * The caller must hold AccessExclusiveLock on the relation, to ensure that
293  * other backends receive the smgr invalidation event that this function sends
294  * before they access the FSM again.
295  *
296  * nblocks is the new size of the heap.
297  */
298 void
300 {
301  BlockNumber new_nfsmblocks;
302  FSMAddress first_removed_address;
303  uint16 first_removed_slot;
304  Buffer buf;
305 
306  RelationOpenSmgr(rel);
307 
308  /*
309  * If no FSM has been created yet for this relation, there's nothing to
310  * truncate.
311  */
312  if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
313  return;
314 
315  /* Get the location in the FSM of the first removed heap block */
316  first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
317 
318  /*
319  * Zero out the tail of the last remaining FSM page. If the slot
320  * representing the first removed heap block is at a page boundary, as the
321  * first slot on the FSM page that first_removed_address points to, we can
322  * just truncate that page altogether.
323  */
324  if (first_removed_slot > 0)
325  {
326  buf = fsm_readbuf(rel, first_removed_address, false);
327  if (!BufferIsValid(buf))
328  return; /* nothing to do; the FSM was already smaller */
330 
331  /* NO EREPORT(ERROR) from here till changes are logged */
333 
334  fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
335 
336  /*
337  * Truncation of a relation is WAL-logged at a higher-level, and we
338  * will be called at WAL replay. But if checksums are enabled, we need
339  * to still write a WAL record to protect against a torn page, if the
340  * page is flushed to disk before the truncation WAL record. We cannot
341  * use MarkBufferDirtyHint here, because that will not dirty the page
342  * during recovery.
343  */
344  MarkBufferDirty(buf);
346  log_newpage_buffer(buf, false);
347 
349 
350  UnlockReleaseBuffer(buf);
351 
352  new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
353  }
354  else
355  {
356  new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
357  if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
358  return; /* nothing to do; the FSM was already smaller */
359  }
360 
361  /* Truncate the unused FSM pages, and send smgr inval message */
362  smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);
363 
364  /*
365  * We might as well update the local smgr_fsm_nblocks setting.
366  * smgrtruncate sent an smgr cache inval message, which will cause other
367  * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
368  * at the next command boundary. But this ensures it isn't outright wrong
369  * until then.
370  */
371  if (rel->rd_smgr)
372  rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
373 }
374 
375 /*
376  * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
377  */
378 void
380 {
381  bool dummy;
382 
383  /*
384  * Traverse the tree in depth-first order. The tree is stored physically
385  * in depth-first order, so this should be pretty I/O efficient.
386  */
387  fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
388 }
389 
390 /******** Internal routines ********/
391 
392 /*
393  * Return category corresponding x bytes of free space
394  */
395 static uint8
397 {
398  int cat;
399 
400  Assert(avail < BLCKSZ);
401 
402  if (avail >= MaxFSMRequestSize)
403  return 255;
404 
405  cat = avail / FSM_CAT_STEP;
406 
407  /*
408  * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
409  * more.
410  */
411  if (cat > 254)
412  cat = 254;
413 
414  return (uint8) cat;
415 }
416 
417 /*
418  * Return the lower bound of the range of free space represented by given
419  * category.
420  */
421 static Size
423 {
424  /* The highest category represents exactly MaxFSMRequestSize bytes. */
425  if (cat == 255)
426  return MaxFSMRequestSize;
427  else
428  return cat * FSM_CAT_STEP;
429 }
430 
431 /*
432  * Which category does a page need to have, to accommodate x bytes of data?
433  * While fsm_size_to_avail_cat() rounds down, this needs to round up.
434  */
435 static uint8
437 {
438  int cat;
439 
440  /* Can't ask for more space than the highest category represents */
441  if (needed > MaxFSMRequestSize)
442  elog(ERROR, "invalid FSM request size %zu", needed);
443 
444  if (needed == 0)
445  return 1;
446 
447  cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
448 
449  if (cat > 255)
450  cat = 255;
451 
452  return (uint8) cat;
453 }
454 
455 /*
456  * Returns the physical block number of a FSM page
457  */
458 static BlockNumber
460 {
461  BlockNumber pages;
462  int leafno;
463  int l;
464 
465  /*
466  * Calculate the logical page number of the first leaf page below the
467  * given page.
468  */
469  leafno = addr.logpageno;
470  for (l = 0; l < addr.level; l++)
471  leafno *= SlotsPerFSMPage;
472 
473  /* Count upper level nodes required to address the leaf page */
474  pages = 0;
475  for (l = 0; l < FSM_TREE_DEPTH; l++)
476  {
477  pages += leafno + 1;
478  leafno /= SlotsPerFSMPage;
479  }
480 
481  /*
482  * If the page we were asked for wasn't at the bottom level, subtract the
483  * additional lower level pages we counted above.
484  */
485  pages -= addr.level;
486 
487  /* Turn the page count into 0-based block number */
488  return pages - 1;
489 }
490 
491 /*
492  * Return the FSM location corresponding to given heap block.
493  */
494 static FSMAddress
496 {
497  FSMAddress addr;
498 
499  addr.level = FSM_BOTTOM_LEVEL;
500  addr.logpageno = heapblk / SlotsPerFSMPage;
501  *slot = heapblk % SlotsPerFSMPage;
502 
503  return addr;
504 }
505 
506 /*
507  * Return the heap block number corresponding to given location in the FSM.
508  */
509 static BlockNumber
511 {
512  Assert(addr.level == FSM_BOTTOM_LEVEL);
513  return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
514 }
515 
516 /*
517  * Given a logical address of a child page, get the logical page number of
518  * the parent, and the slot within the parent corresponding to the child.
519  */
520 static FSMAddress
522 {
523  FSMAddress parent;
524 
525  Assert(child.level < FSM_ROOT_LEVEL);
526 
527  parent.level = child.level + 1;
528  parent.logpageno = child.logpageno / SlotsPerFSMPage;
529  *slot = child.logpageno % SlotsPerFSMPage;
530 
531  return parent;
532 }
533 
534 /*
535  * Given a logical address of a parent page and a slot number, get the
536  * logical address of the corresponding child page.
537  */
538 static FSMAddress
540 {
541  FSMAddress child;
542 
543  Assert(parent.level > FSM_BOTTOM_LEVEL);
544 
545  child.level = parent.level - 1;
546  child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
547 
548  return child;
549 }
550 
551 /*
552  * Read a FSM page.
553  *
554  * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
555  * true, the FSM file is extended.
556  */
557 static Buffer
558 fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
559 {
560  BlockNumber blkno = fsm_logical_to_physical(addr);
561  Buffer buf;
562 
563  RelationOpenSmgr(rel);
564 
565  /*
566  * If we haven't cached the size of the FSM yet, check it first. Also
567  * recheck if the requested block seems to be past end, since our cached
568  * value might be stale. (We send smgr inval messages on truncation, but
569  * not on extension.)
570  */
572  blkno >= rel->rd_smgr->smgr_fsm_nblocks)
573  {
574  if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
576  FSM_FORKNUM);
577  else
578  rel->rd_smgr->smgr_fsm_nblocks = 0;
579  }
580 
581  /* Handle requests beyond EOF */
582  if (blkno >= rel->rd_smgr->smgr_fsm_nblocks)
583  {
584  if (extend)
585  fsm_extend(rel, blkno + 1);
586  else
587  return InvalidBuffer;
588  }
589 
590  /*
591  * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM
592  * information is not accurate anyway, so it's better to clear corrupt
593  * pages than error out. Since the FSM changes are not WAL-logged, the
594  * so-called torn page problem on crash can lead to pages with corrupt
595  * headers, for example.
596  */
597  buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
598  if (PageIsNew(BufferGetPage(buf)))
599  PageInit(BufferGetPage(buf), BLCKSZ, 0);
600  return buf;
601 }
602 
603 /*
604  * Ensure that the FSM fork is at least fsm_nblocks long, extending
605  * it if necessary with empty pages. And by empty, I mean pages filled
606  * with zeros, meaning there's no free space.
607  */
608 static void
609 fsm_extend(Relation rel, BlockNumber fsm_nblocks)
610 {
611  BlockNumber fsm_nblocks_now;
612  Page pg;
613 
614  pg = (Page) palloc(BLCKSZ);
615  PageInit(pg, BLCKSZ, 0);
616 
617  /*
618  * We use the relation extension lock to lock out other backends trying to
619  * extend the FSM at the same time. It also locks out extension of the
620  * main fork, unnecessarily, but extending the FSM happens seldom enough
621  * that it doesn't seem worthwhile to have a separate lock tag type for
622  * it.
623  *
624  * Note that another backend might have extended or created the relation
625  * by the time we get the lock.
626  */
628 
629  /* Might have to re-open if a cache flush happened */
630  RelationOpenSmgr(rel);
631 
632  /*
633  * Create the FSM file first if it doesn't exist. If smgr_fsm_nblocks is
634  * positive then it must exist, no need for an smgrexists call.
635  */
636  if ((rel->rd_smgr->smgr_fsm_nblocks == 0 ||
639  smgrcreate(rel->rd_smgr, FSM_FORKNUM, false);
640 
641  fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
642 
643  while (fsm_nblocks_now < fsm_nblocks)
644  {
645  PageSetChecksumInplace(pg, fsm_nblocks_now);
646 
647  smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
648  (char *) pg, false);
649  fsm_nblocks_now++;
650  }
651 
652  /* Update local cache with the up-to-date size */
653  rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
654 
656 
657  pfree(pg);
658 }
659 
660 /*
661  * Set value in given FSM page and slot.
662  *
663  * If minValue > 0, the updated page is also searched for a page with at
664  * least minValue of free space. If one is found, its slot number is
665  * returned, -1 otherwise.
666  */
667 static int
669  uint8 newValue, uint8 minValue)
670 {
671  Buffer buf;
672  Page page;
673  int newslot = -1;
674 
675  buf = fsm_readbuf(rel, addr, true);
677 
678  page = BufferGetPage(buf);
679 
680  if (fsm_set_avail(page, slot, newValue))
681  MarkBufferDirtyHint(buf, false);
682 
683  if (minValue != 0)
684  {
685  /* Search while we still hold the lock */
686  newslot = fsm_search_avail(buf, minValue,
687  addr.level == FSM_BOTTOM_LEVEL,
688  true);
689  }
690 
691  UnlockReleaseBuffer(buf);
692 
693  return newslot;
694 }
695 
696 /*
697  * Search the tree for a heap page with at least min_cat of free space
698  */
699 static BlockNumber
700 fsm_search(Relation rel, uint8 min_cat)
701 {
702  int restarts = 0;
704 
705  for (;;)
706  {
707  int slot;
708  Buffer buf;
709  uint8 max_avail = 0;
710 
711  /* Read the FSM page. */
712  buf = fsm_readbuf(rel, addr, false);
713 
714  /* Search within the page */
715  if (BufferIsValid(buf))
716  {
718  slot = fsm_search_avail(buf, min_cat,
719  (addr.level == FSM_BOTTOM_LEVEL),
720  false);
721  if (slot == -1)
722  max_avail = fsm_get_max_avail(BufferGetPage(buf));
723  UnlockReleaseBuffer(buf);
724  }
725  else
726  slot = -1;
727 
728  if (slot != -1)
729  {
730  /*
731  * Descend the tree, or return the found block if we're at the
732  * bottom.
733  */
734  if (addr.level == FSM_BOTTOM_LEVEL)
735  return fsm_get_heap_blk(addr, slot);
736 
737  addr = fsm_get_child(addr, slot);
738  }
739  else if (addr.level == FSM_ROOT_LEVEL)
740  {
741  /*
742  * At the root, failure means there's no page with enough free
743  * space in the FSM. Give up.
744  */
745  return InvalidBlockNumber;
746  }
747  else
748  {
749  uint16 parentslot;
750  FSMAddress parent;
751 
752  /*
753  * At lower level, failure can happen if the value in the upper-
754  * level node didn't reflect the value on the lower page. Update
755  * the upper node, to avoid falling into the same trap again, and
756  * start over.
757  *
758  * There's a race condition here, if another backend updates this
759  * page right after we release it, and gets the lock on the parent
760  * page before us. We'll then update the parent page with the now
761  * stale information we had. It's OK, because it should happen
762  * rarely, and will be fixed by the next vacuum.
763  */
764  parent = fsm_get_parent(addr, &parentslot);
765  fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
766 
767  /*
768  * If the upper pages are badly out of date, we might need to loop
769  * quite a few times, updating them as we go. Any inconsistencies
770  * should eventually be corrected and the loop should end. Looping
771  * indefinitely is nevertheless scary, so provide an emergency
772  * valve.
773  */
774  if (restarts++ > 10000)
775  return InvalidBlockNumber;
776 
777  /* Start search all over from the root */
778  addr = FSM_ROOT_ADDRESS;
779  }
780  }
781 }
782 
783 
784 /*
785  * Recursive guts of FreeSpaceMapVacuum
786  */
787 static uint8
788 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
789 {
790  Buffer buf;
791  Page page;
792  uint8 max_avail;
793 
794  /* Read the page if it exists, or return EOF */
795  buf = fsm_readbuf(rel, addr, false);
796  if (!BufferIsValid(buf))
797  {
798  *eof_p = true;
799  return 0;
800  }
801  else
802  *eof_p = false;
803 
804  page = BufferGetPage(buf);
805 
806  /*
807  * Recurse into children, and fix the information stored about them at
808  * this level.
809  */
810  if (addr.level > FSM_BOTTOM_LEVEL)
811  {
812  int slot;
813  bool eof = false;
814 
815  for (slot = 0; slot < SlotsPerFSMPage; slot++)
816  {
817  int child_avail;
818 
820 
821  /* After we hit end-of-file, just clear the rest of the slots */
822  if (!eof)
823  child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
824  else
825  child_avail = 0;
826 
827  /* Update information about the child */
828  if (fsm_get_avail(page, slot) != child_avail)
829  {
831  fsm_set_avail(BufferGetPage(buf), slot, child_avail);
832  MarkBufferDirtyHint(buf, false);
834  }
835  }
836  }
837 
838  max_avail = fsm_get_max_avail(BufferGetPage(buf));
839 
840  /*
841  * Reset the next slot pointer. This encourages the use of low-numbered
842  * pages, increasing the chances that a later vacuum can truncate the
843  * relation.
844  */
845  ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
846 
847  ReleaseBuffer(buf);
848 
849  return max_avail;
850 }
851 
852 /*
853  * This function will return the last block number stored on given
854  * FSM page address.
855  */
856 static BlockNumber
858 {
859  int slot;
860 
861  /*
862  * Get the last slot number on the given address and convert that to block
863  * number
864  */
865  slot = SlotsPerFSMPage - 1;
866  return fsm_get_heap_blk(addr, slot);
867 }
868 
869 /*
870  * Recursively update the FSM tree from given address to
871  * all the way up to root.
872  */
873 static void
875 {
876  uint16 parentslot;
877  FSMAddress parent;
878 
879  if (addr.level == FSM_ROOT_LEVEL)
880  return;
881 
882  /*
883  * Get the parent page and our slot in the parent page, and update the
884  * information in that.
885  */
886  parent = fsm_get_parent(addr, &parentslot);
887  fsm_set_and_search(rel, parent, parentslot, new_cat, 0);
888  fsm_update_recursive(rel, parent, new_cat);
889 }
#define SlotsPerFSMPage
Definition: fsm_internals.h:61
int logpageno
Definition: freespace.c:86
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
uint8 fsm_get_max_avail(Page page)
Definition: fsmpage.c:138
bool fsm_truncate_avail(Page page, int nslots)
Definition: fsmpage.c:313
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1009
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:376
#define FSM_BOTTOM_LEVEL
Definition: freespace.c:77
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3379
static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot)
Definition: freespace.c:521
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define ExclusiveLock
Definition: lockdefs.h:44
bool fsm_set_avail(Page page, int slot, uint8 value)
Definition: fsmpage.c:63
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:640
bool InRecovery
Definition: xlog.c:194
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode)
Definition: xlogutils.c:438
unsigned char uint8
Definition: c.h:256
#define InvalidBuffer
Definition: buf.h:25
void smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
Definition: smgr.c:684
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot)
Definition: freespace.c:510
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
BlockNumber smgr_fsm_nblocks
Definition: smgr.h:56
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:287
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
#define FSM_CAT_STEP
Definition: freespace.c:64
static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot)
Definition: freespace.c:539
#define RelationOpenSmgr(relation)
Definition: rel.h:460
static const FSMAddress FSM_ROOT_ADDRESS
Definition: freespace.c:90
unsigned short uint16
Definition: c.h:257
void pfree(void *pointer)
Definition: mcxt.c:949
#define FSM_ROOT_LEVEL
Definition: freespace.c:76
static Size fsm_space_cat_to_avail(uint8 cat)
Definition: freespace.c:422
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
int level
Definition: freespace.c:85
void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:238
FSMPageData * FSMPage
Definition: fsm_internals.h:45
#define FSM_TREE_DEPTH
Definition: freespace.c:74
static char * buf
Definition: pg_test_fsync.c:67
void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
Definition: freespace.c:299
static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot)
Definition: freespace.c:495
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:379
static uint8 fsm_space_avail_to_cat(Size avail)
Definition: freespace.c:396
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:332
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:270
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:382
#define PageGetContents(page)
Definition: bufpage.h:242
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
static void fsm_extend(Relation rel, BlockNumber fsm_nblocks)
Definition: freespace.c:609
static uint8 fsm_space_needed_to_cat(Size needed)
Definition: freespace.c:436
static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, uint8 newValue, uint8 minValue)
Definition: freespace.c:668
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:672
#define Assert(condition)
Definition: c.h:664
static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr)
Definition: freespace.c:857
size_t Size
Definition: c.h:350
void UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum, BlockNumber endBlkNum, Size freespace)
Definition: freespace.c:201
#define InvalidBlockNumber
Definition: block.h:33
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1199
static BlockNumber fsm_logical_to_physical(FSMAddress addr)
Definition: freespace.c:459
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
uint8 fsm_get_avail(Page page, int slot)
Definition: fsmpage.c:122
#define RelationNeedsWAL(relation)
Definition: rel.h:505
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:600
#define PageIsNew(page)
Definition: bufpage.h:225
void * palloc(Size size)
Definition: mcxt.c:848
static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
Definition: freespace.c:558
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof)
Definition: freespace.c:788
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:132
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:149
#define elog
Definition: elog.h:219
#define MaxFSMRequestSize
Definition: freespace.c:65
int Buffer
Definition: buf.h:23
#define XLogHintBitIsNeeded()
Definition: xlog.h:157
Pointer Page
Definition: bufpage.h:74
static BlockNumber fsm_search(Relation rel, uint8 min_cat)
Definition: freespace.c:700
int fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext, bool exclusive_lock_held)
Definition: fsmpage.c:158
static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat)
Definition: freespace.c:874
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:41