PostgreSQL Source Code  git master
fsmpage.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * fsmpage.c
4  * routines to search and manipulate one FSM page.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/storage/freespace/fsmpage.c
12  *
13  * NOTES:
14  *
15  * The public functions in this file form an API that hides the internal
16  * structure of a FSM page. This allows freespace.c to treat each FSM page
17  * as a black box with SlotsPerPage "slots". fsm_set_avail() and
18  * fsm_get_avail() let you get/set the value of a slot, and
19  * fsm_search_avail() lets you search for a slot with value >= X.
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24 
25 #include "storage/bufmgr.h"
26 #include "storage/fsm_internals.h"
27 
28 /* Macros to navigate the tree within a page. Root has index zero. */
29 #define leftchild(x) (2 * (x) + 1)
30 #define rightchild(x) (2 * (x) + 2)
31 #define parentof(x) (((x) - 1) / 2)
32 
33 /*
34  * Find right neighbor of x, wrapping around within the level
35  */
36 static int
38 {
39  /*
40  * Move right. This might wrap around, stepping to the leftmost node at
41  * the next level.
42  */
43  x++;
44 
45  /*
46  * Check if we stepped to the leftmost node at next level, and correct if
47  * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
48  * check if (x + 1) is a power of two, using a standard
49  * twos-complement-arithmetic trick.
50  */
51  if (((x + 1) & x) == 0)
52  x = parentof(x);
53 
54  return x;
55 }
56 
57 /*
58  * Sets the value of a slot on page. Returns true if the page was modified.
59  *
60  * The caller must hold an exclusive lock on the page.
61  */
62 bool
63 fsm_set_avail(Page page, int slot, uint8 value)
64 {
65  int nodeno = NonLeafNodesPerPage + slot;
66  FSMPage fsmpage = (FSMPage) PageGetContents(page);
67  uint8 oldvalue;
68 
69  Assert(slot < LeafNodesPerPage);
70 
71  oldvalue = fsmpage->fp_nodes[nodeno];
72 
73  /* If the value hasn't changed, we don't need to do anything */
74  if (oldvalue == value && value <= fsmpage->fp_nodes[0])
75  return false;
76 
77  fsmpage->fp_nodes[nodeno] = value;
78 
79  /*
80  * Propagate up, until we hit the root or a node that doesn't need to be
81  * updated.
82  */
83  do
84  {
85  uint8 newvalue = 0;
86  int lchild;
87  int rchild;
88 
89  nodeno = parentof(nodeno);
90  lchild = leftchild(nodeno);
91  rchild = lchild + 1;
92 
93  newvalue = fsmpage->fp_nodes[lchild];
94  if (rchild < NodesPerPage)
95  newvalue = Max(newvalue,
96  fsmpage->fp_nodes[rchild]);
97 
98  oldvalue = fsmpage->fp_nodes[nodeno];
99  if (oldvalue == newvalue)
100  break;
101 
102  fsmpage->fp_nodes[nodeno] = newvalue;
103  } while (nodeno > 0);
104 
105  /*
106  * sanity check: if the new value is (still) higher than the value at the
107  * top, the tree is corrupt. If so, rebuild.
108  */
109  if (value > fsmpage->fp_nodes[0])
110  fsm_rebuild_page(page);
111 
112  return true;
113 }
114 
115 /*
116  * Returns the value of given slot on page.
117  *
118  * Since this is just a read-only access of a single byte, the page doesn't
119  * need to be locked.
120  */
121 uint8
122 fsm_get_avail(Page page, int slot)
123 {
124  FSMPage fsmpage = (FSMPage) PageGetContents(page);
125 
126  Assert(slot < LeafNodesPerPage);
127 
128  return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
129 }
130 
131 /*
132  * Returns the value at the root of a page.
133  *
134  * Since this is just a read-only access of a single byte, the page doesn't
135  * need to be locked.
136  */
137 uint8
139 {
140  FSMPage fsmpage = (FSMPage) PageGetContents(page);
141 
142  return fsmpage->fp_nodes[0];
143 }
144 
145 /*
146  * Searches for a slot with category at least minvalue.
147  * Returns slot number, or -1 if none found.
148  *
149  * The caller must hold at least a shared lock on the page, and this
150  * function can unlock and lock the page again in exclusive mode if it
151  * needs to be updated. exclusive_lock_held should be set to true if the
152  * caller is already holding an exclusive lock, to avoid extra work.
153  *
154  * If advancenext is false, fp_next_slot is set to point to the returned
155  * slot, and if it's true, to the slot after the returned slot.
156  */
157 int
158 fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
159  bool exclusive_lock_held)
160 {
161  Page page = BufferGetPage(buf);
162  FSMPage fsmpage = (FSMPage) PageGetContents(page);
163  int nodeno;
164  int target;
165  uint16 slot;
166 
167 restart:
168 
169  /*
170  * Check the root first, and exit quickly if there's no leaf with enough
171  * free space
172  */
173  if (fsmpage->fp_nodes[0] < minvalue)
174  return -1;
175 
176  /*
177  * Start search using fp_next_slot. It's just a hint, so check that it's
178  * sane. (This also handles wrapping around when the prior call returned
179  * the last slot on the page.)
180  */
181  target = fsmpage->fp_next_slot;
182  if (target < 0 || target >= LeafNodesPerPage)
183  target = 0;
184  target += NonLeafNodesPerPage;
185 
186  /*----------
187  * Start the search from the target slot. At every step, move one
188  * node to the right, then climb up to the parent. Stop when we reach
189  * a node with enough free space (as we must, since the root has enough
190  * space).
191  *
192  * The idea is to gradually expand our "search triangle", that is, all
193  * nodes covered by the current node, and to be sure we search to the
194  * right from the start point. At the first step, only the target slot
195  * is examined. When we move up from a left child to its parent, we are
196  * adding the right-hand subtree of that parent to the search triangle.
197  * When we move right then up from a right child, we are dropping the
198  * current search triangle (which we know doesn't contain any suitable
199  * page) and instead looking at the next-larger-size triangle to its
200  * right. So we never look left from our original start point, and at
201  * each step the size of the search triangle doubles, ensuring it takes
202  * only log2(N) work to search N pages.
203  *
204  * The "move right" operation will wrap around if it hits the right edge
205  * of the tree, so the behavior is still good if we start near the right.
206  * Note also that the move-and-climb behavior ensures that we can't end
207  * up on one of the missing nodes at the right of the leaf level.
208  *
209  * For example, consider this tree:
210  *
211  * 7
212  * 7 6
213  * 5 7 6 5
214  * 4 5 5 7 2 6 5 2
215  * T
216  *
217  * Assume that the target node is the node indicated by the letter T,
218  * and we're searching for a node with value of 6 or higher. The search
219  * begins at T. At the first iteration, we move to the right, then to the
220  * parent, arriving at the rightmost 5. At the second iteration, we move
221  * to the right, wrapping around, then climb up, arriving at the 7 on the
222  * third level. 7 satisfies our search, so we descend down to the bottom,
223  * following the path of sevens. This is in fact the first suitable page
224  * to the right of (allowing for wraparound) our start point.
225  *----------
226  */
227  nodeno = target;
228  while (nodeno > 0)
229  {
230  if (fsmpage->fp_nodes[nodeno] >= minvalue)
231  break;
232 
233  /*
234  * Move to the right, wrapping around on same level if necessary, then
235  * climb up.
236  */
237  nodeno = parentof(rightneighbor(nodeno));
238  }
239 
240  /*
241  * We're now at a node with enough free space, somewhere in the middle of
242  * the tree. Descend to the bottom, following a path with enough free
243  * space, preferring to move left if there's a choice.
244  */
245  while (nodeno < NonLeafNodesPerPage)
246  {
247  int childnodeno = leftchild(nodeno);
248 
249  if (childnodeno < NodesPerPage &&
250  fsmpage->fp_nodes[childnodeno] >= minvalue)
251  {
252  nodeno = childnodeno;
253  continue;
254  }
255  childnodeno++; /* point to right child */
256  if (childnodeno < NodesPerPage &&
257  fsmpage->fp_nodes[childnodeno] >= minvalue)
258  {
259  nodeno = childnodeno;
260  }
261  else
262  {
263  /*
264  * Oops. The parent node promised that either left or right child
265  * has enough space, but neither actually did. This can happen in
266  * case of a "torn page", IOW if we crashed earlier while writing
267  * the page to disk, and only part of the page made it to disk.
268  *
269  * Fix the corruption and restart.
270  */
271  RelFileLocator rlocator;
272  ForkNumber forknum;
273  BlockNumber blknum;
274 
275  BufferGetTag(buf, &rlocator, &forknum, &blknum);
276  elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
277  blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
278 
279  /* make sure we hold an exclusive lock */
280  if (!exclusive_lock_held)
281  {
284  exclusive_lock_held = true;
285  }
286  fsm_rebuild_page(page);
287  MarkBufferDirtyHint(buf, false);
288  goto restart;
289  }
290  }
291 
292  /* We're now at the bottom level, at a node with enough space. */
293  slot = nodeno - NonLeafNodesPerPage;
294 
295  /*
296  * Update the next-target pointer. Note that we do this even if we're only
297  * holding a shared lock, on the grounds that it's better to use a shared
298  * lock and get a garbled next pointer every now and then, than take the
299  * concurrency hit of an exclusive lock.
300  *
301  * Wrap-around is handled at the beginning of this function.
302  */
303  fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
304 
305  return slot;
306 }
307 
308 /*
309  * Sets the available space to zero for all slots numbered >= nslots.
310  * Returns true if the page was modified.
311  */
312 bool
313 fsm_truncate_avail(Page page, int nslots)
314 {
315  FSMPage fsmpage = (FSMPage) PageGetContents(page);
316  uint8 *ptr;
317  bool changed = false;
318 
319  Assert(nslots >= 0 && nslots < LeafNodesPerPage);
320 
321  /* Clear all truncated leaf nodes */
322  ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
323  for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
324  {
325  if (*ptr != 0)
326  changed = true;
327  *ptr = 0;
328  }
329 
330  /* Fix upper nodes. */
331  if (changed)
332  fsm_rebuild_page(page);
333 
334  return changed;
335 }
336 
337 /*
338  * Reconstructs the upper levels of a page. Returns true if the page
339  * was modified.
340  */
341 bool
343 {
344  FSMPage fsmpage = (FSMPage) PageGetContents(page);
345  bool changed = false;
346  int nodeno;
347 
348  /*
349  * Start from the lowest non-leaf level, at last node, working our way
350  * backwards, through all non-leaf nodes at all levels, up to the root.
351  */
352  for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
353  {
354  int lchild = leftchild(nodeno);
355  int rchild = lchild + 1;
356  uint8 newvalue = 0;
357 
358  /* The first few nodes we examine might have zero or one child. */
359  if (lchild < NodesPerPage)
360  newvalue = fsmpage->fp_nodes[lchild];
361 
362  if (rchild < NodesPerPage)
363  newvalue = Max(newvalue,
364  fsmpage->fp_nodes[rchild]);
365 
366  if (fsmpage->fp_nodes[nodeno] != newvalue)
367  {
368  fsmpage->fp_nodes[nodeno] = newvalue;
369  changed = true;
370  }
371  }
372 
373  return changed;
374 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3745
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4988
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
static char * PageGetContents(Page page)
Definition: bufpage.h:257
Pointer Page
Definition: bufpage.h:81
uint8_t uint8
Definition: c.h:483
#define Max(x, y)
Definition: c.h:952
#define Assert(condition)
Definition: c.h:812
uint16_t uint16
Definition: c.h:484
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
#define NodesPerPage
Definition: fsm_internals.h:51
FSMPageData * FSMPage
Definition: fsm_internals.h:45
#define NonLeafNodesPerPage
Definition: fsm_internals.h:54
#define LeafNodesPerPage
Definition: fsm_internals.h:55
static int rightneighbor(int x)
Definition: fsmpage.c:37
uint8 fsm_get_avail(Page page, int slot)
Definition: fsmpage.c:122
bool fsm_rebuild_page(Page page)
Definition: fsmpage.c:342
uint8 fsm_get_max_avail(Page page)
Definition: fsmpage.c:138
bool fsm_set_avail(Page page, int slot, uint8 value)
Definition: fsmpage.c:63
bool fsm_truncate_avail(Page page, int nslots)
Definition: fsmpage.c:313
int fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext, bool exclusive_lock_held)
Definition: fsmpage.c:158
#define parentof(x)
Definition: fsmpage.c:31
#define leftchild(x)
Definition: fsmpage.c:29
static struct @160 value
int x
Definition: isn.c:70
static char * buf
Definition: pg_test_fsync.c:72
ForkNumber
Definition: relpath.h:56
uint8 fp_nodes[FLEXIBLE_ARRAY_MEMBER]
Definition: fsm_internals.h:42
RelFileNumber relNumber