visibilitymap.c
/*-------------------------------------------------------------------------
 *
 * visibilitymap.c
 *    bitmap for tracking visibility of heap tuples
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/heap/visibilitymap.c
 *
 * INTERFACE ROUTINES
 *    visibilitymap_clear      - clear bits for one page in the visibility map
 *    visibilitymap_pin        - pin a map page for setting a bit
 *    visibilitymap_pin_ok     - check whether correct map page is already pinned
 *    visibilitymap_set        - set a bit in a previously pinned page
 *    visibilitymap_get_status - get status of bits
 *    visibilitymap_count      - count number of bits set in visibility map
 *    visibilitymap_truncate   - truncate the visibility map
 *
 * NOTES
 *
 * The visibility map is a bitmap with two bits (all-visible and all-frozen)
 * per heap page. A set all-visible bit means that all tuples on the page are
 * known visible to all transactions, and therefore the page doesn't need to
 * be vacuumed. A set all-frozen bit means that all tuples on the page are
 * completely frozen, and therefore the page doesn't need to be vacuumed even
 * if a whole-table-scanning vacuum is required (e.g. an anti-wraparound
 * vacuum). The all-frozen bit must be set only when the page is already
 * all-visible.
 *
 * The map is conservative in the sense that we make sure that whenever a bit
 * is set, we know the condition is true, but if a bit is not set, it might or
 * might not be true.
 *
 * Clearing visibility map bits is not separately WAL-logged. The callers
 * must make sure that whenever a bit is cleared, the bit is cleared on WAL
 * replay of the updating operation as well.
 *
 * When we *set* a visibility map bit during VACUUM, we must write WAL. This
 * may seem counterintuitive, since the bit is basically a hint: if it is
 * clear, it may still be the case that every tuple on the page is visible to
 * all transactions; we just don't know that for certain. The difficulty is
 * that there are two bits which are typically set together: the
 * PD_ALL_VISIBLE bit on the page itself, and the visibility map bit. If a
 * crash occurs after the visibility map page makes it to disk and before the
 * updated heap page makes it to disk, redo must set the bit on the heap page.
 * Otherwise, the next insert, update, or delete on the heap page will fail to
 * realize that the visibility map bit must be cleared, possibly causing
 * index-only scans to return wrong answers.
 *
 * VACUUM will normally skip pages for which the visibility map bit is set;
 * such pages can't contain any dead tuples and therefore don't need
 * vacuuming.
 *
 * LOCKING
 *
 * In heapam.c, whenever a page is modified so that not all tuples on the
 * page are visible to everyone anymore, the corresponding bit in the
 * visibility map is cleared. In order to be crash-safe, we need to do this
 * while still holding a lock on the heap page and in the same critical
 * section that logs the page modification. However, we don't want to hold
 * the buffer lock over any I/O that may be required to read in the visibility
 * map page. To avoid this, we examine the heap page before locking it;
 * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
 * page. Then, we lock the buffer. But this creates a race condition: there
 * is a possibility that in the time it takes to lock the buffer, the
 * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
 * buffer, pin the visibility map page, and relock the buffer. This shouldn't
 * happen often, because only VACUUM currently sets visibility map bits,
 * and the race will only occur if VACUUM processes a given page at almost
 * exactly the same time that someone tries to further modify it.
 *
 * To set a bit, you need to hold a lock on the heap page. That prevents
 * the race condition where VACUUM sees that all tuples on the page are
 * visible to everyone, but another backend modifies the page before VACUUM
 * sets the bit in the visibility map.
 *
 * When a bit is set, the LSN of the visibility map page is updated to make
 * sure that the visibility map update doesn't get written to disk before the
 * WAL record of the changes that made it possible to set the bit is flushed.
 * But when a bit is cleared, we don't have to do that because it's always
 * safe to clear a bit in the map from a correctness point of view.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam_xlog.h"
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/inval.h"


/*#define TRACE_VISIBILITYMAP */

/*
 * Size of the bitmap on each visibility map page, in bytes. There are no
 * extra headers, so the whole page minus the standard page header is
 * used for the bitmap.
 */
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))

/* Number of heap blocks we can represent in one byte */
#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)

/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)

/* Mapping from heap block number to the right bit in the visibility map */
#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)

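/*
 * Worked example (illustrative only, assuming the default BLCKSZ of 8192 and
 * BITS_PER_HEAPBLOCK of 2): MAPSIZE is 8192 - 24 = 8168 bytes, so
 * HEAPBLOCKS_PER_BYTE is 4 and HEAPBLOCKS_PER_PAGE is 32672, i.e. one
 * visibility map page covers roughly 255 MB of heap. Heap block 40000 then
 * maps to VM block 1 (40000 / 32672), byte 1832 within that page
 * ((40000 % 32672) / 4), and bit offset 0 within that byte ((40000 % 4) * 2).
 */
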
/* tables for fast counting of set bits for visible and frozen */
static const uint8 number_of_ones_for_visible[256] = {
    0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
    0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
    1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
    2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
};
static const uint8 number_of_ones_for_frozen[256] = {
    0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
    0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
    0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
    2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
    2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
    2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};

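/*
 * Illustrative sketch (not part of the original file, never compiled): one
 * way to express what the lookup tables above encode. In each map byte the
 * low bit of every two-bit pair is the all-visible flag and the high bit is
 * the all-frozen flag, so number_of_ones_for_visible[b] counts pairs whose
 * VISIBILITYMAP_ALL_VISIBLE bit is set and number_of_ones_for_frozen[b]
 * counts pairs whose VISIBILITYMAP_ALL_FROZEN bit is set.
 */
#if 0
static uint8
count_set_flags(uint8 mapbyte, uint8 flag)
{
    uint8       n = 0;
    int         i;

    for (i = 0; i < HEAPBLOCKS_PER_BYTE; i++)
    {
        if ((mapbyte >> (i * BITS_PER_HEAPBLOCK)) & flag)
            n++;
    }
    return n;
}

/*
 * count_set_flags(b, VISIBILITYMAP_ALL_VISIBLE) == number_of_ones_for_visible[b]
 * count_set_flags(b, VISIBILITYMAP_ALL_FROZEN)  == number_of_ones_for_frozen[b]
 */
#endif
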
/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
static void vm_extend(Relation rel, BlockNumber nvmblocks);


/*
 * visibilitymap_clear - clear specified bits for one page in visibility map
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O. Returns true if any bits have been cleared and false otherwise.
 */
bool
visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
    int         mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    int         mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
    uint8       mask = flags << mapOffset;
    char       *map;
    bool        cleared = false;

    Assert(flags & VISIBILITYMAP_VALID_BITS);

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
#endif

    if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
        elog(ERROR, "wrong buffer passed to visibilitymap_clear");

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    map = PageGetContents(BufferGetPage(buf));

    if (map[mapByte] & mask)
    {
        map[mapByte] &= ~mask;

        MarkBufferDirty(buf);
        cleared = true;
    }

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    return cleared;
}

/*
 * visibilitymap_pin - pin a map page for setting a bit
 *
 * Setting a bit in the visibility map is a two-phase operation. First, call
 * visibilitymap_pin, to pin the visibility map page containing the bit for
 * the heap page. Because that can require I/O to read the map page, you
 * shouldn't hold a lock on the heap page while doing that. Then, call
 * visibilitymap_set to actually set the bit.
 *
 * On entry, *buf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. On return, *buf is a valid buffer with the map page containing
 * the bit for heapBlk.
 *
 * If the page doesn't exist in the map file yet, it is extended.
 */
void
visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);

    /* Reuse the old pinned buffer if possible */
    if (BufferIsValid(*buf))
    {
        if (BufferGetBlockNumber(*buf) == mapBlock)
            return;

        ReleaseBuffer(*buf);
    }
    *buf = vm_readbuf(rel, mapBlock, true);
}

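/*
 * Illustrative sketch (not part of the original file, never compiled): the
 * two-phase protocol described above, roughly as a VACUUM-like caller would
 * use it. The function name and the cutoff_xid value are hypothetical, and
 * WAL details, the visibility re-check after locking, and error handling are
 * omitted.
 */
#if 0
static void
example_mark_page_all_visible(Relation rel, Buffer heapBuf,
                              BlockNumber heapBlk, TransactionId cutoff_xid)
{
    Buffer      vmbuffer = InvalidBuffer;

    /* Phase 1: pin the map page before locking the heap page (may do I/O) */
    visibilitymap_pin(rel, heapBlk, &vmbuffer);

    LockBuffer(heapBuf, BUFFER_LOCK_EXCLUSIVE);

    /* Phase 2: with the heap page locked, set the page flag and the VM bit */
    PageSetAllVisible(BufferGetPage(heapBuf));
    MarkBufferDirty(heapBuf);
    visibilitymap_set(rel, heapBlk, heapBuf, InvalidXLogRecPtr,
                      vmbuffer, cutoff_xid, VISIBILITYMAP_ALL_VISIBLE);

    LockBuffer(heapBuf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(vmbuffer);
}
#endif
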
/*
 * visibilitymap_pin_ok - do we already have the correct page pinned?
 *
 * On entry, buf should be InvalidBuffer or a valid buffer returned by
 * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. The return value indicates whether the buffer covers the
 * given heapBlk.
 */
bool
visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);

    return BufferIsValid(buf) && BufferGetBlockNumber(buf) == mapBlock;
}

/*
 * visibilitymap_set - set bit(s) on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value. cutoff_xid is the largest xmin on the page being
 * marked all-visible; it is needed for Hot Standby, and can be
 * InvalidTransactionId if the page contains no tuples. It can also be set
 * to InvalidTransactionId when a page that is already all-visible is being
 * marked all-frozen.
 *
 * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
 * this function. Except in recovery, caller should also pass the heap
 * buffer. When checksums are enabled and we're not in recovery, we must add
 * the heap buffer to the WAL chain to protect it from being torn.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
                  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
                  uint8 flags)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
    uint32      mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    uint8       mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
    Page        page;
    uint8      *map;

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

    Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
    Assert(InRecovery || BufferIsValid(heapBuf));
    Assert(flags & VISIBILITYMAP_VALID_BITS);

    /* Check that we have the right heap page pinned, if present */
    if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
        elog(ERROR, "wrong heap buffer passed to visibilitymap_set");

    /* Check that we have the right VM page pinned */
    if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
        elog(ERROR, "wrong VM buffer passed to visibilitymap_set");

    page = BufferGetPage(vmBuf);
    map = (uint8 *) PageGetContents(page);
    LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);

    if (flags != (map[mapByte] >> mapOffset & VISIBILITYMAP_VALID_BITS))
    {
        START_CRIT_SECTION();

        map[mapByte] |= (flags << mapOffset);
        MarkBufferDirty(vmBuf);

        if (RelationNeedsWAL(rel))
        {
            if (XLogRecPtrIsInvalid(recptr))
            {
                Assert(!InRecovery);
                recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
                                          cutoff_xid, flags);

                /*
                 * If data checksums are enabled (or wal_log_hints=on), we
                 * need to protect the heap page from being torn.
                 */
                if (XLogHintBitIsNeeded())
                {
                    Page        heapPage = BufferGetPage(heapBuf);

                    /* caller is expected to set PD_ALL_VISIBLE first */
                    Assert(PageIsAllVisible(heapPage));
                    PageSetLSN(heapPage, recptr);
                }
            }
            PageSetLSN(page, recptr);
        }

        END_CRIT_SECTION();
    }

    LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
}

/*
 * visibilitymap_get_status - get status of bits
 *
 * Are all tuples on heapBlk visible to all transactions, or all frozen,
 * according to the visibility map?
 *
 * On entry, *buf should be InvalidBuffer or a valid buffer returned by an
 * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
 * relation. On return, *buf is a valid buffer with the map page containing
 * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
 * releasing *buf after it's done testing and setting bits.
 *
 * NOTE: This function is typically called without a lock on the heap page,
 * so somebody else could change the bit just after we look at it. In fact,
 * since we don't lock the visibility map page either, it's even possible that
 * someone else could have changed the bit just before we look at it, yet we
 * might still see the old value. It is the caller's responsibility to deal
 * with all concurrency issues!
 */
uint8
visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
    uint32      mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    uint8       mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
    char       *map;
    uint8       result;

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
#endif

    /* Reuse the old pinned buffer if possible */
    if (BufferIsValid(*buf))
    {
        if (BufferGetBlockNumber(*buf) != mapBlock)
        {
            ReleaseBuffer(*buf);
            *buf = InvalidBuffer;
        }
    }

    if (!BufferIsValid(*buf))
    {
        *buf = vm_readbuf(rel, mapBlock, false);
        if (!BufferIsValid(*buf))
            return false;
    }

    map = PageGetContents(BufferGetPage(*buf));

    /*
     * A single byte read is atomic. There could be memory-ordering effects
     * here, but for performance reasons we make it the caller's job to worry
     * about that.
     */
    result = ((map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS);
    return result;
}
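
/*
 * Illustrative sketch (not part of the original file, never compiled): how a
 * reader such as an index-only scan might consult the map through this
 * function. The helper name is hypothetical; VISIBILITYMAP_ALL_VISIBLE comes
 * from visibilitymap.h.
 */
#if 0
static bool
example_page_is_all_visible(Relation rel, BlockNumber heapBlk)
{
    Buffer      vmbuffer = InvalidBuffer;
    uint8       status = visibilitymap_get_status(rel, heapBlk, &vmbuffer);

    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);

    return (status & VISIBILITYMAP_ALL_VISIBLE) != 0;
}
#endif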

/*
 * visibilitymap_count - count number of bits set in visibility map
 *
 * Note: we ignore the possibility of race conditions when the table is being
 * extended concurrently with the call. New pages added to the table aren't
 * going to be marked all-visible or all-frozen, so they won't affect the result.
 */
void
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{
    BlockNumber mapBlock;

    /* all_visible must be specified */
    Assert(all_visible);

    *all_visible = 0;
    if (all_frozen)
        *all_frozen = 0;

    for (mapBlock = 0;; mapBlock++)
    {
        Buffer      mapBuffer;
        unsigned char *map;
        int         i;

        /*
         * Read till we fall off the end of the map. We assume that any extra
         * bytes in the last page are zeroed, so we don't bother excluding
         * them from the count.
         */
        mapBuffer = vm_readbuf(rel, mapBlock, false);
        if (!BufferIsValid(mapBuffer))
            break;

        /*
         * We choose not to lock the page, since the result is going to be
         * immediately stale anyway if anyone is concurrently setting or
         * clearing bits, and we only really need an approximate value.
         */
        map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));

        for (i = 0; i < MAPSIZE; i++)
        {
            *all_visible += number_of_ones_for_visible[map[i]];
            if (all_frozen)
                *all_frozen += number_of_ones_for_frozen[map[i]];
        }

        ReleaseBuffer(mapBuffer);
    }
}

/*
 * visibilitymap_truncate - truncate the visibility map
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the VM again.
 *
 * nheapblocks is the new size of the heap.
 */
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
{
    BlockNumber newnblocks;

    /* last remaining block, byte, and bit */
    BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
    uint32      truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
    uint8       truncOffset = HEAPBLK_TO_OFFSET(nheapblocks);

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
#endif

    RelationOpenSmgr(rel);

    /*
     * If no visibility map has been created yet for this relation, there's
     * nothing to truncate.
     */
    if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
        return;

    /*
     * Unless the new size is exactly at a visibility map page boundary, the
     * tail bits in the last remaining map page, representing truncated heap
     * blocks, need to be cleared. This is not only tidy, but also necessary
     * because we don't get a chance to clear the bits if the heap is extended
     * again.
     */
    if (truncByte != 0 || truncOffset != 0)
    {
        Buffer      mapBuffer;
        Page        page;
        char       *map;

        newnblocks = truncBlock + 1;

        mapBuffer = vm_readbuf(rel, truncBlock, false);
        if (!BufferIsValid(mapBuffer))
        {
            /* nothing to do, the file was already smaller */
            return;
        }

        page = BufferGetPage(mapBuffer);
        map = PageGetContents(page);

        LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);

        /* NO EREPORT(ERROR) from here till changes are logged */
        START_CRIT_SECTION();

        /* Clear out the unwanted bytes. */
        MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));

        /*----
         * Mask out the unwanted bits of the last remaining byte.
         *
         * ((1 << 0) - 1) = 00000000
         * ((1 << 1) - 1) = 00000001
         * ...
         * ((1 << 6) - 1) = 00111111
         * ((1 << 7) - 1) = 01111111
         *----
         */
        map[truncByte] &= (1 << truncOffset) - 1;

        /*
         * Truncation of a relation is WAL-logged at a higher level, and we
         * will be called at WAL replay. But if checksums are enabled, we
         * still need to write a WAL record to protect against a torn page,
         * in case the page is flushed to disk before the truncation WAL
         * record. We cannot use MarkBufferDirtyHint here, because that will
         * not dirty the page during recovery.
         */
        MarkBufferDirty(mapBuffer);
        if (XLogHintBitIsNeeded())
            log_newpage_buffer(mapBuffer, false);

        END_CRIT_SECTION();

        UnlockReleaseBuffer(mapBuffer);
    }
    else
        newnblocks = truncBlock;

    if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
    {
        /* nothing to do, the file was already smaller than requested size */
        return;
    }

    /* Truncate the unused VM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);

    /*
     * We might as well update the local smgr_vm_nblocks setting. smgrtruncate
     * sent an smgr cache inval message, which will cause other backends to
     * invalidate their copy of smgr_vm_nblocks, and this one too at the next
     * command boundary. But this ensures it isn't outright wrong until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_vm_nblocks = newnblocks;
}

/*
 * Read a visibility map page.
 *
 * If the page doesn't exist, InvalidBuffer is returned; but if 'extend' is
 * true, the visibility map file is extended first.
 */
static Buffer
vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
{
    Buffer      buf;

    /*
     * We might not have opened the relation at the smgr level yet, or we
     * might have been forced to close it by a sinval message. The code below
     * won't necessarily notice relation extension immediately when extend =
     * false, so we rely on sinval messages to ensure that our ideas about the
     * size of the map aren't too far out of date.
     */
    RelationOpenSmgr(rel);

    /*
     * If we haven't cached the size of the visibility map fork yet, check it
     * first.
     */
    if (rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber)
    {
        if (smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
            rel->rd_smgr->smgr_vm_nblocks = smgrnblocks(rel->rd_smgr,
                                                        VISIBILITYMAP_FORKNUM);
        else
            rel->rd_smgr->smgr_vm_nblocks = 0;
    }

    /* Handle requests beyond EOF */
    if (blkno >= rel->rd_smgr->smgr_vm_nblocks)
    {
        if (extend)
            vm_extend(rel, blkno + 1);
        else
            return InvalidBuffer;
    }

    /*
     * Use ZERO_ON_ERROR mode, and initialize the page if necessary. It's
     * always safe to clear bits, so it's better to clear corrupt pages than
     * error out.
     */
    buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
                             RBM_ZERO_ON_ERROR, NULL);
    if (PageIsNew(BufferGetPage(buf)))
        PageInit(BufferGetPage(buf), BLCKSZ, 0);
    return buf;
}

/*
 * Ensure that the visibility map fork is at least vm_nblocks long, extending
 * it if necessary with zeroed pages.
 */
static void
vm_extend(Relation rel, BlockNumber vm_nblocks)
{
    BlockNumber vm_nblocks_now;
    Page        pg;

    pg = (Page) palloc(BLCKSZ);
    PageInit(pg, BLCKSZ, 0);

    /*
     * We use the relation extension lock to lock out other backends trying to
     * extend the visibility map at the same time. It also locks out extension
     * of the main fork, unnecessarily, but extending the visibility map
     * happens seldom enough that it doesn't seem worthwhile to have a
     * separate lock tag type for it.
     *
     * Note that another backend might have extended or created the relation
     * by the time we get the lock.
     */
    LockRelationForExtension(rel, ExclusiveLock);

    /* Might have to re-open if a cache flush happened */
    RelationOpenSmgr(rel);

    /*
     * Create the file first if it doesn't exist. If smgr_vm_nblocks is
     * positive then it must exist, no need for an smgrexists call.
     */
    if ((rel->rd_smgr->smgr_vm_nblocks == 0 ||
         rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) &&
        !smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
        smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false);

    vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM);

    /* Now extend the file */
    while (vm_nblocks_now < vm_nblocks)
    {
        PageSetChecksumInplace(pg, vm_nblocks_now);

        smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
                   (char *) pg, false);
        vm_nblocks_now++;
    }

    /*
     * Send a shared-inval message to force other backends to close any smgr
     * references they may have for this rel, which we are about to change.
     * This is a useful optimization because it means that backends don't have
     * to keep checking for creation or extension of the file, which happens
     * infrequently.
     */
    CacheInvalidateSmgr(rel->rd_smgr->smgr_rnode);

    /* Update local cache with the up-to-date size */
    rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;

    UnlockRelationForExtension(rel, ExclusiveLock);

    pfree(pg);
}