PostgreSQL Source Code  git master
pg_visibility.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_visibility.c
4  * display visibility map information and page-level visibility bits
5  *
6  * Copyright (c) 2016-2024, PostgreSQL Global Development Group
7  *
8  * contrib/pg_visibility/pg_visibility.c
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12 
13 #include "access/heapam.h"
14 #include "access/htup_details.h"
15 #include "access/visibilitymap.h"
16 #include "access/xloginsert.h"
17 #include "catalog/pg_type.h"
18 #include "catalog/storage_xlog.h"
19 #include "funcapi.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "storage/proc.h"
23 #include "storage/procarray.h"
24 #include "storage/read_stream.h"
25 #include "storage/smgr.h"
26 #include "utils/rel.h"
27 #include "utils/snapmgr.h"
28 
29 PG_MODULE_MAGIC;
30 
31 typedef struct vbits
32 {
33  BlockNumber next;
34  BlockNumber count;
35  uint8 bits[FLEXIBLE_ARRAY_MEMBER];
36 } vbits;
37 
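/*
 * Annotation (illustrative sketch, not upstream code): bits[] packs three
 * flags per block, matching how collect_visibility_data() fills it and how
 * pg_visibility_map_rel()/pg_visibility_rel() decode it:
 *   bit 0 (1 << 0): all-visible according to the visibility map
 *   bit 1 (1 << 1): all-frozen according to the visibility map
 *   bit 2 (1 << 2): PD_ALL_VISIBLE set on the heap page (include_pd only)
 */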
38 typedef struct corrupt_items
39 {
40  BlockNumber next;
41  BlockNumber count;
42  ItemPointer tids;
43 } corrupt_items;
44 
45 /* for collect_corrupt_items_read_stream_next_block */
46 struct collect_corrupt_items_read_stream_private
47 {
48  bool all_frozen;
49  bool all_visible;
50  BlockNumber current_blocknum;
51  BlockNumber last_exclusive;
52  Relation rel;
53  Buffer vmbuffer;
54 };
55 
56 PG_FUNCTION_INFO_V1(pg_visibility_map);
57 PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
58 PG_FUNCTION_INFO_V1(pg_visibility);
59 PG_FUNCTION_INFO_V1(pg_visibility_rel);
60 PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
61 PG_FUNCTION_INFO_V1(pg_check_frozen);
62 PG_FUNCTION_INFO_V1(pg_check_visible);
63 PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
64 
65 static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
66 static vbits *collect_visibility_data(Oid relid, bool include_pd);
67 static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
68  bool all_frozen);
69 static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
70 static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
71  Buffer buffer);
72 static void check_relation_relkind(Relation rel);
73 
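/*
 * Annotation (illustrative usage sketch): the functions below are exposed at
 * the SQL level by the extension's install script; the SQL signatures shown
 * here come from the pg_visibility documentation rather than this file, so
 * treat them as an assumption:
 *
 *   CREATE EXTENSION pg_visibility;
 *   SELECT * FROM pg_visibility_map('pg_class'::regclass, 0);
 *   SELECT * FROM pg_visibility_map_summary('pg_class'::regclass);
 *   SELECT * FROM pg_check_visible('pg_class'::regclass);
 */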
74 /*
75  * Visibility map information for a single block of a relation.
76  *
77  * Note: the VM code will silently return zeroes for pages past the end
78  * of the map, so we allow probes up to MaxBlockNumber regardless of the
79  * actual relation size.
80  */
81 Datum
82 pg_visibility_map(PG_FUNCTION_ARGS)
83 {
84  Oid relid = PG_GETARG_OID(0);
85  int64 blkno = PG_GETARG_INT64(1);
86  int32 mapbits;
87  Relation rel;
88  Buffer vmbuffer = InvalidBuffer;
89  TupleDesc tupdesc;
90  Datum values[2];
91  bool nulls[2] = {0};
92 
93  rel = relation_open(relid, AccessShareLock);
94 
95  /* Only some relkinds have a visibility map */
96  check_relation_relkind(rel);
97 
98  if (blkno < 0 || blkno > MaxBlockNumber)
99  ereport(ERROR,
100  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
101  errmsg("invalid block number")));
102 
103  tupdesc = pg_visibility_tupdesc(false, false);
104 
105  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
106  if (vmbuffer != InvalidBuffer)
107  ReleaseBuffer(vmbuffer);
108  values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
109  values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
110 
111  relation_close(rel, AccessShareLock);
112 
113  PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
114 }
115 
116 /*
117  * Visibility map information for a single block of a relation, plus the
118  * page-level information for the same block.
119  */
120 Datum
121 pg_visibility(PG_FUNCTION_ARGS)
122 {
123  Oid relid = PG_GETARG_OID(0);
124  int64 blkno = PG_GETARG_INT64(1);
125  int32 mapbits;
126  Relation rel;
127  Buffer vmbuffer = InvalidBuffer;
128  Buffer buffer;
129  Page page;
130  TupleDesc tupdesc;
131  Datum values[3];
132  bool nulls[3] = {0};
133 
134  rel = relation_open(relid, AccessShareLock);
135 
136  /* Only some relkinds have a visibility map */
137  check_relation_relkind(rel);
138 
139  if (blkno < 0 || blkno > MaxBlockNumber)
140  ereport(ERROR,
141  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
142  errmsg("invalid block number")));
143 
144  tupdesc = pg_visibility_tupdesc(false, true);
145 
146  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
147  if (vmbuffer != InvalidBuffer)
148  ReleaseBuffer(vmbuffer);
149  values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
150  values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
151 
152  /* Here we have to explicitly check rel size ... */
153  if (blkno < RelationGetNumberOfBlocks(rel))
154  {
155  buffer = ReadBuffer(rel, blkno);
156  LockBuffer(buffer, BUFFER_LOCK_SHARE);
157 
158  page = BufferGetPage(buffer);
159  values[2] = BoolGetDatum(PageIsAllVisible(page));
160 
161  UnlockReleaseBuffer(buffer);
162  }
163  else
164  {
165  /* As with the vismap, silently return 0 for pages past EOF */
166  values[2] = BoolGetDatum(false);
167  }
168 
169  relation_close(rel, AccessShareLock);
170 
171  PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
172 }
173 
174 /*
175  * Visibility map information for every block in a relation.
176  */
177 Datum
178 pg_visibility_map_rel(PG_FUNCTION_ARGS)
179 {
180  FuncCallContext *funcctx;
181  vbits *info;
182 
183  if (SRF_IS_FIRSTCALL())
184  {
185  Oid relid = PG_GETARG_OID(0);
186  MemoryContext oldcontext;
187 
188  funcctx = SRF_FIRSTCALL_INIT();
189  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
190  funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
191  /* collect_visibility_data will verify the relkind */
192  funcctx->user_fctx = collect_visibility_data(relid, false);
193  MemoryContextSwitchTo(oldcontext);
194  }
195 
196  funcctx = SRF_PERCALL_SETUP();
197  info = (vbits *) funcctx->user_fctx;
198 
199  if (info->next < info->count)
200  {
201  Datum values[3];
202  bool nulls[3] = {0};
203  HeapTuple tuple;
204 
205  values[0] = Int64GetDatum(info->next);
206  values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
207  values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
208  info->next++;
209 
210  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
211  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
212  }
213 
214  SRF_RETURN_DONE(funcctx);
215 }
216 
217 /*
218  * Visibility map information for every block in a relation, plus the page
219  * level information for each block.
220  */
221 Datum
222 pg_visibility_rel(PG_FUNCTION_ARGS)
223 {
224  FuncCallContext *funcctx;
225  vbits *info;
226 
227  if (SRF_IS_FIRSTCALL())
228  {
229  Oid relid = PG_GETARG_OID(0);
230  MemoryContext oldcontext;
231 
232  funcctx = SRF_FIRSTCALL_INIT();
233  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
234  funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
235  /* collect_visibility_data will verify the relkind */
236  funcctx->user_fctx = collect_visibility_data(relid, true);
237  MemoryContextSwitchTo(oldcontext);
238  }
239 
240  funcctx = SRF_PERCALL_SETUP();
241  info = (vbits *) funcctx->user_fctx;
242 
243  if (info->next < info->count)
244  {
245  Datum values[4];
246  bool nulls[4] = {0};
247  HeapTuple tuple;
248 
249  values[0] = Int64GetDatum(info->next);
250  values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
251  values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
252  values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
253  info->next++;
254 
255  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
256  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
257  }
258 
259  SRF_RETURN_DONE(funcctx);
260 }
261 
262 /*
263  * Count the number of all-visible and all-frozen pages in the visibility
264  * map for a particular relation.
265  */
266 Datum
267 pg_visibility_map_summary(PG_FUNCTION_ARGS)
268 {
269  Oid relid = PG_GETARG_OID(0);
270  Relation rel;
271  BlockNumber nblocks;
272  BlockNumber blkno;
273  Buffer vmbuffer = InvalidBuffer;
274  int64 all_visible = 0;
275  int64 all_frozen = 0;
276  TupleDesc tupdesc;
277  Datum values[2];
278  bool nulls[2] = {0};
279 
280  rel = relation_open(relid, AccessShareLock);
281 
282  /* Only some relkinds have a visibility map */
283  check_relation_relkind(rel);
284 
285  nblocks = RelationGetNumberOfBlocks(rel);
286 
287  for (blkno = 0; blkno < nblocks; ++blkno)
288  {
289  int32 mapbits;
290 
291  /* Make sure we are interruptible. */
292  CHECK_FOR_INTERRUPTS();
293 
294  /* Get map info. */
295  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
296  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
297  ++all_visible;
298  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
299  ++all_frozen;
300  }
301 
302  /* Clean up. */
303  if (vmbuffer != InvalidBuffer)
304  ReleaseBuffer(vmbuffer);
305  relation_close(rel, AccessShareLock);
306 
307  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
308  elog(ERROR, "return type must be a row type");
309 
310  values[0] = Int64GetDatum(all_visible);
311  values[1] = Int64GetDatum(all_frozen);
312 
313  PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
314 }
315 
316 /*
317  * Return the TIDs of non-frozen tuples present in pages marked all-frozen
318  * in the visibility map. We hope no one will ever find any, but there could
319  * be bugs, database corruption, etc.
320  */
321 Datum
322 pg_check_frozen(PG_FUNCTION_ARGS)
323 {
324  FuncCallContext *funcctx;
325  corrupt_items *items;
326 
327  if (SRF_IS_FIRSTCALL())
328  {
329  Oid relid = PG_GETARG_OID(0);
330  MemoryContext oldcontext;
331 
332  funcctx = SRF_FIRSTCALL_INIT();
333  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
334  /* collect_corrupt_items will verify the relkind */
335  funcctx->user_fctx = collect_corrupt_items(relid, false, true);
336  MemoryContextSwitchTo(oldcontext);
337  }
338 
339  funcctx = SRF_PERCALL_SETUP();
340  items = (corrupt_items *) funcctx->user_fctx;
341 
342  if (items->next < items->count)
343  SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
344 
345  SRF_RETURN_DONE(funcctx);
346 }
347 
348 /*
349  * Return the TIDs of not-all-visible tuples in pages marked all-visible
350  * in the visibility map. We hope no one will ever find any, but there could
351  * be bugs, database corruption, etc.
352  */
353 Datum
354 pg_check_visible(PG_FUNCTION_ARGS)
355 {
356  FuncCallContext *funcctx;
357  corrupt_items *items;
358 
359  if (SRF_IS_FIRSTCALL())
360  {
361  Oid relid = PG_GETARG_OID(0);
362  MemoryContext oldcontext;
363 
364  funcctx = SRF_FIRSTCALL_INIT();
365  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
366  /* collect_corrupt_items will verify the relkind */
367  funcctx->user_fctx = collect_corrupt_items(relid, true, false);
368  MemoryContextSwitchTo(oldcontext);
369  }
370 
371  funcctx = SRF_PERCALL_SETUP();
372  items = (corrupt_items *) funcctx->user_fctx;
373 
374  if (items->next < items->count)
375  SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
376 
377  SRF_RETURN_DONE(funcctx);
378 }
379 
380 /*
381  * Remove the visibility map fork for a relation. If there turn out to be
382  * any bugs in the visibility map code that require rebuilding the VM, this
383  * provides users with a way to do it that is cleaner than shutting down the
384  * server and removing files by hand.
385  *
386  * This is a cut-down version of RelationTruncate.
387  */
388 Datum
389 pg_truncate_visibility_map(PG_FUNCTION_ARGS)
390 {
391  Oid relid = PG_GETARG_OID(0);
392  Relation rel;
393  ForkNumber fork;
394  BlockNumber block;
395 
396  rel = relation_open(relid, AccessExclusiveLock);
397 
398  /* Only some relkinds have a visibility map */
399  check_relation_relkind(rel);
400 
401  /* Forcibly reset cached file size */
402  RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
403 
404  block = visibilitymap_prepare_truncate(rel, 0);
405  if (BlockNumberIsValid(block))
406  {
407  fork = VISIBILITYMAP_FORKNUM;
408  smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
409  }
410 
411  if (RelationNeedsWAL(rel))
412  {
413  xl_smgr_truncate xlrec;
414 
415  xlrec.blkno = 0;
416  xlrec.rlocator = rel->rd_locator;
417  xlrec.flags = SMGR_TRUNCATE_VM;
418 
419  XLogBeginInsert();
420  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
421 
422  XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
423  }
424 
425  /*
426  * Release the lock right away, not at commit time.
427  *
428  * It would be a problem to release the lock prior to commit if this
429  * truncate operation sends any transactional invalidation messages. Other
430  * backends would potentially be able to lock the relation without
431  * processing them in the window of time between when we release the lock
432  * here and when we send the messages at our eventual commit. However,
433  * we're currently only sending a non-transactional smgr invalidation,
434  * which will have been posted to shared memory immediately from within
435  * smgr_truncate. Therefore, there should be no race here.
436  *
437  * The reason why it's desirable to release the lock early here is because
438  * of the possibility that someone will need to use this to blow away many
439  * visibility map forks at once. If we can't release the lock until
440  * commit time, the transaction doing this will accumulate
441  * AccessExclusiveLocks on all of those relations at the same time, which
442  * is undesirable. However, if this turns out to be unsafe we may have no
443  * choice...
444  */
445  relation_close(rel, AccessExclusiveLock);
446 
447  /* Nothing to return. */
448  PG_RETURN_VOID();
449 }
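/*
 * Annotation (illustrative, hedged): this function only removes the VM fork;
 * nothing here rebuilds it. The expectation implied by the comment above is
 * that a later VACUUM of the relation will set the visibility map bits again.
 */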
450 
451 /*
452  * Helper function to construct whichever TupleDesc we need for a particular
453  * call.
454  */
455 static TupleDesc
456 pg_visibility_tupdesc(bool include_blkno, bool include_pd)
457 {
458  TupleDesc tupdesc;
459  AttrNumber maxattr = 2;
460  AttrNumber a = 0;
461 
462  if (include_blkno)
463  ++maxattr;
464  if (include_pd)
465  ++maxattr;
466  tupdesc = CreateTemplateTupleDesc(maxattr);
467  if (include_blkno)
468  TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
469  TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
470  TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
471  if (include_pd)
472  TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
473  Assert(a == maxattr);
474 
475  return BlessTupleDesc(tupdesc);
476 }
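/*
 * Annotation (illustrative sketch of the shapes requested by the callers of
 * this helper):
 *   pg_visibility_tupdesc(false, false) -> (all_visible, all_frozen)
 *   pg_visibility_tupdesc(false, true)  -> (all_visible, all_frozen, pd_all_visible)
 *   pg_visibility_tupdesc(true,  false) -> (blkno, all_visible, all_frozen)
 *   pg_visibility_tupdesc(true,  true)  -> (blkno, all_visible, all_frozen, pd_all_visible)
 */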
477 
478 /*
479  * Collect visibility data about a relation.
480  *
481  * Checks relkind of relid and will throw an error if the relation does not
482  * have a VM.
483  */
484 static vbits *
485 collect_visibility_data(Oid relid, bool include_pd)
486 {
487  Relation rel;
488  BlockNumber nblocks;
489  vbits *info;
490  BlockNumber blkno;
491  Buffer vmbuffer = InvalidBuffer;
492  BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
493  BlockRangeReadStreamPrivate p;
494  ReadStream *stream = NULL;
495 
496  rel = relation_open(relid, AccessShareLock);
497 
498  /* Only some relkinds have a visibility map */
499  check_relation_relkind(rel);
500 
501  nblocks = RelationGetNumberOfBlocks(rel);
502  info = palloc0(offsetof(vbits, bits) + nblocks);
503  info->next = 0;
504  info->count = nblocks;
505 
506  /* Create a stream if reading main fork. */
507  if (include_pd)
508  {
509  p.current_blocknum = 0;
510  p.last_exclusive = nblocks;
511  stream = read_stream_begin_relation(READ_STREAM_FULL,
512  bstrategy,
513  rel,
514  MAIN_FORKNUM,
515  block_range_read_stream_cb,
516  &p,
517  0);
518  }
519 
520  for (blkno = 0; blkno < nblocks; ++blkno)
521  {
522  int32 mapbits;
523 
524  /* Make sure we are interruptible. */
525  CHECK_FOR_INTERRUPTS();
526 
527  /* Get map info. */
528  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
529  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
530  info->bits[blkno] |= (1 << 0);
531  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
532  info->bits[blkno] |= (1 << 1);
533 
534  /*
535  * Page-level data requires reading every block, so only get it if the
536  * caller needs it. Use a buffer access strategy, too, to prevent
537  * cache-trashing.
538  */
539  if (include_pd)
540  {
541  Buffer buffer;
542  Page page;
543 
544  buffer = read_stream_next_buffer(stream, NULL);
545  LockBuffer(buffer, BUFFER_LOCK_SHARE);
546 
547  page = BufferGetPage(buffer);
548  if (PageIsAllVisible(page))
549  info->bits[blkno] |= (1 << 2);
550 
551  UnlockReleaseBuffer(buffer);
552  }
553  }
554 
555  if (include_pd)
556  {
557  Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
558  read_stream_end(stream);
559  }
560 
561  /* Clean up. */
562  if (vmbuffer != InvalidBuffer)
563  ReleaseBuffer(vmbuffer);
564  relation_close(rel, AccessShareLock);
565 
566  return info;
567 }
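/*
 * Annotation (illustrative): when include_pd is true, the main fork is read
 * through a read stream driven by block_range_read_stream_cb over
 * [0, nblocks), so the loop consumes exactly one buffer per heap block and
 * the stream is expected to be exhausted (asserted above) before
 * read_stream_end().
 */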
568 
569 /*
570  * The "strict" version of GetOldestNonRemovableTransactionId(). The
571  * pg_visibility check can tolerate false positives (don't report some of the
572  * errors), but can't tolerate false negatives (report false errors). Normally,
573  * horizons move forwards, but there are cases when it could move backward
574  * (see comment for ComputeXidHorizons()).
575  *
576  * This is why we have to implement our own function for xid horizon, which
577  * would be guaranteed to be newer or equal to any xid horizon computed before.
578  * We have to do the following to achieve this.
579  *
580  * 1. Ignore per-process xmins, because they may take into account connections
581  * to other databases that were ignored before.
582  * 2. Ignore KnownAssignedXids, as they are not database-aware. Although we
583  * now perform minimal checking on a standby by always using nextXid, this
584  * approach is better than nothing and will at least catch extremely broken
585  * cases where a xid is in the future.
586  * 3. Ignore walsender xmin, because it could go backward if some replication
587  * connections don't use replication slots.
588  *
589  * While it might seem like we could use KnownAssignedXids for shared
590  * catalogs, since shared catalogs rely on a global horizon rather than a
591  * database-specific one - there are potential edge cases. For example, a
592  * transaction may crash on the primary without writing a commit/abort record.
593  * This would lead to a situation where it appears to still be running on the
594  * standby, even though it has already ended on the primary. For this reason,
595  * it's safer to ignore KnownAssignedXids, even for shared catalogs.
596  *
597  * As a result, we're using only currently running xids to compute the horizon.
598  * This significantly sacrifices accuracy, but we have to do so to avoid
599  * reporting false errors.
600  */
601 static TransactionId
602 GetStrictOldestNonRemovableTransactionId(Relation rel)
603 {
604  RunningTransactions runningTransactions;
605 
606  if (RecoveryInProgress())
607  {
608  TransactionId result;
609 
610  /* As we ignore KnownAssignedXids on standby, just pick nextXid */
611  LWLockAcquire(XidGenLock, LW_SHARED);
612  result = XidFromFullTransactionId(TransamVariables->nextXid);
613  LWLockRelease(XidGenLock);
614  return result;
615  }
616  else if (rel == NULL || rel->rd_rel->relisshared)
617  {
618  /* Shared relation: take into account all running xids */
619  runningTransactions = GetRunningTransactionData();
620  LWLockRelease(ProcArrayLock);
621  LWLockRelease(XidGenLock);
622  return runningTransactions->oldestRunningXid;
623  }
624  else if (!RELATION_IS_LOCAL(rel))
625  {
626  /*
627  * Normal relation: take into account xids running within the current
628  * database
629  */
630  runningTransactions = GetRunningTransactionData();
631  LWLockRelease(ProcArrayLock);
632  LWLockRelease(XidGenLock);
633  return runningTransactions->oldestDatabaseRunningXid;
634  }
635  else
636  {
637  /*
638  * For temporary relations, ComputeXidHorizons() uses only
639  * TransamVariables->latestCompletedXid and MyProc->xid. These two
640  * shouldn't go backwards. So we're fine with this horizon.
641  */
642  return GetOldestNonRemovableTransactionId(rel);
643 }
644 }
645 
646 /*
647  * Callback function to get next block for read stream object used in
648  * collect_corrupt_items() function.
649  */
650 static BlockNumber
651 collect_corrupt_items_read_stream_next_block(ReadStream *stream,
652  void *callback_private_data,
653  void *per_buffer_data)
654 {
655  struct collect_corrupt_items_read_stream_private *p = callback_private_data;
656 
657  for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++)
658  {
659  bool check_frozen = false;
660  bool check_visible = false;
661 
662  /* Make sure we are interruptible. */
663  CHECK_FOR_INTERRUPTS();
664 
665  if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer))
666  check_frozen = true;
667  if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer))
668  check_visible = true;
669  if (!check_visible && !check_frozen)
670  continue;
671 
672  return p->current_blocknum++;
673  }
674 
675  return InvalidBlockNumber;
676 }
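/*
 * Annotation (illustrative): the callback consults the visibility map and
 * skips blocks whose bits do not need verification, so the read stream
 * feeding collect_corrupt_items() only reads suspect heap blocks. The
 * post-increment on return makes the next invocation resume at the
 * following block.
 */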
677 
678 /*
679  * Returns a list of items whose visibility map information does not match
680  * the status of the tuples on the page.
681  *
682  * If all_visible is passed as true, this will include all items which are
683  * on pages marked as all-visible in the visibility map but which do not
684  * seem to in fact be all-visible.
685  *
686  * If all_frozen is passed as true, this will include all items which are
687  * on pages marked as all-frozen but which do not seem to in fact be frozen.
688  *
689  * Checks relkind of relid and will throw an error if the relation does not
690  * have a VM.
691  */
692 static corrupt_items *
693 collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
694 {
695  Relation rel;
696  corrupt_items *items;
697  Buffer vmbuffer = InvalidBuffer;
698  BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
699  TransactionId OldestXmin = InvalidTransactionId;
700  struct collect_corrupt_items_read_stream_private p;
701  ReadStream *stream;
702  Buffer buffer;
703 
704  rel = relation_open(relid, AccessShareLock);
705 
706  /* Only some relkinds have a visibility map */
707  check_relation_relkind(rel);
708 
709  if (all_visible)
710  OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
711 
712  /*
713  * Guess an initial array size. We don't expect many corrupted tuples, so
714  * start with a small array. This function uses the "next" field to track
715  * the next offset where we can store an item (which is the same thing as
716  * the number of items found so far) and the "count" field to track the
717  * number of entries allocated. We'll repurpose these fields before
718  * returning.
719  */
720  items = palloc0(sizeof(corrupt_items));
721  items->next = 0;
722  items->count = 64;
723  items->tids = palloc(items->count * sizeof(ItemPointerData));
724 
725  p.current_blocknum = 0;
726  p.last_exclusive = RelationGetNumberOfBlocks(rel);
727  p.rel = rel;
728  p.vmbuffer = InvalidBuffer;
729  p.all_frozen = all_frozen;
730  p.all_visible = all_visible;
731  stream = read_stream_begin_relation(READ_STREAM_FULL,
732  bstrategy,
733  rel,
734  MAIN_FORKNUM,
735  collect_corrupt_items_read_stream_next_block,
736  &p,
737  0);
738 
739  /* Loop over every block in the relation. */
740  while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
741  {
742  bool check_frozen = all_frozen;
743  bool check_visible = all_visible;
744  Page page;
745  OffsetNumber offnum,
746  maxoff;
747  BlockNumber blkno;
748 
749  /* Make sure we are interruptible. */
750  CHECK_FOR_INTERRUPTS();
751 
752  LockBuffer(buffer, BUFFER_LOCK_SHARE);
753 
754  page = BufferGetPage(buffer);
755  maxoff = PageGetMaxOffsetNumber(page);
756  blkno = BufferGetBlockNumber(buffer);
757 
758  /*
759  * The visibility map bits might have changed while we were acquiring
760  * the page lock. Recheck to avoid returning spurious results.
761  */
762  if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
763  check_frozen = false;
764  if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
765  check_visible = false;
766  if (!check_visible && !check_frozen)
767  {
768  UnlockReleaseBuffer(buffer);
769  continue;
770  }
771 
772  /* Iterate over each tuple on the page. */
773  for (offnum = FirstOffsetNumber;
774  offnum <= maxoff;
775  offnum = OffsetNumberNext(offnum))
776  {
777  HeapTupleData tuple;
778  ItemId itemid;
779 
780  itemid = PageGetItemId(page, offnum);
781 
782  /* Unused or redirect line pointers are of no interest. */
783  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
784  continue;
785 
786  /* Dead line pointers are neither all-visible nor frozen. */
787  if (ItemIdIsDead(itemid))
788  {
789  ItemPointerSet(&(tuple.t_self), blkno, offnum);
790  record_corrupt_item(items, &tuple.t_self);
791  continue;
792  }
793 
794  /* Initialize a HeapTupleData structure for checks below. */
795  ItemPointerSet(&(tuple.t_self), blkno, offnum);
796  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
797  tuple.t_len = ItemIdGetLength(itemid);
798  tuple.t_tableOid = relid;
799 
800  /*
801  * If we're checking whether the page is all-visible, we expect
802  * the tuple to be all-visible.
803  */
804  if (check_visible &&
805  !tuple_all_visible(&tuple, OldestXmin, buffer))
806  {
807  TransactionId RecomputedOldestXmin;
808 
809  /*
810  * Time has passed since we computed OldestXmin, so it's
811  * possible that this tuple is all-visible in reality even
812  * though it doesn't appear so based on our
813  * previously-computed value. Let's compute a new value so we
814  * can be certain whether there is a problem.
815  *
816  * From a concurrency point of view, it sort of sucks to
817  * retake ProcArrayLock here while we're holding the buffer
818  * exclusively locked, but it should be safe against
819  * deadlocks, because surely
820  * GetStrictOldestNonRemovableTransactionId() should never
821  * take a buffer lock. And this shouldn't happen often, so
822  * it's worth being careful so as to avoid false positives.
823  */
824  RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
825 
826  if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
827  record_corrupt_item(items, &tuple.t_self);
828  else
829  {
830  OldestXmin = RecomputedOldestXmin;
831  if (!tuple_all_visible(&tuple, OldestXmin, buffer))
832  record_corrupt_item(items, &tuple.t_self);
833  }
834  }
835 
836  /*
837  * If we're checking whether the page is all-frozen, we expect the
838  * tuple to be in a state where it will never need freezing.
839  */
840  if (check_frozen)
841  {
842  if (heap_tuple_needs_eventual_freeze(tuple.t_data))
843  record_corrupt_item(items, &tuple.t_self);
844  }
845  }
846 
847  UnlockReleaseBuffer(buffer);
848  }
849  read_stream_end(stream);
850 
851  /* Clean up. */
852  if (vmbuffer != InvalidBuffer)
853  ReleaseBuffer(vmbuffer);
854  if (p.vmbuffer != InvalidBuffer)
855  ReleaseBuffer(p.vmbuffer);
856  relation_close(rel, AccessShareLock);
857 
858  /*
859  * Before returning, repurpose the fields to match caller's expectations.
860  * next is now the next item that should be read (rather than written) and
861  * count is now the number of items we wrote (rather than the number we
862  * allocated).
863  */
864  items->count = items->next;
865  items->next = 0;
866 
867  return items;
868 }
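/*
 * Annotation (illustrative summary): to avoid false positives,
 * collect_corrupt_items() rechecks anything that may have moved under it:
 * the VM bits are re-read after the heap page is locked, and OldestXmin is
 * recomputed via GetStrictOldestNonRemovableTransactionId() before a tuple
 * is reported as not-all-visible.
 */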
869 
870 /*
871  * Remember one corrupt item.
872  */
873 static void
874 record_corrupt_item(corrupt_items *items, ItemPointer tid)
875 {
876  /* enlarge output array if needed. */
877  if (items->next >= items->count)
878  {
879  items->count *= 2;
880  items->tids = repalloc(items->tids,
881  items->count * sizeof(ItemPointerData));
882  }
883  /* and add the new item */
884  items->tids[items->next++] = *tid;
885 }
886 
887 /*
888  * Check whether a tuple is all-visible relative to a given OldestXmin value.
889  * The buffer should contain the tuple and should be locked and pinned.
890  */
891 static bool
892 tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
893 {
894  HTSV_Result state;
895  TransactionId xmin;
896 
897  state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
898  if (state != HEAPTUPLE_LIVE)
899  return false; /* all-visible implies live */
900 
901  /*
902  * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
903  * all-visible unless every tuple is hinted committed. However, those hint
904  * bits could be lost after a crash, so we can't be certain that they'll
905  * be set here. So just check the xmin.
906  */
907 
908  xmin = HeapTupleHeaderGetXmin(tup->t_data);
909  if (!TransactionIdPrecedes(xmin, OldestXmin))
910  return false; /* xmin not old enough for all to see */
911 
912  return true;
913 }
914 
915 /*
916  * check_relation_relkind - convenience routine to check that relation
917  * is of the relkind supported by the callers
918  */
919 static void
920 check_relation_relkind(Relation rel)
921 {
922  if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
923  ereport(ERROR,
924  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
925  errmsg("relation \"%s\" is of wrong relation kind",
926  RelationGetRelationName(rel)),
927  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
928 }