PostgreSQL Source Code  git master
pg_visibility.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_visibility.c
4  * display visibility map information and page-level visibility bits
5  *
6  * Copyright (c) 2016-2024, PostgreSQL Global Development Group
7  *
8  * contrib/pg_visibility/pg_visibility.c
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12 
13 #include "access/heapam.h"
14 #include "access/htup_details.h"
15 #include "access/visibilitymap.h"
16 #include "access/xloginsert.h"
17 #include "catalog/pg_type.h"
18 #include "catalog/storage_xlog.h"
19 #include "funcapi.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "storage/proc.h"
23 #include "storage/procarray.h"
24 #include "storage/smgr.h"
25 #include "utils/rel.h"
26 #include "utils/snapmgr.h"
27 
29 
30 typedef struct vbits
31 {
36 
37 typedef struct corrupt_items
38 {
43 
52 
53 static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
54 static vbits *collect_visibility_data(Oid relid, bool include_pd);
55 static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
56  bool all_frozen);
58 static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
59  Buffer buffer);
60 static void check_relation_relkind(Relation rel);
61 
62 /*
63  * Visibility map information for a single block of a relation.
64  *
65  * Note: the VM code will silently return zeroes for pages past the end
66  * of the map, so we allow probes up to MaxBlockNumber regardless of the
67  * actual relation size.
68  */
69 Datum
71 {
72  Oid relid = PG_GETARG_OID(0);
73  int64 blkno = PG_GETARG_INT64(1);
74  int32 mapbits;
75  Relation rel;
76  Buffer vmbuffer = InvalidBuffer;
77  TupleDesc tupdesc;
78  Datum values[2];
79  bool nulls[2] = {0};
80 
81  rel = relation_open(relid, AccessShareLock);
82 
83  /* Only some relkinds have a visibility map */
85 
86  if (blkno < 0 || blkno > MaxBlockNumber)
87  ereport(ERROR,
88  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
89  errmsg("invalid block number")));
90 
91  tupdesc = pg_visibility_tupdesc(false, false);
92 
93  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
94  if (vmbuffer != InvalidBuffer)
95  ReleaseBuffer(vmbuffer);
96  values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
97  values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
98 
100 
102 }
103 
104 /*
105  * Visibility map information for a single block of a relation, plus the
106  * page-level information for the same block.
107  */
108 Datum
110 {
111  Oid relid = PG_GETARG_OID(0);
112  int64 blkno = PG_GETARG_INT64(1);
113  int32 mapbits;
114  Relation rel;
115  Buffer vmbuffer = InvalidBuffer;
116  Buffer buffer;
117  Page page;
118  TupleDesc tupdesc;
119  Datum values[3];
120  bool nulls[3] = {0};
121 
122  rel = relation_open(relid, AccessShareLock);
123 
124  /* Only some relkinds have a visibility map */
126 
127  if (blkno < 0 || blkno > MaxBlockNumber)
128  ereport(ERROR,
129  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
130  errmsg("invalid block number")));
131 
132  tupdesc = pg_visibility_tupdesc(false, true);
133 
134  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
135  if (vmbuffer != InvalidBuffer)
136  ReleaseBuffer(vmbuffer);
137  values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
138  values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
139 
140  /* Here we have to explicitly check rel size ... */
141  if (blkno < RelationGetNumberOfBlocks(rel))
142  {
143  buffer = ReadBuffer(rel, blkno);
144  LockBuffer(buffer, BUFFER_LOCK_SHARE);
145 
146  page = BufferGetPage(buffer);
148 
149  UnlockReleaseBuffer(buffer);
150  }
151  else
152  {
153  /* As with the vismap, silently return 0 for pages past EOF */
154  values[2] = BoolGetDatum(false);
155  }
156 
158 
160 }
161 
162 /*
163  * Visibility map information for every block in a relation.
164  */
165 Datum
167 {
168  FuncCallContext *funcctx;
169  vbits *info;
170 
171  if (SRF_IS_FIRSTCALL())
172  {
173  Oid relid = PG_GETARG_OID(0);
174  MemoryContext oldcontext;
175 
176  funcctx = SRF_FIRSTCALL_INIT();
177  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
178  funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
179  /* collect_visibility_data will verify the relkind */
180  funcctx->user_fctx = collect_visibility_data(relid, false);
181  MemoryContextSwitchTo(oldcontext);
182  }
183 
184  funcctx = SRF_PERCALL_SETUP();
185  info = (vbits *) funcctx->user_fctx;
186 
187  if (info->next < info->count)
188  {
189  Datum values[3];
190  bool nulls[3] = {0};
191  HeapTuple tuple;
192 
193  values[0] = Int64GetDatum(info->next);
194  values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
195  values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
196  info->next++;
197 
198  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
199  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
200  }
201 
202  SRF_RETURN_DONE(funcctx);
203 }
204 
205 /*
206  * Visibility map information for every block in a relation, plus the page
207  * level information for each block.
208  */
209 Datum
211 {
212  FuncCallContext *funcctx;
213  vbits *info;
214 
215  if (SRF_IS_FIRSTCALL())
216  {
217  Oid relid = PG_GETARG_OID(0);
218  MemoryContext oldcontext;
219 
220  funcctx = SRF_FIRSTCALL_INIT();
221  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
222  funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
223  /* collect_visibility_data will verify the relkind */
224  funcctx->user_fctx = collect_visibility_data(relid, true);
225  MemoryContextSwitchTo(oldcontext);
226  }
227 
228  funcctx = SRF_PERCALL_SETUP();
229  info = (vbits *) funcctx->user_fctx;
230 
231  if (info->next < info->count)
232  {
233  Datum values[4];
234  bool nulls[4] = {0};
235  HeapTuple tuple;
236 
237  values[0] = Int64GetDatum(info->next);
238  values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
239  values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
240  values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
241  info->next++;
242 
243  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
244  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
245  }
246 
247  SRF_RETURN_DONE(funcctx);
248 }
249 
250 /*
251  * Count the number of all-visible and all-frozen pages in the visibility
252  * map for a particular relation.
253  */
254 Datum
256 {
257  Oid relid = PG_GETARG_OID(0);
258  Relation rel;
259  BlockNumber nblocks;
260  BlockNumber blkno;
261  Buffer vmbuffer = InvalidBuffer;
262  int64 all_visible = 0;
263  int64 all_frozen = 0;
264  TupleDesc tupdesc;
265  Datum values[2];
266  bool nulls[2] = {0};
267 
268  rel = relation_open(relid, AccessShareLock);
269 
270  /* Only some relkinds have a visibility map */
272 
273  nblocks = RelationGetNumberOfBlocks(rel);
274 
275  for (blkno = 0; blkno < nblocks; ++blkno)
276  {
277  int32 mapbits;
278 
279  /* Make sure we are interruptible. */
281 
282  /* Get map info. */
283  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
284  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
285  ++all_visible;
286  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
287  ++all_frozen;
288  }
289 
290  /* Clean up. */
291  if (vmbuffer != InvalidBuffer)
292  ReleaseBuffer(vmbuffer);
294 
295  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
296  elog(ERROR, "return type must be a row type");
297 
298  values[0] = Int64GetDatum(all_visible);
299  values[1] = Int64GetDatum(all_frozen);
300 
302 }
303 
304 /*
305  * Return the TIDs of non-frozen tuples present in pages marked all-frozen
306  * in the visibility map. We hope no one will ever find any, but there could
307  * be bugs, database corruption, etc.
308  */
309 Datum
311 {
312  FuncCallContext *funcctx;
314 
315  if (SRF_IS_FIRSTCALL())
316  {
317  Oid relid = PG_GETARG_OID(0);
318  MemoryContext oldcontext;
319 
320  funcctx = SRF_FIRSTCALL_INIT();
321  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
322  /* collect_corrupt_items will verify the relkind */
323  funcctx->user_fctx = collect_corrupt_items(relid, false, true);
324  MemoryContextSwitchTo(oldcontext);
325  }
326 
327  funcctx = SRF_PERCALL_SETUP();
328  items = (corrupt_items *) funcctx->user_fctx;
329 
330  if (items->next < items->count)
331  SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
332 
333  SRF_RETURN_DONE(funcctx);
334 }
335 
336 /*
337  * Return the TIDs of not-all-visible tuples in pages marked all-visible
338  * in the visibility map. We hope no one will ever find any, but there could
339  * be bugs, database corruption, etc.
340  */
341 Datum
343 {
344  FuncCallContext *funcctx;
346 
347  if (SRF_IS_FIRSTCALL())
348  {
349  Oid relid = PG_GETARG_OID(0);
350  MemoryContext oldcontext;
351 
352  funcctx = SRF_FIRSTCALL_INIT();
353  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
354  /* collect_corrupt_items will verify the relkind */
355  funcctx->user_fctx = collect_corrupt_items(relid, true, false);
356  MemoryContextSwitchTo(oldcontext);
357  }
358 
359  funcctx = SRF_PERCALL_SETUP();
360  items = (corrupt_items *) funcctx->user_fctx;
361 
362  if (items->next < items->count)
363  SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
364 
365  SRF_RETURN_DONE(funcctx);
366 }
367 
368 /*
369  * Remove the visibility map fork for a relation. If there turn out to be
370  * any bugs in the visibility map code that require rebuilding the VM, this
371  * provides users with a way to do it that is cleaner than shutting down the
372  * server and removing files by hand.
373  *
374  * This is a cut-down version of RelationTruncate.
375  */
376 Datum
378 {
379  Oid relid = PG_GETARG_OID(0);
380  Relation rel;
381  ForkNumber fork;
382  BlockNumber block;
383 
384  rel = relation_open(relid, AccessExclusiveLock);
385 
386  /* Only some relkinds have a visibility map */
388 
389  /* Forcibly reset cached file size */
391 
392  block = visibilitymap_prepare_truncate(rel, 0);
393  if (BlockNumberIsValid(block))
394  {
395  fork = VISIBILITYMAP_FORKNUM;
396  smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
397  }
398 
399  if (RelationNeedsWAL(rel))
400  {
401  xl_smgr_truncate xlrec;
402 
403  xlrec.blkno = 0;
404  xlrec.rlocator = rel->rd_locator;
405  xlrec.flags = SMGR_TRUNCATE_VM;
406 
407  XLogBeginInsert();
408  XLogRegisterData((char *) &xlrec, sizeof(xlrec));
409 
411  }
412 
413  /*
414  * Release the lock right away, not at commit time.
415  *
416  * It would be a problem to release the lock prior to commit if this
417  * truncate operation sends any transactional invalidation messages. Other
418  * backends would potentially be able to lock the relation without
419  * processing them in the window of time between when we release the lock
420  * here and when we sent the messages at our eventual commit. However,
421  * we're currently only sending a non-transactional smgr invalidation,
422  * which will have been posted to shared memory immediately from within
423  * smgr_truncate. Therefore, there should be no race here.
424  *
425  * The reason why it's desirable to release the lock early here is because
426  * of the possibility that someone will need to use this to blow away many
427  * visibility map forks at once. If we can't release the lock until
428  * commit time, the transaction doing this will accumulate
429  * AccessExclusiveLocks on all of those relations at the same time, which
430  * is undesirable. However, if this turns out to be unsafe we may have no
431  * choice...
432  */
434 
435  /* Nothing to return. */
436  PG_RETURN_VOID();
437 }
438 
439 /*
440  * Helper function to construct whichever TupleDesc we need for a particular
441  * call.
442  */
443 static TupleDesc
444 pg_visibility_tupdesc(bool include_blkno, bool include_pd)
445 {
446  TupleDesc tupdesc;
447  AttrNumber maxattr = 2;
448  AttrNumber a = 0;
449 
450  if (include_blkno)
451  ++maxattr;
452  if (include_pd)
453  ++maxattr;
454  tupdesc = CreateTemplateTupleDesc(maxattr);
455  if (include_blkno)
456  TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
457  TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
458  TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
459  if (include_pd)
460  TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
461  Assert(a == maxattr);
462 
463  return BlessTupleDesc(tupdesc);
464 }
465 
466 /*
467  * Collect visibility data about a relation.
468  *
469  * Checks relkind of relid and will throw an error if the relation does not
470  * have a VM.
471  */
472 static vbits *
473 collect_visibility_data(Oid relid, bool include_pd)
474 {
475  Relation rel;
476  BlockNumber nblocks;
477  vbits *info;
478  BlockNumber blkno;
479  Buffer vmbuffer = InvalidBuffer;
481 
482  rel = relation_open(relid, AccessShareLock);
483 
484  /* Only some relkinds have a visibility map */
486 
487  nblocks = RelationGetNumberOfBlocks(rel);
488  info = palloc0(offsetof(vbits, bits) + nblocks);
489  info->next = 0;
490  info->count = nblocks;
491 
492  for (blkno = 0; blkno < nblocks; ++blkno)
493  {
494  int32 mapbits;
495 
496  /* Make sure we are interruptible. */
498 
499  /* Get map info. */
500  mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
501  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
502  info->bits[blkno] |= (1 << 0);
503  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
504  info->bits[blkno] |= (1 << 1);
505 
506  /*
507  * Page-level data requires reading every block, so only get it if the
508  * caller needs it. Use a buffer access strategy, too, to prevent
509  * cache-trashing.
510  */
511  if (include_pd)
512  {
513  Buffer buffer;
514  Page page;
515 
516  buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
517  bstrategy);
518  LockBuffer(buffer, BUFFER_LOCK_SHARE);
519 
520  page = BufferGetPage(buffer);
521  if (PageIsAllVisible(page))
522  info->bits[blkno] |= (1 << 2);
523 
524  UnlockReleaseBuffer(buffer);
525  }
526  }
527 
528  /* Clean up. */
529  if (vmbuffer != InvalidBuffer)
530  ReleaseBuffer(vmbuffer);
532 
533  return info;
534 }
535 
536 /*
537  * The "strict" version of GetOldestNonRemovableTransactionId(). The
538  * pg_visibility check can tolerate false positives (don't report some of the
539  * errors), but can't tolerate false negatives (report false errors). Normally,
540  * horizons move forwards, but there are cases when it could move backward
541  * (see comment for ComputeXidHorizons()).
542  *
543  * This is why we have to implement our own function for xid horizon, which
544  * would be guaranteed to be newer or equal to any xid horizon computed before.
545  * We have to do the following to achieve this.
546  *
547  * 1. Ignore processes xmin's, because they consider connection to other
548  * databases that were ignored before.
549  * 2. Ignore KnownAssignedXids, because they are not database-aware. At the
550  * same time, the primary could compute its horizons database-aware.
551  * 3. Ignore walsender xmin, because it could go backward if some replication
552  * connections don't use replication slots.
553  *
554  * As a result, we're using only currently running xids to compute the horizon.
555  * Surely these would significantly sacrifice accuracy. But we have to do so
556  * to avoid reporting false errors.
557  */
558 static TransactionId
560 {
561  RunningTransactions runningTransactions;
562 
563  if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
564  {
565  /* Shared relation: take into account all running xids */
566  runningTransactions = GetRunningTransactionData();
567  LWLockRelease(ProcArrayLock);
568  LWLockRelease(XidGenLock);
569  return runningTransactions->oldestRunningXid;
570  }
571  else if (!RELATION_IS_LOCAL(rel))
572  {
573  /*
574  * Normal relation: take into account xids running within the current
575  * database
576  */
577  runningTransactions = GetRunningTransactionData();
578  LWLockRelease(ProcArrayLock);
579  LWLockRelease(XidGenLock);
580  return runningTransactions->oldestDatabaseRunningXid;
581  }
582  else
583  {
584  /*
585  * For temporary relations, ComputeXidHorizons() uses only
586  * TransamVariables->latestCompletedXid and MyProc->xid. These two
587  * shouldn't go backwards. So we're fine with this horizon.
588  */
590  }
591 }
592 
593 /*
594  * Returns a list of items whose visibility map information does not match
595  * the status of the tuples on the page.
596  *
597  * If all_visible is passed as true, this will include all items which are
598  * on pages marked as all-visible in the visibility map but which do not
599  * seem to in fact be all-visible.
600  *
601  * If all_frozen is passed as true, this will include all items which are
602  * on pages marked as all-frozen but which do not seem to in fact be frozen.
603  *
604  * Checks relkind of relid and will throw an error if the relation does not
605  * have a VM.
606  */
607 static corrupt_items *
608 collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
609 {
610  Relation rel;
611  BlockNumber nblocks;
613  BlockNumber blkno;
614  Buffer vmbuffer = InvalidBuffer;
616  TransactionId OldestXmin = InvalidTransactionId;
617 
618  rel = relation_open(relid, AccessShareLock);
619 
620  /* Only some relkinds have a visibility map */
622 
623  if (all_visible)
625 
626  nblocks = RelationGetNumberOfBlocks(rel);
627 
628  /*
629  * Guess an initial array size. We don't expect many corrupted tuples, so
630  * start with a small array. This function uses the "next" field to track
631  * the next offset where we can store an item (which is the same thing as
632  * the number of items found so far) and the "count" field to track the
633  * number of entries allocated. We'll repurpose these fields before
634  * returning.
635  */
636  items = palloc0(sizeof(corrupt_items));
637  items->next = 0;
638  items->count = 64;
639  items->tids = palloc(items->count * sizeof(ItemPointerData));
640 
641  /* Loop over every block in the relation. */
642  for (blkno = 0; blkno < nblocks; ++blkno)
643  {
644  bool check_frozen = false;
645  bool check_visible = false;
646  Buffer buffer;
647  Page page;
648  OffsetNumber offnum,
649  maxoff;
650 
651  /* Make sure we are interruptible. */
653 
654  /* Use the visibility map to decide whether to check this page. */
655  if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
656  check_frozen = true;
657  if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
658  check_visible = true;
659  if (!check_visible && !check_frozen)
660  continue;
661 
662  /* Read and lock the page. */
663  buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
664  bstrategy);
665  LockBuffer(buffer, BUFFER_LOCK_SHARE);
666 
667  page = BufferGetPage(buffer);
668  maxoff = PageGetMaxOffsetNumber(page);
669 
670  /*
671  * The visibility map bits might have changed while we were acquiring
672  * the page lock. Recheck to avoid returning spurious results.
673  */
674  if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
675  check_frozen = false;
676  if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
677  check_visible = false;
678  if (!check_visible && !check_frozen)
679  {
680  UnlockReleaseBuffer(buffer);
681  continue;
682  }
683 
684  /* Iterate over each tuple on the page. */
685  for (offnum = FirstOffsetNumber;
686  offnum <= maxoff;
687  offnum = OffsetNumberNext(offnum))
688  {
689  HeapTupleData tuple;
690  ItemId itemid;
691 
692  itemid = PageGetItemId(page, offnum);
693 
694  /* Unused or redirect line pointers are of no interest. */
695  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
696  continue;
697 
698  /* Dead line pointers are neither all-visible nor frozen. */
699  if (ItemIdIsDead(itemid))
700  {
701  ItemPointerSet(&(tuple.t_self), blkno, offnum);
703  continue;
704  }
705 
706  /* Initialize a HeapTupleData structure for checks below. */
707  ItemPointerSet(&(tuple.t_self), blkno, offnum);
708  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
709  tuple.t_len = ItemIdGetLength(itemid);
710  tuple.t_tableOid = relid;
711 
712  /*
713  * If we're checking whether the page is all-visible, we expect
714  * the tuple to be all-visible.
715  */
716  if (check_visible &&
717  !tuple_all_visible(&tuple, OldestXmin, buffer))
718  {
719  TransactionId RecomputedOldestXmin;
720 
721  /*
722  * Time has passed since we computed OldestXmin, so it's
723  * possible that this tuple is all-visible in reality even
724  * though it doesn't appear so based on our
725  * previously-computed value. Let's compute a new value so we
726  * can be certain whether there is a problem.
727  *
728  * From a concurrency point of view, it sort of sucks to
729  * retake ProcArrayLock here while we're holding the buffer
730  * exclusively locked, but it should be safe against
731  * deadlocks, because surely
732  * GetStrictOldestNonRemovableTransactionId() should never
733  * take a buffer lock. And this shouldn't happen often, so
734  * it's worth being careful so as to avoid false positives.
735  */
736  RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
737 
738  if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
740  else
741  {
742  OldestXmin = RecomputedOldestXmin;
743  if (!tuple_all_visible(&tuple, OldestXmin, buffer))
745  }
746  }
747 
748  /*
749  * If we're checking whether the page is all-frozen, we expect the
750  * tuple to be in a state where it will never need freezing.
751  */
752  if (check_frozen)
753  {
756  }
757  }
758 
759  UnlockReleaseBuffer(buffer);
760  }
761 
762  /* Clean up. */
763  if (vmbuffer != InvalidBuffer)
764  ReleaseBuffer(vmbuffer);
766 
767  /*
768  * Before returning, repurpose the fields to match caller's expectations.
769  * next is now the next item that should be read (rather than written) and
770  * count is now the number of items we wrote (rather than the number we
771  * allocated).
772  */
773  items->count = items->next;
774  items->next = 0;
775 
776  return items;
777 }
778 
779 /*
780  * Remember one corrupt item.
781  */
782 static void
784 {
785  /* enlarge output array if needed. */
786  if (items->next >= items->count)
787  {
788  items->count *= 2;
789  items->tids = repalloc(items->tids,
790  items->count * sizeof(ItemPointerData));
791  }
792  /* and add the new item */
793  items->tids[items->next++] = *tid;
794 }
795 
796 /*
797  * Check whether a tuple is all-visible relative to a given OldestXmin value.
798  * The buffer should contain the tuple and should be locked and pinned.
799  */
800 static bool
802 {
804  TransactionId xmin;
805 
806  state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
807  if (state != HEAPTUPLE_LIVE)
808  return false; /* all-visible implies live */
809 
810  /*
811  * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
812  * all-visible unless every tuple is hinted committed. However, those hint
813  * bits could be lost after a crash, so we can't be certain that they'll
814  * be set here. So just check the xmin.
815  */
816 
817  xmin = HeapTupleHeaderGetXmin(tup->t_data);
818  if (!TransactionIdPrecedes(xmin, OldestXmin))
819  return false; /* xmin not old enough for all to see */
820 
821  return true;
822 }
823 
824 /*
825  * check_relation_relkind - convenience routine to check that relation
826  * is of the relkind supported by the callers
827  */
828 static void
830 {
831  if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
832  ereport(ERROR,
833  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
834  errmsg("relation \"%s\" is of wrong relation kind",
836  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
837 }
int16 AttrNumber
Definition: attnum.h:21
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
#define MaxBlockNumber
Definition: block.h:35
static Datum values[MAXATTR]
Definition: bootstrap.c:152
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4560
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4795
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:781
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:734
@ BAS_BULKREAD
Definition: bufmgr.h:35
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:229
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
@ RBM_NORMAL
Definition: bufmgr.h:44
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
signed int int32
Definition: c.h:481
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:385
unsigned char uint8
Definition: c.h:491
uint32 TransactionId
Definition: c.h:639
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2158
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7276
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
int a
Definition: isn.c:69
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
Assert(fmt[strlen(fmt) - 1] !='\n')
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define AccessShareLock
Definition: lockdefs.h:36
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1782
void * palloc0(Size size)
Definition: mcxt.c:1334
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1528
void * palloc(Size size)
Definition: mcxt.c:1304
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static corrupt_items * collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
Datum pg_visibility_map_summary(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
Definition: pg_visibility.c:28
Datum pg_visibility_rel(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(pg_visibility_map)
struct corrupt_items corrupt_items
static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd)
static void check_relation_relkind(Relation rel)
static void record_corrupt_item(corrupt_items *items, ItemPointer tid)
static TransactionId GetStrictOldestNonRemovableTransactionId(Relation rel)
Datum pg_visibility_map(PG_FUNCTION_ARGS)
Definition: pg_visibility.c:70
struct vbits vbits
static vbits * collect_visibility_data(Oid relid, bool include_pd)
Datum pg_visibility_map_rel(PG_FUNCTION_ARGS)
Datum pg_check_visible(PG_FUNCTION_ARGS)
Datum pg_check_frozen(PG_FUNCTION_ARGS)
Datum pg_visibility(PG_FUNCTION_ARGS)
static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
Datum pg_truncate_visibility_map(PG_FUNCTION_ARGS)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
unsigned int Oid
Definition: postgres_ext.h:31
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1993
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2681
MemoryContextSwitchTo(old_ctx)
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:650
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:569
#define RelationGetRelationName(relation)
Definition: rel.h:541
#define RelationNeedsWAL(relation)
Definition: rel.h:630
ForkNumber
Definition: relpath.h:48
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:52
@ MAIN_FORKNUM
Definition: relpath.h:50
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
Definition: smgr.c:702
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
void * user_fctx
Definition: funcapi.h:82
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
RelFileLocator rd_locator
Definition: rel.h:57
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId oldestRunningXid
Definition: standby.h:84
TransactionId oldestDatabaseRunningXid
Definition: standby.h:85
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
ItemPointer tids
Definition: pg_visibility.c:41
BlockNumber count
Definition: pg_visibility.c:40
BlockNumber next
Definition: pg_visibility.c:39
Definition: regguts.h:323
BlockNumber next
Definition: pg_visibility.c:32
uint8 bits[FLEXIBLE_ARRAY_MEMBER]
Definition: pg_visibility.c:34
BlockNumber count
Definition: pg_visibility.c:33
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
static ItemArray items
Definition: test_tidstore.c:49
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:67
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:651
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
bool RecoveryInProgress(void)
Definition: xlog.c:6201
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:82