PostgreSQL Source Code  git master
verify_heapam.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * verify_heapam.c
4  * Functions to check PostgreSQL heap relations for corruption
5  *
6  * Copyright (c) 2016-2020, PostgreSQL Global Development Group
7  *
8  * contrib/amcheck/verify_heapam.c
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12 
13 #include "access/detoast.h"
14 #include "access/genam.h"
15 #include "access/heapam.h"
16 #include "access/heaptoast.h"
17 #include "access/multixact.h"
18 #include "access/toast_internals.h"
19 #include "access/visibilitymap.h"
20 #include "catalog/pg_am.h"
21 #include "funcapi.h"
22 #include "miscadmin.h"
23 #include "storage/bufmgr.h"
24 #include "storage/procarray.h"
25 #include "utils/builtins.h"
26 #include "utils/fmgroids.h"
27 
29 
30 /* The number of columns in tuples returned by verify_heapam */
31 #define HEAPCHECK_RELATION_COLS 4
32 
33 /*
34  * Despite the name, we use this for reporting problems with both XIDs and
35  * MXIDs.
36  */
37 typedef enum XidBoundsViolation
38 {
45 
46 typedef enum XidCommitStatus
47 {
52 
53 typedef enum SkipPages
54 {
58 } SkipPages;
59 
60 /*
61  * Struct holding the running context information during
62  * a lifetime of a verify_heapam execution.
63  */
64 typedef struct HeapCheckContext
65 {
66  /*
67  * Cached copies of values from ShmemVariableCache and computed values
68  * from them.
69  */
70  FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */
71  TransactionId next_xid; /* 32-bit version of next_fxid */
72  TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */
73  FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed
74  * relative to next_fxid */
75 
76  /*
77  * Cached copy of value from MultiXactState
78  */
79  MultiXactId next_mxact; /* MultiXactState->nextMXact */
80  MultiXactId oldest_mxact; /* MultiXactState->oldestMultiXactId */
81 
82  /*
83  * Cached copies of the most recently checked xid and its status.
84  */
87 
88  /* Values concerning the heap relation being checked */
97 
98  /* Values for iterating over pages in the relation */
103 
104  /* Values for iterating over tuples within a page */
110  int natts;
111 
112  /* Values for iterating over attributes within the tuple */
113  uint32 offset; /* offset in tuple data */
115 
116  /* Values for iterating over toast for the attribute */
121 
122  /* Whether verify_heapam has yet encountered any corrupt tuples */
124 
125  /* The descriptor and tuplestore for verify_heapam's result tuples */
129 
130 /* Internal implementation */
131 static void sanity_check_relation(Relation rel);
132 static void check_tuple(HeapCheckContext *ctx);
133 static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx);
134 
135 static bool check_tuple_attribute(HeapCheckContext *ctx);
137  HeapCheckContext *ctx);
138 
139 static void report_corruption(HeapCheckContext *ctx, char *msg);
140 static TupleDesc verify_heapam_tupdesc(void);
142  const HeapCheckContext *ctx);
143 static void update_cached_xid_range(HeapCheckContext *ctx);
146  HeapCheckContext *ctx);
148  HeapCheckContext *ctx);
150  HeapCheckContext *ctx,
152 
153 /*
154  * Scan and report corruption in heap pages, optionally reconciling toasted
155  * attributes with entries in the associated toast table. Intended to be
156  * called from SQL with the following parameters:
157  *
158  * relation:
159  * The Oid of the heap relation to be checked.
160  *
161  * on_error_stop:
162  * Whether to stop at the end of the first page for which errors are
163  * detected. Note that multiple rows may be returned.
164  *
165  * check_toast:
166  * Whether to check each toasted attribute against the toast table to
167  * verify that it can be found there.
168  *
169  * skip:
170  * What kinds of pages in the heap relation should be skipped. Valid
171  * options are "all-visible", "all-frozen", and "none".
172  *
173  * Returns to the SQL caller a set of tuples, each containing the location
174  * and a description of a corruption found in the heap.
175  *
176  * This code goes to some trouble to avoid crashing the server even if the
177  * table pages are badly corrupted, but it's probably not perfect. If
178  * check_toast is true, we'll use regular index lookups to try to fetch TOAST
179  * tuples, which can certainly cause crashes if the right kind of corruption
180  * exists in the toast table or index. No matter what parameters you pass,
181  * we can't protect against crashes that might occur trying to look up the
182  * commit status of transaction IDs (though we avoid trying to do such lookups
183  * for transaction IDs that can't legally appear in the table).
184  */
185 Datum
187 {
188  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
189  MemoryContext old_context;
190  bool random_access;
191  HeapCheckContext ctx;
192  Buffer vmbuffer = InvalidBuffer;
193  Oid relid;
194  bool on_error_stop;
195  bool check_toast;
196  SkipPages skip_option = SKIP_PAGES_NONE;
197  BlockNumber first_block;
198  BlockNumber last_block;
199  BlockNumber nblocks;
200  const char *skip;
201 
202  /* Check to see if caller supports us returning a tuplestore */
203  if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
204  ereport(ERROR,
205  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
206  errmsg("set-valued function called in context that cannot accept a set")));
207  if (!(rsinfo->allowedModes & SFRM_Materialize))
208  ereport(ERROR,
209  (errcode(ERRCODE_SYNTAX_ERROR),
210  errmsg("materialize mode required, but it is not allowed in this context")));
211 
212  /* Check supplied arguments */
213  if (PG_ARGISNULL(0))
214  ereport(ERROR,
215  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
216  errmsg("relation cannot be null")));
217  relid = PG_GETARG_OID(0);
218 
219  if (PG_ARGISNULL(1))
220  ereport(ERROR,
221  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
222  errmsg("on_error_stop cannot be null")));
223  on_error_stop = PG_GETARG_BOOL(1);
224 
225  if (PG_ARGISNULL(2))
226  ereport(ERROR,
227  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
228  errmsg("check_toast cannot be null")));
229  check_toast = PG_GETARG_BOOL(2);
230 
231  if (PG_ARGISNULL(3))
232  ereport(ERROR,
233  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
234  errmsg("skip cannot be null")));
236  if (pg_strcasecmp(skip, "all-visible") == 0)
237  skip_option = SKIP_PAGES_ALL_VISIBLE;
238  else if (pg_strcasecmp(skip, "all-frozen") == 0)
239  skip_option = SKIP_PAGES_ALL_FROZEN;
240  else if (pg_strcasecmp(skip, "none") == 0)
241  skip_option = SKIP_PAGES_NONE;
242  else
243  ereport(ERROR,
244  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
245  errmsg("invalid skip option"),
246  errhint("Valid skip options are \"all-visible\", \"all-frozen\", and \"none\".")));
247 
248  memset(&ctx, 0, sizeof(HeapCheckContext));
249  ctx.cached_xid = InvalidTransactionId;
250 
251  /*
252  * If we report corruption when not examining some individual attribute,
253  * we need attnum to be reported as NULL. Set that up before any
254  * corruption reporting might happen.
255  */
256  ctx.attnum = -1;
257 
258  /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
259  old_context = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
260  random_access = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
261  ctx.tupdesc = verify_heapam_tupdesc();
262  ctx.tupstore = tuplestore_begin_heap(random_access, false, work_mem);
263  rsinfo->returnMode = SFRM_Materialize;
264  rsinfo->setResult = ctx.tupstore;
265  rsinfo->setDesc = ctx.tupdesc;
266  MemoryContextSwitchTo(old_context);
267 
268  /* Open relation, then check its relkind and access method */
269  ctx.rel = relation_open(relid, AccessShareLock);
270  sanity_check_relation(ctx.rel);
271 
272  /* Early exit if the relation is empty */
273  nblocks = RelationGetNumberOfBlocks(ctx.rel);
274  if (!nblocks)
275  {
277  PG_RETURN_NULL();
278  }
279 
280  ctx.bstrategy = GetAccessStrategy(BAS_BULKREAD);
281  ctx.buffer = InvalidBuffer;
282  ctx.page = NULL;
283 
284  /* Validate block numbers, or handle nulls. */
285  if (PG_ARGISNULL(4))
286  first_block = 0;
287  else
288  {
289  int64 fb = PG_GETARG_INT64(4);
290 
291  if (fb < 0 || fb >= nblocks)
292  ereport(ERROR,
293  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
294  errmsg("starting block number must be between 0 and %u",
295  nblocks - 1)));
296  first_block = (BlockNumber) fb;
297  }
298  if (PG_ARGISNULL(5))
299  last_block = nblocks - 1;
300  else
301  {
302  int64 lb = PG_GETARG_INT64(5);
303 
304  if (lb < 0 || lb >= nblocks)
305  ereport(ERROR,
306  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
307  errmsg("ending block number must be between 0 and %u",
308  nblocks - 1)));
309  last_block = (BlockNumber) lb;
310  }
311 
312  /* Optionally open the toast relation, if any. */
313  if (ctx.rel->rd_rel->reltoastrelid && check_toast)
314  {
315  int offset;
316 
317  /* Main relation has associated toast relation */
318  ctx.toast_rel = table_open(ctx.rel->rd_rel->reltoastrelid,
320  offset = toast_open_indexes(ctx.toast_rel,
322  &(ctx.toast_indexes),
323  &(ctx.num_toast_indexes));
324  ctx.valid_toast_index = ctx.toast_indexes[offset];
325  }
326  else
327  {
328  /*
329  * Main relation has no associated toast relation, or we're
330  * intentionally skipping it.
331  */
332  ctx.toast_rel = NULL;
333  ctx.toast_indexes = NULL;
334  ctx.num_toast_indexes = 0;
335  }
336 
339  ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid;
340  ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx);
341  ctx.relminmxid = ctx.rel->rd_rel->relminmxid;
342 
343  if (TransactionIdIsNormal(ctx.relfrozenxid))
344  ctx.oldest_xid = ctx.relfrozenxid;
345 
346  for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
347  {
348  OffsetNumber maxoff;
349 
350  /* Optionally skip over all-frozen or all-visible blocks */
351  if (skip_option != SKIP_PAGES_NONE)
352  {
353  int32 mapbits;
354 
355  mapbits = (int32) visibilitymap_get_status(ctx.rel, ctx.blkno,
356  &vmbuffer);
357  if (skip_option == SKIP_PAGES_ALL_FROZEN)
358  {
359  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
360  continue;
361  }
362 
363  if (skip_option == SKIP_PAGES_ALL_VISIBLE)
364  {
365  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
366  continue;
367  }
368  }
369 
370  /* Read and lock the next page. */
371  ctx.buffer = ReadBufferExtended(ctx.rel, MAIN_FORKNUM, ctx.blkno,
372  RBM_NORMAL, ctx.bstrategy);
373  LockBuffer(ctx.buffer, BUFFER_LOCK_SHARE);
374  ctx.page = BufferGetPage(ctx.buffer);
375 
376  /* Perform tuple checks */
377  maxoff = PageGetMaxOffsetNumber(ctx.page);
378  for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
379  ctx.offnum = OffsetNumberNext(ctx.offnum))
380  {
381  ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
382 
383  /* Skip over unused/dead line pointers */
384  if (!ItemIdIsUsed(ctx.itemid) || ItemIdIsDead(ctx.itemid))
385  continue;
386 
387  /*
388  * If this line pointer has been redirected, check that it
389  * redirects to a valid offset within the line pointer array
390  */
391  if (ItemIdIsRedirected(ctx.itemid))
392  {
393  OffsetNumber rdoffnum = ItemIdGetRedirect(ctx.itemid);
394  ItemId rditem;
395 
396  if (rdoffnum < FirstOffsetNumber)
397  {
398  report_corruption(&ctx,
399  psprintf("line pointer redirection to item at offset %u precedes minimum offset %u",
400  (unsigned) rdoffnum,
401  (unsigned) FirstOffsetNumber));
402  continue;
403  }
404  if (rdoffnum > maxoff)
405  {
406  report_corruption(&ctx,
407  psprintf("line pointer redirection to item at offset %u exceeds maximum offset %u",
408  (unsigned) rdoffnum,
409  (unsigned) maxoff));
410  continue;
411  }
412  rditem = PageGetItemId(ctx.page, rdoffnum);
413  if (!ItemIdIsUsed(rditem))
414  report_corruption(&ctx,
415  psprintf("line pointer redirection to unused item at offset %u",
416  (unsigned) rdoffnum));
417  continue;
418  }
419 
420  /* Sanity-check the line pointer's offset and length values */
421  ctx.lp_len = ItemIdGetLength(ctx.itemid);
422  ctx.lp_off = ItemIdGetOffset(ctx.itemid);
423 
424  if (ctx.lp_off != MAXALIGN(ctx.lp_off))
425  {
426  report_corruption(&ctx,
427  psprintf("line pointer to page offset %u is not maximally aligned",
428  ctx.lp_off));
429  continue;
430  }
431  if (ctx.lp_len < MAXALIGN(SizeofHeapTupleHeader))
432  {
433  report_corruption(&ctx,
434  psprintf("line pointer length %u is less than the minimum tuple header size %u",
435  ctx.lp_len,
436  (unsigned) MAXALIGN(SizeofHeapTupleHeader)));
437  continue;
438  }
439  if (ctx.lp_off + ctx.lp_len > BLCKSZ)
440  {
441  report_corruption(&ctx,
442  psprintf("line pointer to page offset %u with length %u ends beyond maximum page offset %u",
443  ctx.lp_off,
444  ctx.lp_len,
445  (unsigned) BLCKSZ));
446  continue;
447  }
448 
449  /* It should be safe to examine the tuple's header, at least */
450  ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
451  ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
452 
453  /* Ok, ready to check this next tuple */
454  check_tuple(&ctx);
455  }
456 
457  /* clean up */
458  UnlockReleaseBuffer(ctx.buffer);
459 
460  if (on_error_stop && ctx.is_corrupt)
461  break;
462  }
463 
464  if (vmbuffer != InvalidBuffer)
465  ReleaseBuffer(vmbuffer);
466 
467  /* Close the associated toast table and indexes, if any. */
468  if (ctx.toast_indexes)
469  toast_close_indexes(ctx.toast_indexes, ctx.num_toast_indexes,
471  if (ctx.toast_rel)
472  table_close(ctx.toast_rel, AccessShareLock);
473 
474  /* Close the main relation */
476 
477  PG_RETURN_NULL();
478 }
479 
480 /*
481  * Check that a relation's relkind and access method are both supported,
482  * and that the caller has select privilege on the relation.
483  */
484 static void
486 {
487  if (rel->rd_rel->relkind != RELKIND_RELATION &&
488  rel->rd_rel->relkind != RELKIND_MATVIEW &&
489  rel->rd_rel->relkind != RELKIND_TOASTVALUE)
490  ereport(ERROR,
491  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
492  errmsg("\"%s\" is not a table, materialized view, or TOAST table",
493  RelationGetRelationName(rel))));
494  if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
495  ereport(ERROR,
496  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
497  errmsg("only heap AM is supported")));
498 }
499 
500 /*
501  * Record a single corruption found in the table. The values in ctx should
502  * reflect the location of the corruption, and the msg argument should contain
503  * a human-readable description of the corruption.
504  *
505  * The msg argument is pfree'd by this function.
506  */
507 static void
509 {
511  bool nulls[HEAPCHECK_RELATION_COLS];
512  HeapTuple tuple;
513 
514  MemSet(values, 0, sizeof(values));
515  MemSet(nulls, 0, sizeof(nulls));
516  values[0] = Int64GetDatum(ctx->blkno);
517  values[1] = Int32GetDatum(ctx->offnum);
518  values[2] = Int32GetDatum(ctx->attnum);
519  nulls[2] = (ctx->attnum < 0);
520  values[3] = CStringGetTextDatum(msg);
521 
522  /*
523  * In principle, there is nothing to prevent a scan over a large, highly
524  * corrupted table from using work_mem worth of memory building up the
525  * tuplestore. That's ok, but if we also leak the msg argument memory
526  * until the end of the query, we could exceed work_mem by more than a
527  * trivial amount. Therefore, free the msg argument each time we are
528  * called rather than waiting for our current memory context to be freed.
529  */
530  pfree(msg);
531 
532  tuple = heap_form_tuple(ctx->tupdesc, values, nulls);
533  tuplestore_puttuple(ctx->tupstore, tuple);
534  ctx->is_corrupt = true;
535 }
536 
537 /*
538  * Construct the TupleDesc used to report messages about corruptions found
539  * while scanning the heap.
540  */
541 static TupleDesc
543 {
545  AttrNumber a = 0;
546 
548  TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
549  TupleDescInitEntry(tupdesc, ++a, "offnum", INT4OID, -1, 0);
550  TupleDescInitEntry(tupdesc, ++a, "attnum", INT4OID, -1, 0);
551  TupleDescInitEntry(tupdesc, ++a, "msg", TEXTOID, -1, 0);
553 
554  return BlessTupleDesc(tupdesc);
555 }
556 
557 /*
558  * Check for tuple header corruption and tuple visibility.
559  *
560  * Since we do not hold a snapshot, tuple visibility is not a question of
561  * whether we should be able to see the tuple relative to any particular
562  * snapshot, but rather a question of whether it is safe and reasonable to
563  * check the tuple attributes.
564  *
565  * Some kinds of corruption make it unsafe to check the tuple attributes, for
566  * example when the line pointer refers to a range of bytes outside the page.
567  * In such cases, we return false (not visible) after recording appropriate
568  * corruption messages.
569  *
570  * Some other kinds of tuple header corruption confuse the question of where
571  * the tuple attributes begin, or how long the nulls bitmap is, etc., making it
572  * unreasonable to attempt to check attributes, even if all candidate answers
573  * to those questions would not result in reading past the end of the line
574  * pointer or page. In such cases, like above, we record corruption messages
575  * about the header and then return false.
576  *
577  * Other kinds of tuple header corruption do not bear on the question of
578  * whether the tuple attributes can be checked, so we record corruption
579  * messages for them but do not base our visibility determination on them. (In
580  * other words, we do not return false merely because we detected them.)
581  *
582  * For visibility determination not specifically related to corruption, what we
583  * want to know is if a tuple is potentially visible to any running
584  * transaction. If you are tempted to replace this function's visibility logic
585  * with a call to another visibility checking function, keep in mind that this
586  * function does not update hint bits, as it seems imprudent to write hint bits
587  * (or anything at all) to a table during a corruption check. Nor does this
588  * function bother classifying tuple visibility beyond a boolean visible vs.
589  * not visible.
590  *
591  * The caller should already have checked that xmin and xmax are not out of
592  * bounds for the relation.
593  *
594  * Returns whether the tuple is both visible and sufficiently sensible to
595  * undergo attribute checks.
596  */
597 static bool
599 {
600  uint16 infomask = tuphdr->t_infomask;
601  bool header_garbled = false;
602  unsigned expected_hoff;
603 
604  if (ctx->tuphdr->t_hoff > ctx->lp_len)
605  {
606  report_corruption(ctx,
607  psprintf("data begins at offset %u beyond the tuple length %u",
608  ctx->tuphdr->t_hoff, ctx->lp_len));
609  header_garbled = true;
610  }
611  if ((ctx->tuphdr->t_infomask & HEAP_XMAX_LOCK_ONLY) &&
613  {
614  report_corruption(ctx,
615  pstrdup("tuple is marked as only locked, but also claims key columns were updated"));
616  header_garbled = true;
617  }
618 
619  if ((ctx->tuphdr->t_infomask & HEAP_XMAX_COMMITTED) &&
621  {
622  report_corruption(ctx,
623  pstrdup("multixact should not be marked committed"));
624 
625  /*
626  * This condition is clearly wrong, but we do not consider the header
627  * garbled, because we don't rely on this property for determining if
628  * the tuple is visible or for interpreting other relevant header
629  * fields.
630  */
631  }
632 
633  if (infomask & HEAP_HASNULL)
634  expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
635  else
636  expected_hoff = MAXALIGN(SizeofHeapTupleHeader);
637  if (ctx->tuphdr->t_hoff != expected_hoff)
638  {
639  if ((infomask & HEAP_HASNULL) && ctx->natts == 1)
640  report_corruption(ctx,
641  psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, has nulls)",
642  expected_hoff, ctx->tuphdr->t_hoff));
643  else if ((infomask & HEAP_HASNULL))
644  report_corruption(ctx,
645  psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, has nulls)",
646  expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
647  else if (ctx->natts == 1)
648  report_corruption(ctx,
649  psprintf("tuple data should begin at byte %u, but actually begins at byte %u (1 attribute, no nulls)",
650  expected_hoff, ctx->tuphdr->t_hoff));
651  else
652  report_corruption(ctx,
653  psprintf("tuple data should begin at byte %u, but actually begins at byte %u (%u attributes, no nulls)",
654  expected_hoff, ctx->tuphdr->t_hoff, ctx->natts));
655  header_garbled = true;
656  }
657 
658  if (header_garbled)
659  return false; /* checking of this tuple should not continue */
660 
661  /*
662  * Ok, we can examine the header for tuple visibility purposes, though we
663  * still need to be careful about a few remaining types of header
664  * corruption. This logic roughly follows that of
665  * HeapTupleSatisfiesVacuum. Where possible the comments indicate which
666  * HTSV_Result we think that function might return for this tuple.
667  */
668  if (!HeapTupleHeaderXminCommitted(tuphdr))
669  {
670  TransactionId raw_xmin = HeapTupleHeaderGetRawXmin(tuphdr);
671 
672  if (HeapTupleHeaderXminInvalid(tuphdr))
673  return false; /* HEAPTUPLE_DEAD */
674  /* Used by pre-9.0 binary upgrades */
675  else if (infomask & HEAP_MOVED_OFF ||
676  infomask & HEAP_MOVED_IN)
677  {
679  TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr);
680 
681  switch (get_xid_status(xvac, ctx, &status))
682  {
683  case XID_INVALID:
684  report_corruption(ctx,
685  pstrdup("old-style VACUUM FULL transaction ID is invalid"));
686  return false; /* corrupt */
687  case XID_IN_FUTURE:
688  report_corruption(ctx,
689  psprintf("old-style VACUUM FULL transaction ID %u equals or exceeds next valid transaction ID %u:%u",
690  xvac,
693  return false; /* corrupt */
694  case XID_PRECEDES_RELMIN:
695  report_corruption(ctx,
696  psprintf("old-style VACUUM FULL transaction ID %u precedes relation freeze threshold %u:%u",
697  xvac,
700  return false; /* corrupt */
701  break;
703  report_corruption(ctx,
704  psprintf("old-style VACUUM FULL transaction ID %u precedes oldest valid transaction ID %u:%u",
705  xvac,
708  return false; /* corrupt */
709  break;
710  case XID_BOUNDS_OK:
711  switch (status)
712  {
713  case XID_IN_PROGRESS:
714  return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */
715  case XID_COMMITTED:
716  case XID_ABORTED:
717  return false; /* HEAPTUPLE_DEAD */
718  }
719  }
720  }
721  else
722  {
724 
725  switch (get_xid_status(raw_xmin, ctx, &status))
726  {
727  case XID_INVALID:
728  report_corruption(ctx,
729  pstrdup("raw xmin is invalid"));
730  return false;
731  case XID_IN_FUTURE:
732  report_corruption(ctx,
733  psprintf("raw xmin %u equals or exceeds next valid transaction ID %u:%u",
734  raw_xmin,
737  return false; /* corrupt */
738  case XID_PRECEDES_RELMIN:
739  report_corruption(ctx,
740  psprintf("raw xmin %u precedes relation freeze threshold %u:%u",
741  raw_xmin,
744  return false; /* corrupt */
746  report_corruption(ctx,
747  psprintf("raw xmin %u precedes oldest valid transaction ID %u:%u",
748  raw_xmin,
751  return false; /* corrupt */
752  case XID_BOUNDS_OK:
753  switch (status)
754  {
755  case XID_COMMITTED:
756  break;
757  case XID_IN_PROGRESS:
758  return true; /* insert or delete in progress */
759  case XID_ABORTED:
760  return false; /* HEAPTUPLE_DEAD */
761  }
762  }
763  }
764  }
765 
766  if (!(infomask & HEAP_XMAX_INVALID) && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
767  {
768  if (infomask & HEAP_XMAX_IS_MULTI)
769  {
771  TransactionId xmax = HeapTupleGetUpdateXid(tuphdr);
772 
773  switch (get_xid_status(xmax, ctx, &status))
774  {
775  /* not LOCKED_ONLY, so it has to have an xmax */
776  case XID_INVALID:
777  report_corruption(ctx,
778  pstrdup("xmax is invalid"));
779  return false; /* corrupt */
780  case XID_IN_FUTURE:
781  report_corruption(ctx,
782  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
783  xmax,
786  return false; /* corrupt */
787  case XID_PRECEDES_RELMIN:
788  report_corruption(ctx,
789  psprintf("xmax %u precedes relation freeze threshold %u:%u",
790  xmax,
793  return false; /* corrupt */
795  report_corruption(ctx,
796  psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
797  xmax,
800  return false; /* corrupt */
801  case XID_BOUNDS_OK:
802  switch (status)
803  {
804  case XID_IN_PROGRESS:
805  return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */
806  case XID_COMMITTED:
807  case XID_ABORTED:
808  return false; /* HEAPTUPLE_RECENTLY_DEAD or
809  * HEAPTUPLE_DEAD */
810  }
811  }
812 
813  /* Ok, the tuple is live */
814  }
815  else if (!(infomask & HEAP_XMAX_COMMITTED))
816  return true; /* HEAPTUPLE_DELETE_IN_PROGRESS or
817  * HEAPTUPLE_LIVE */
818  else
819  return false; /* HEAPTUPLE_RECENTLY_DEAD or HEAPTUPLE_DEAD */
820  }
821  return true; /* not dead */
822 }
823 
824 /*
825  * Check the current toast tuple against the state tracked in ctx, recording
826  * any corruption found in ctx->tupstore.
827  *
828  * This is not equivalent to running verify_heapam on the toast table itself,
829  * and is not hardened against corruption of the toast table. Rather, when
830  * validating a toasted attribute in the main table, the sequence of toast
831  * tuples that store the toasted value are retrieved and checked in order, with
832  * each toast tuple being checked against where we are in the sequence, as well
833  * as each toast tuple having its varlena structure sanity checked.
834  */
835 static void
837 {
838  int32 curchunk;
839  Pointer chunk;
840  bool isnull;
841  int32 chunksize;
842  int32 expected_size;
843 
844  /*
845  * Have a chunk, extract the sequence number and the data
846  */
847  curchunk = DatumGetInt32(fastgetattr(toasttup, 2,
848  ctx->toast_rel->rd_att, &isnull));
849  if (isnull)
850  {
851  report_corruption(ctx,
852  pstrdup("toast chunk sequence number is null"));
853  return;
854  }
855  chunk = DatumGetPointer(fastgetattr(toasttup, 3,
856  ctx->toast_rel->rd_att, &isnull));
857  if (isnull)
858  {
859  report_corruption(ctx,
860  pstrdup("toast chunk data is null"));
861  return;
862  }
863  if (!VARATT_IS_EXTENDED(chunk))
864  chunksize = VARSIZE(chunk) - VARHDRSZ;
865  else if (VARATT_IS_SHORT(chunk))
866  {
867  /*
868  * could happen due to heap_form_tuple doing its thing
869  */
870  chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
871  }
872  else
873  {
874  /* should never happen */
875  uint32 header = ((varattrib_4b *) chunk)->va_4byte.va_header;
876 
877  report_corruption(ctx,
878  psprintf("corrupt extended toast chunk has invalid varlena header: %0x (sequence number %d)",
879  header, curchunk));
880  return;
881  }
882 
883  /*
884  * Some checks on the data we've found
885  */
886  if (curchunk != ctx->chunkno)
887  {
888  report_corruption(ctx,
889  psprintf("toast chunk sequence number %u does not match the expected sequence number %u",
890  curchunk, ctx->chunkno));
891  return;
892  }
893  if (curchunk > ctx->endchunk)
894  {
895  report_corruption(ctx,
896  psprintf("toast chunk sequence number %u exceeds the end chunk sequence number %u",
897  curchunk, ctx->endchunk));
898  return;
899  }
900 
901  expected_size = curchunk < ctx->totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
902  : ctx->attrsize - ((ctx->totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
903  if (chunksize != expected_size)
904  {
905  report_corruption(ctx,
906  psprintf("toast chunk size %u differs from the expected size %u",
907  chunksize, expected_size));
908  return;
909  }
910 }
911 
912 /*
913  * Check the current attribute as tracked in ctx, recording any corruption
914  * found in ctx->tupstore.
915  *
916  * This function follows the logic performed by heap_deform_tuple(), and in the
917  * case of a toasted value, optionally continues along the logic of
918  * detoast_external_attr(), checking for any conditions that would result in
919  * either of those functions Asserting or crashing the backend. The checks
920  * performed by Asserts present in those two functions are also performed here.
921  * In cases where those two functions are a bit cavalier in their assumptions
922  * about data being correct, we perform additional checks not present in either
923  * of those two functions. Where some condition is checked in both of those
924  * functions, we perform it here twice, as we parallel the logical flow of
925  * those two functions. The presence of duplicate checks seems a reasonable
926  * price to pay for keeping this code tightly coupled with the code it
927  * protects.
928  *
929  * Returns true if the tuple attribute is sane enough for processing to
930  * continue on to the next attribute, false otherwise.
931  */
932 static bool
934 {
935  struct varatt_external toast_pointer;
936  ScanKeyData toastkey;
937  SysScanDesc toastscan;
938  SnapshotData SnapshotToast;
939  HeapTuple toasttup;
940  bool found_toasttup;
941  Datum attdatum;
942  struct varlena *attr;
943  char *tp; /* pointer to the tuple data */
944  uint16 infomask;
945  Form_pg_attribute thisatt;
946 
947  infomask = ctx->tuphdr->t_infomask;
948  thisatt = TupleDescAttr(RelationGetDescr(ctx->rel), ctx->attnum);
949 
950  tp = (char *) ctx->tuphdr + ctx->tuphdr->t_hoff;
951 
952  if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
953  {
954  report_corruption(ctx,
955  psprintf("attribute %u with length %u starts at offset %u beyond total tuple length %u",
956  ctx->attnum,
957  thisatt->attlen,
958  ctx->tuphdr->t_hoff + ctx->offset,
959  ctx->lp_len));
960  return false;
961  }
962 
963  /* Skip null values */
964  if (infomask & HEAP_HASNULL && att_isnull(ctx->attnum, ctx->tuphdr->t_bits))
965  return true;
966 
967  /* Skip non-varlena values, but update offset first */
968  if (thisatt->attlen != -1)
969  {
970  ctx->offset = att_align_nominal(ctx->offset, thisatt->attalign);
971  ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
972  tp + ctx->offset);
973  if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
974  {
975  report_corruption(ctx,
976  psprintf("attribute %u with length %u ends at offset %u beyond total tuple length %u",
977  ctx->attnum,
978  thisatt->attlen,
979  ctx->tuphdr->t_hoff + ctx->offset,
980  ctx->lp_len));
981  return false;
982  }
983  return true;
984  }
985 
986  /* Ok, we're looking at a varlena attribute. */
987  ctx->offset = att_align_pointer(ctx->offset, thisatt->attalign, -1,
988  tp + ctx->offset);
989 
990  /* Get the (possibly corrupt) varlena datum */
991  attdatum = fetchatt(thisatt, tp + ctx->offset);
992 
993  /*
994  * We have the datum, but we cannot decode it carelessly, as it may still
995  * be corrupt.
996  */
997 
998  /*
999  * Check that VARTAG_SIZE won't hit a TrapMacro on a corrupt va_tag before
1000  * risking a call into att_addlength_pointer
1001  */
1002  if (VARATT_IS_EXTERNAL(tp + ctx->offset))
1003  {
1004  uint8 va_tag = VARTAG_EXTERNAL(tp + ctx->offset);
1005 
1006  if (va_tag != VARTAG_ONDISK)
1007  {
1008  report_corruption(ctx,
1009  psprintf("toasted attribute %u has unexpected TOAST tag %u",
1010  ctx->attnum,
1011  va_tag));
1012  /* We can't know where the next attribute begins */
1013  return false;
1014  }
1015  }
1016 
1017  /* Ok, should be safe now */
1018  ctx->offset = att_addlength_pointer(ctx->offset, thisatt->attlen,
1019  tp + ctx->offset);
1020 
1021  if (ctx->tuphdr->t_hoff + ctx->offset > ctx->lp_len)
1022  {
1023  report_corruption(ctx,
1024  psprintf("attribute %u with length %u ends at offset %u beyond total tuple length %u",
1025  ctx->attnum,
1026  thisatt->attlen,
1027  ctx->tuphdr->t_hoff + ctx->offset,
1028  ctx->lp_len));
1029 
1030  return false;
1031  }
1032 
1033  /*
1034  * heap_deform_tuple would be done with this attribute at this point,
1035  * having stored it in values[], and would continue to the next attribute.
1036  * We go further, because we need to check if the toast datum is corrupt.
1037  */
1038 
1039  attr = (struct varlena *) DatumGetPointer(attdatum);
1040 
1041  /*
1042  * Now we follow the logic of detoast_external_attr(), with the same
1043  * caveats about being paranoid about corruption.
1044  */
1045 
1046  /* Skip values that are not external */
1047  if (!VARATT_IS_EXTERNAL(attr))
1048  return true;
1049 
1050  /* It is external, and we're looking at a page on disk */
1051 
1052  /* The tuple header better claim to contain toasted values */
1053  if (!(infomask & HEAP_HASEXTERNAL))
1054  {
1055  report_corruption(ctx,
1056  psprintf("attribute %u is external but tuple header flag HEAP_HASEXTERNAL not set",
1057  ctx->attnum));
1058  return true;
1059  }
1060 
1061  /* The relation better have a toast table */
1062  if (!ctx->rel->rd_rel->reltoastrelid)
1063  {
1064  report_corruption(ctx,
1065  psprintf("attribute %u is external but relation has no toast relation",
1066  ctx->attnum));
1067  return true;
1068  }
1069 
1070  /* If we were told to skip toast checking, then we're done. */
1071  if (ctx->toast_rel == NULL)
1072  return true;
1073 
1074  /*
1075  * Must copy attr into toast_pointer for alignment considerations
1076  */
1077  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1078 
1079  ctx->attrsize = toast_pointer.va_extsize;
1080  ctx->endchunk = (ctx->attrsize - 1) / TOAST_MAX_CHUNK_SIZE;
1081  ctx->totalchunks = ctx->endchunk + 1;
1082 
1083  /*
1084  * Setup a scan key to find chunks in toast table with matching va_valueid
1085  */
1086  ScanKeyInit(&toastkey,
1087  (AttrNumber) 1,
1088  BTEqualStrategyNumber, F_OIDEQ,
1089  ObjectIdGetDatum(toast_pointer.va_valueid));
1090 
1091  /*
1092  * Check if any chunks for this toasted object exist in the toast table,
1093  * accessible via the index.
1094  */
1095  init_toast_snapshot(&SnapshotToast);
1096  toastscan = systable_beginscan_ordered(ctx->toast_rel,
1097  ctx->valid_toast_index,
1098  &SnapshotToast, 1,
1099  &toastkey);
1100  ctx->chunkno = 0;
1101  found_toasttup = false;
1102  while ((toasttup =
1103  systable_getnext_ordered(toastscan,
1104  ForwardScanDirection)) != NULL)
1105  {
1106  found_toasttup = true;
1107  check_toast_tuple(toasttup, ctx);
1108  ctx->chunkno++;
1109  }
1110  if (ctx->chunkno != (ctx->endchunk + 1))
1111  report_corruption(ctx,
1112  psprintf("final toast chunk number %u differs from expected value %u",
1113  ctx->chunkno, (ctx->endchunk + 1)));
1114  if (!found_toasttup)
1115  report_corruption(ctx,
1116  psprintf("toasted value for attribute %u missing from toast table",
1117  ctx->attnum));
1118  systable_endscan_ordered(toastscan);
1119 
1120  return true;
1121 }
1122 
1123 /*
1124  * Check the current tuple as tracked in ctx, recording any corruption found in
1125  * ctx->tupstore.
 *
 * The checks run in this order:
 *   1. xmin bounds;
 *   2. xmax bounds, treated as a multixact when HEAP_XMAX_IS_MULTI is set in
 *      the tuple's infomask and as a plain transaction ID otherwise;
 *   3. tuple header and visibility (check_tuple_header_and_visibilty);
 *   4. the tuple's attribute count against the relation descriptor;
 *   5. each attribute in turn (check_tuple_attribute), stopping at the first
 *      attribute for which that function returns false.
 *
 * Every xmin/xmax bounds violation is reported and sets "fatal", which makes
 * the function return before steps 3-5.  Both xmin and xmax are always
 * examined, so a single tuple can produce a report for each.
 *
 * ctx->offset is reset to 0 before the attribute loop.  ctx->attnum is set
 * back to -1 only when the attribute loop has been run; the early-return
 * paths leave it untouched.
 *
 * NOTE(review): this listing omits some source lines -- the line carrying
 * the function name and parameter list, three case labels (the branches
 * reporting "precedes oldest valid ..."; presumably
 * XID_PRECEDES_CLUSTERMIN -- confirm), and the trailing psprintf arguments
 * of the "%u:%u" messages.  The text below is therefore not compilable as
 * shown.
1126  */
1127 static void
1129 {
1130  TransactionId xmin;
1131  TransactionId xmax;
1132  bool fatal = false;
1133  uint16 infomask = ctx->tuphdr->t_infomask;
1134 
1135  /* If xmin is normal, it should be within valid range */
1136  xmin = HeapTupleHeaderGetXmin(ctx->tuphdr);
 /* status argument is NULL: only the bounds classification is wanted here */
1137  switch (get_xid_status(xmin, ctx, NULL))
1138  {
1139  case XID_INVALID:
1140  case XID_BOUNDS_OK:
1141  break;
1142  case XID_IN_FUTURE:
1143  report_corruption(ctx,
1144  psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u",
1145  xmin,
1148  fatal = true;
1149  break;
1151  report_corruption(ctx,
1152  psprintf("xmin %u precedes oldest valid transaction ID %u:%u",
1153  xmin,
1156  fatal = true;
1157  break;
1158  case XID_PRECEDES_RELMIN:
1159  report_corruption(ctx,
1160  psprintf("xmin %u precedes relation freeze threshold %u:%u",
1161  xmin,
1164  fatal = true;
1165  break;
1166  }
1167 
 /* Raw xmax: interpreted below according to HEAP_XMAX_IS_MULTI */
1168  xmax = HeapTupleHeaderGetRawXmax(ctx->tuphdr);
1169 
1170  if (infomask & HEAP_XMAX_IS_MULTI)
1171  {
1172  /* xmax is a multixact, so it should be within valid MXID range */
 /* Unlike the xid switches, XID_INVALID is reported as corruption here */
1173  switch (check_mxid_valid_in_rel(xmax, ctx))
1174  {
1175  case XID_INVALID:
1176  report_corruption(ctx,
1177  pstrdup("multitransaction ID is invalid"));
1178  fatal = true;
1179  break;
1180  case XID_PRECEDES_RELMIN:
1181  report_corruption(ctx,
1182  psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u",
1183  xmax, ctx->relminmxid));
1184  fatal = true;
1185  break;
1187  report_corruption(ctx,
1188  psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u",
1189  xmax, ctx->oldest_mxact));
1190  fatal = true;
1191  break;
1192  case XID_IN_FUTURE:
1193  report_corruption(ctx,
1194  psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u",
1195  xmax,
1196  ctx->next_mxact));
1197  fatal = true;
1198  break;
1199  case XID_BOUNDS_OK:
1200  break;
1201  }
1202  }
1203  else
1204  {
1205  /*
1206  * xmax is not a multixact and is normal, so it should be within the
1207  * valid XID range.
1208  */
1209  switch (get_xid_status(xmax, ctx, NULL))
1210  {
1211  case XID_INVALID:
1212  case XID_BOUNDS_OK:
1213  break;
1214  case XID_IN_FUTURE:
1215  report_corruption(ctx,
1216  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
1217  xmax,
1220  fatal = true;
1221  break;
1223  report_corruption(ctx,
1224  psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
1225  xmax,
1228  fatal = true;
1229  break;
1230  case XID_PRECEDES_RELMIN:
1231  report_corruption(ctx,
1232  psprintf("xmax %u precedes relation freeze threshold %u:%u",
1233  xmax,
1236  fatal = true;
 /* no break follows: this is the last case of the switch */
1237  }
1238  }
1239 
1240  /*
1241  * Cannot process tuple data if tuple header was corrupt, as the offsets
1242  * within the page cannot be trusted, leaving too much risk of reading
1243  * garbage if we continue.
1244  *
1245  * We also cannot process the tuple if the xmin or xmax were invalid
1246  * relative to relfrozenxid or relminmxid, as clog entries for the xids
1247  * may already be gone.
1248  */
1249  if (fatal)
1250  return;
1251 
1252  /*
1253  * Check various forms of tuple header corruption. If the header is too
1254  * corrupt to continue checking, or if the tuple is not visible to anyone,
1255  * we cannot continue with other checks.
1256  */
1257  if (!check_tuple_header_and_visibilty(ctx->tuphdr, ctx))
1258  return;
1259 
1260  /*
1261  * The tuple is visible, so it must be compatible with the current version
1262  * of the relation descriptor. It might have fewer columns than are
1263  * present in the relation descriptor, but it cannot have more.
1264  */
1265  if (RelationGetDescr(ctx->rel)->natts < ctx->natts)
1266  {
1267  report_corruption(ctx,
1268  psprintf("number of attributes %u exceeds maximum expected for table %u",
1269  ctx->natts,
1270  RelationGetDescr(ctx->rel)->natts));
1271  return;
1272  }
1273 
1274  /*
1275  * Check each attribute unless we hit corruption that confuses what to do
1276  * next, at which point we abort further attribute checks for this tuple.
1277  * Note that we don't abort for all types of corruption, only for those
1278  * types where we don't know how to continue.
1279  */
1280  ctx->offset = 0;
1281  for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++)
1282  if (!check_tuple_attribute(ctx))
1283  break; /* cannot continue */
1284 
1285  /* revert attnum to -1 until we again examine individual attributes */
1286  ctx->attnum = -1;
1287 }
1288 
1289 /*
1290  * Convert a TransactionId into a FullTransactionId using our cached values of
1291  * the valid transaction ID range. It is the caller's responsibility to have
1292  * already updated the cached values, if necessary.
 *
 * Non-normal xids are returned paired with epoch 0.  For normal xids the
 * epoch is taken from ctx->next_fxid; when xid is numerically greater than
 * ctx->next_xid the epoch is decremented by one, i.e. the xid is placed in
 * the epoch preceding that of ctx->next_fxid.
 *
 * NOTE(review): epoch is a uint32, so when the epoch of ctx->next_fxid is 0
 * the decrement wraps to the largest uint32 value instead of going negative,
 * producing a FullTransactionId far ahead of ctx->next_fxid.  Confirm that
 * callers are meant to see such xids classified that way.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
1293  */
1294 static FullTransactionId
1296 {
1297  uint32 epoch;
1298 
 /* Non-normal xids carry no epoch information; pair them with epoch 0 */
1299  if (!TransactionIdIsNormal(xid))
1300  return FullTransactionIdFromEpochAndXid(0, xid);
1301  epoch = EpochFromFullTransactionId(ctx->next_fxid);
 /* An xid above next_xid is assigned to the preceding epoch */
1302  if (xid > ctx->next_xid)
1303  epoch--;
1304  return FullTransactionIdFromEpochAndXid(epoch, xid);
1305 }
1306 
1307 /*
1308  * Update our cached range of valid transaction IDs.
 *
 * The cached copies are taken while holding XidGenLock in shared mode; the
 * derived ("alternate") values are computed after the lock is released.
 *
 * NOTE(review): in this listing only the lock acquire/release pair and the
 * two section comments are visible.  The statements that copy the values and
 * compute the derived ones, as well as the line carrying the function name
 * and parameter list, are missing, so which ctx fields are written cannot be
 * confirmed from here.
1309  */
1310 static void
1312 {
1313  /* Make cached copies */
1314  LWLockAcquire(XidGenLock, LW_SHARED);
1317  LWLockRelease(XidGenLock);
1318 
1319  /* And compute alternate versions of the same */
1322 }
1323 
1324 /*
1325  * Update our cached range of valid multitransaction IDs.
 *
 * NOTE(review): the body of this function, and the line carrying its name
 * and parameter list, are missing from this listing.  Presumably it refreshes
 * ctx->oldest_mxact and ctx->next_mxact -- confirm against the full source.
1326  */
1327 static void
1329 {
1331 }
1332 
1333 /*
1334  * Return whether the given FullTransactionId is within our cached valid
1335  * transaction ID range.
 *
 * The range is half-open: a value equal to ctx->oldest_fxid is inside the
 * range, while a value equal to ctx->next_fxid is not.  Only the cached
 * values are consulted; nothing is refreshed here.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
1336  */
1337 static inline bool
1339 {
1340  return (FullTransactionIdPrecedesOrEquals(ctx->oldest_fxid, fxid) &&
1341  FullTransactionIdPrecedes(fxid, ctx->next_fxid));
1342 }
1343 
1344 /*
1345  * Checks whether a multitransaction ID is in the cached valid range, returning
1346  * the nature of the range violation, if any.
 *
 * The tests are applied in the order below and the first one that matches
 * determines the result:
 *   - mxid fails TransactionIdIsValid        -> XID_INVALID
 *   - mxid precedes ctx->relminmxid          -> XID_PRECEDES_RELMIN
 *   - mxid precedes ctx->oldest_mxact        -> XID_PRECEDES_CLUSTERMIN
 *   - ctx->next_mxact precedes or equals it  -> XID_IN_FUTURE
 *   - otherwise                              -> XID_BOUNDS_OK
 *
 * Only the values already cached in ctx are consulted; nothing is refreshed
 * here.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * missing from this listing.
1347  */
1348 static XidBoundsViolation
1350 {
1351  if (!TransactionIdIsValid(mxid))
1352  return XID_INVALID;
1353  if (MultiXactIdPrecedes(mxid, ctx->relminmxid))
1354  return XID_PRECEDES_RELMIN;
1355  if (MultiXactIdPrecedes(mxid, ctx->oldest_mxact))
1356  return XID_PRECEDES_CLUSTERMIN;
1357  if (MultiXactIdPrecedesOrEquals(ctx->next_mxact, mxid))
1358  return XID_IN_FUTURE;
1359  return XID_BOUNDS_OK;
1360 }
1361 
1362 /*
1363  * Checks whether the given mxid is valid to appear in the heap being checked,
1364  * returning the nature of the range violation, if any.
1365  *
1366  * This function attempts to return quickly by caching the known valid mxid
1367  * range in ctx. Callers should already have performed the initial setup of
1368  * the cache prior to the first call to this function.
 *
 * If the first check against the cached range yields XID_BOUNDS_OK, that is
 * returned immediately.  Any other result causes check_mxid_in_range to be
 * called a second time, and the second result is what the caller receives.
 *
 * NOTE(review): the statement between the "Recheck" comment and the second
 * call is missing from this listing, as is the line carrying the function
 * name and parameter list.  Presumably the missing statement refreshes the
 * cached mxid range -- confirm against the full source.
1369  */
1370 static XidBoundsViolation
1372 {
1373  XidBoundsViolation result;
1374 
1375  result = check_mxid_in_range(mxid, ctx);
1376  if (result == XID_BOUNDS_OK)
1377  return XID_BOUNDS_OK;
1378 
1379  /* The range may have advanced. Recheck. */
1381  return check_mxid_in_range(mxid, ctx);
1382 }
1383 
1384 /*
1385  * Checks whether the given transaction ID is (or was recently) valid to appear
1386  * in the heap being checked, or whether it is too old or too new to appear in
1387  * the relation, returning information about the nature of the bounds violation.
1388  *
1389  * We cache the range of valid transaction IDs. If xid is in that range, we
1390  * conclude that it is valid, even though concurrent changes to the table might
1391  * invalidate it under certain corrupt conditions. (For example, if the table
1392  * contains corrupt all-frozen bits, a concurrent vacuum might skip the page(s)
1393  * containing the xid and then truncate clog and advance the relfrozenxid
1394  * beyond xid.) Reporting the xid as valid under such conditions seems
1395  * acceptable, since if we had checked it earlier in our scan it would have
1396  * truly been valid at that time.
1397  *
1398  * If the status argument is not NULL, and if and only if the transaction ID
1399  * appears to be valid in this relation, the status argument will be set with
1400  * the commit status of the transaction ID.
 *
 * Summary of the visible control flow:
 *   - an invalid xid returns XID_INVALID and leaves *status untouched;
 *   - BootstrapTransactionId and FrozenTransactionId return XID_BOUNDS_OK,
 *     with *status (when requested) set to XID_COMMITTED;
 *   - any other xid is widened to a FullTransactionId using the cached
 *     range, and widened a second time if it first falls outside that range;
 *   - an fxid preceding ctx->oldest_fxid returns XID_PRECEDES_CLUSTERMIN, and
 *     one preceding ctx->relfrozenfxid returns XID_PRECEDES_RELMIN;
 *   - with a NULL status the function then returns XID_BOUNDS_OK without any
 *     commit-status lookup;
 *   - when xid equals ctx->cached_xid the previously stored
 *     ctx->cached_status is reused;
 *   - otherwise *status starts out as XID_COMMITTED and is refined, while
 *     XactTruncationLock is held in shared mode, only if clog_horizon
 *     precedes or equals fxid.  The outcome is then remembered in
 *     ctx->cached_xid and ctx->cached_status.
 *
 * NOTE(review): this listing omits several source lines: the function name
 * and parameter list, the statement that precedes the reconversion inside
 * the out-of-range branch (presumably a refresh of the cached xid range),
 * the condition guarding "return XID_IN_FUTURE", the expression assigned to
 * clog_horizon, and the first condition of the commit-status if/else chain.
 * The text below is therefore not compilable as shown.
1401  */
1402 static XidBoundsViolation
1405 {
1406  FullTransactionId fxid;
1407  FullTransactionId clog_horizon;
1408 
1409  /* Quick check for special xids */
1410  if (!TransactionIdIsValid(xid))
1411  return XID_INVALID;
1412  else if (xid == BootstrapTransactionId || xid == FrozenTransactionId)
1413  {
1414  if (status != NULL)
1415  *status = XID_COMMITTED;
1416  return XID_BOUNDS_OK;
1417  }
1418 
1419  /* Check if the xid is within bounds */
1420  fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
1421  if (!fxid_in_cached_range(fxid, ctx))
1422  {
1423  /*
1424  * We may have been checking against stale values. Update the cached
1425  * range to be sure, and since we relied on the cached range when we
1426  * performed the full xid conversion, reconvert.
1427  */
1429  fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
1430  }
1431 
1433  return XID_IN_FUTURE;
1434  if (FullTransactionIdPrecedes(fxid, ctx->oldest_fxid))
1435  return XID_PRECEDES_CLUSTERMIN;
1436  if (FullTransactionIdPrecedes(fxid, ctx->relfrozenfxid))
1437  return XID_PRECEDES_RELMIN;
1438 
1439  /* Early return if the caller does not request clog checking */
1440  if (status == NULL)
1441  return XID_BOUNDS_OK;
1442 
1443  /* Early return if we just checked this xid in a prior call */
1444  if (xid == ctx->cached_xid)
1445  {
1446  *status = ctx->cached_status;
1447  return XID_BOUNDS_OK;
1448  }
1449 
 /* Default kept when fxid is older than clog_horizon (no lookup is made) */
1450  *status = XID_COMMITTED;
1451  LWLockAcquire(XactTruncationLock, LW_SHARED);
1452  clog_horizon =
1454  ctx);
1455  if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid))
1456  {
1458  *status = XID_IN_PROGRESS;
1459  else if (TransactionIdDidCommit(xid))
1460  *status = XID_COMMITTED;
1461  else if (TransactionIdDidAbort(xid))
1462  *status = XID_ABORTED;
1463  else
1464  *status = XID_IN_PROGRESS;
1465  }
1466  LWLockRelease(XactTruncationLock);
 /* Remember the answer so a repeated lookup of the same xid is cheap */
1467  ctx->cached_xid = xid;
1468  ctx->cached_status = *status;
1469  return XID_BOUNDS_OK;
1470 }
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:542
BlockNumber blkno
Definition: verify_heapam.c:99
TransactionId oldest_xid
Definition: verify_heapam.c:72
#define SizeofHeapTupleHeader
Definition: htup_details.h:184
FullTransactionId oldest_fxid
Definition: verify_heapam.c:73
#define IsA(nodeptr, _type_)
Definition: nodes.h:578
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
int errhint(const char *fmt,...)
Definition: elog.c:1149
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:196
#define fastgetattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:712
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:148
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
HeapTupleHeader tuphdr
uint32 TransactionId
Definition: c.h:575
bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]
Definition: htup_details.h:177
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:44
#define DatumGetInt32(X)
Definition: postgres.h:472
BufferAccessStrategy bstrategy
#define RelationGetDescr(relation)
Definition: rel.h:483
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:869
#define VARHDRSZ_SHORT
Definition: postgres.h:268
#define VARSIZE(PTR)
Definition: postgres.h:303
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition: detoast.h:32
#define att_isnull(ATT, BITS)
Definition: tupmacs.h:25
TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple)
Definition: heapam.c:6540
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
void init_toast_snapshot(Snapshot toast_snapshot)
Relation toast_rel
Definition: verify_heapam.c:93
#define HEAPCHECK_RELATION_COLS
Definition: verify_heapam.c:31
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
#define VARHDRSZ
Definition: c.h:623
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
FullTransactionId next_fxid
Definition: verify_heapam.c:70
MultiXactId oldest_mxact
Definition: verify_heapam.c:80
#define VISIBILITYMAP_ALL_FROZEN
Definition: visibilitymap.h:27
char * pstrdup(const char *in)
Definition: mcxt.c:1187
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
Datum verify_heapam(PG_FUNCTION_ARGS)
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:654
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:112
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
unsigned char uint8
Definition: c.h:427
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
TransactionId cached_xid
Definition: verify_heapam.c:85
TransactionId oldestXid
Definition: transam.h:215
int errcode(int sqlerrcode)
Definition: elog.c:691
static void report_corruption(HeapCheckContext *ctx, char *msg)
#define MemSet(start, val, len)
Definition: c.h:1004
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
#define HeapTupleHeaderXminInvalid(tup)
Definition: htup_details.h:329
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:206
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
AttrNumber attnum
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:125
HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
Definition: genam.c:681
PG_FUNCTION_INFO_V1(verify_heapam)
Relation valid_toast_index
Definition: verify_heapam.c:95
Form_pg_class rd_rel
Definition: rel.h:110
unsigned int Oid
Definition: postgres_ext.h:31
static int fb(int x)
Definition: preproc-init.c:92
#define VARTAG_EXTERNAL(PTR)
Definition: postgres.h:308
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
FullTransactionId nextXid
Definition: transam.h:213
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
#define fetchatt(A, T)
Definition: tupmacs.h:41
MultiXactId next_mxact
Definition: verify_heapam.c:79
signed int int32
Definition: c.h:417
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define XidFromFullTransactionId(x)
Definition: transam.h:48
uint16 OffsetNumber
Definition: off.h:24
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1811
static void update_cached_xid_range(HeapCheckContext *ctx)
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:313
#define FullTransactionIdPrecedesOrEquals(a, b)
Definition: transam.h:52
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:375
unsigned short uint16
Definition: c.h:428
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
char * Pointer
Definition: c.h:406
#define HEAP_HASNULL
Definition: htup_details.h:189
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3536
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
TransactionId relfrozenxid
Definition: verify_heapam.c:90
static bool check_tuple_attribute(HeapCheckContext *ctx)
#define VARATT_IS_SHORT(PTR)
Definition: postgres.h:326
#define HEAP_XMAX_INVALID
Definition: htup_details.h:207
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:531
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:48
void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
Definition: tuplestore.c:730
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2052
#define BootstrapTransactionId
Definition: transam.h:32
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:324
static void sanity_check_relation(Relation rel)
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
static bool check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx)
#define FirstOffsetNumber
Definition: off.h:27
TransactionId oldestClogXid
Definition: transam.h:246
static XidBoundsViolation get_xid_status(TransactionId xid, HeapCheckContext *ctx, XidCommitStatus *status)
VariableCache ShmemVariableCache
Definition: varsup.c:34
void toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
#define InvalidTransactionId
Definition: transam.h:31
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:415
#define RelationGetRelationName(relation)
Definition: rel.h:491
static XidBoundsViolation check_mxid_in_range(MultiXactId mxid, HeapCheckContext *ctx)
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:193
unsigned int uint32
Definition: c.h:429
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
#define HEAP_MOVED_IN
Definition: htup_details.h:213
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1701
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:603
static void update_cached_mxid_range(HeapCheckContext *ctx)
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define att_addlength_pointer(cur_offset, attlen, attptr)
Definition: tupmacs.h:176
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:181
static void check_tuple(HeapCheckContext *ctx)
#define VARSIZE_SHORT(PTR)
Definition: postgres.h:305
OffsetNumber offnum
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
Tuplestorestate * tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
Definition: tuplestore.c:318
int toast_open_indexes(Relation toastrel, LOCKMODE lock, Relation **toastidxs, int *num_indexes)
uintptr_t Datum
Definition: postgres.h:367
TransactionId next_xid
Definition: verify_heapam.c:71
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:230
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:278
#define att_align_pointer(cur_offset, attalign, attlen, attptr)
Definition: tupmacs.h:126
void systable_endscan_ordered(SysScanDesc sysscan)
Definition: genam.c:706
int work_mem
Definition: globals.c:121
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
FullTransactionId relfrozenfxid
Definition: verify_heapam.c:91
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
TupleDesc rd_att
Definition: rel.h:111
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:208
XidBoundsViolation
Definition: verify_heapam.c:37
SkipPages
Definition: verify_heapam.c:53
#define ereport(elevel,...)
Definition: elog.h:155
int allowedModes
Definition: execnodes.h:304
TransactionId MultiXactId
Definition: c.h:585
SetFunctionReturnMode returnMode
Definition: execnodes.h:306
#define PG_ARGISNULL(n)
Definition: fmgr.h:209
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define HEAP_MOVED_OFF
Definition: htup_details.h:210
#define Assert(condition)
Definition: c.h:800
static TupleDesc verify_heapam_tupdesc(void)
#define FrozenTransactionId
Definition: transam.h:33
int32 va_extsize
Definition: postgres.h:70
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
XidCommitStatus
Definition: verify_heapam.c:46
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1207
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
static bool fxid_in_cached_range(FullTransactionId fxid, const HeapCheckContext *ctx)
#define MAXALIGN(LEN)
Definition: c.h:753
#define fatal(...)
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:308
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3146
MemoryContext ecxt_per_query_memory
Definition: execnodes.h:232
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
Tuplestorestate * tupstore
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:210
#define VARATT_IS_EXTENDED(PTR)
Definition: postgres.h:327
Tuplestorestate * setResult
Definition: execnodes.h:309
#define DatumGetPointer(X)
Definition: postgres.h:549
static Datum values[MAXATTR]
Definition: bootstrap.c:165
char * text_to_cstring(const text *t)
Definition: varlena.c:221
ExprContext * econtext
Definition: execnodes.h:302
#define Int32GetDatum(X)
Definition: postgres.h:479
TupleDesc setDesc
Definition: execnodes.h:310
int errmsg(const char *fmt,...)
Definition: elog.c:902
SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:616
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
XidCommitStatus cached_status
Definition: verify_heapam.c:86
static const unsigned __int64 epoch
Definition: gettimeofday.c:34
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3160
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
#define CStringGetTextDatum(s)
Definition: builtins.h:86
#define HEAP_HASEXTERNAL
Definition: htup_details.h:191
Definition: c.h:617
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Relation * toast_indexes
Definition: verify_heapam.c:94
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
struct HeapCheckContext HeapCheckContext
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:227
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
TransactionId relminmxid
Definition: verify_heapam.c:92
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
int Buffer
Definition: buf.h:23
int16 AttrNumber
Definition: attnum.h:21
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition: multixact.c:743
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
static XidBoundsViolation check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx)
Pointer Page
Definition: bufpage.h:78