PostgreSQL Source Code git master
pg_visibility.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_visibility.c
4 * display visibility map information and page-level visibility bits
5 *
6 * Copyright (c) 2016-2025, PostgreSQL Global Development Group
7 *
8 * contrib/pg_visibility/pg_visibility.c
9 *-------------------------------------------------------------------------
10 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "utils/rel.h"
27
29
30typedef struct vbits
31{
36
37typedef struct corrupt_items
38{
43
44/* for collect_corrupt_items_read_stream_next_block */
46{
53};
54
63
64static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
65static vbits *collect_visibility_data(Oid relid, bool include_pd);
66static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
67 bool all_frozen);
69static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
70 Buffer buffer);
71static void check_relation_relkind(Relation rel);
72
73/*
74 * Visibility map information for a single block of a relation.
75 *
76 * Note: the VM code will silently return zeroes for pages past the end
77 * of the map, so we allow probes up to MaxBlockNumber regardless of the
78 * actual relation size.
79 */
82{
83 Oid relid = PG_GETARG_OID(0);
84 int64 blkno = PG_GETARG_INT64(1);
85 int32 mapbits;
86 Relation rel;
87 Buffer vmbuffer = InvalidBuffer;
88 TupleDesc tupdesc;
89 Datum values[2];
90 bool nulls[2] = {0};
91
92 rel = relation_open(relid, AccessShareLock);
93
94 /* Only some relkinds have a visibility map */
96
97 if (blkno < 0 || blkno > MaxBlockNumber)
99 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
100 errmsg("invalid block number")));
101
102 tupdesc = pg_visibility_tupdesc(false, false);
103
104 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
105 if (vmbuffer != InvalidBuffer)
106 ReleaseBuffer(vmbuffer);
107 values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
108 values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
109
111
113}
114
115/*
116 * Visibility map information for a single block of a relation, plus the
117 * page-level information for the same block.
118 */
119Datum
121{
122 Oid relid = PG_GETARG_OID(0);
123 int64 blkno = PG_GETARG_INT64(1);
124 int32 mapbits;
125 Relation rel;
126 Buffer vmbuffer = InvalidBuffer;
127 Buffer buffer;
128 Page page;
129 TupleDesc tupdesc;
130 Datum values[3];
131 bool nulls[3] = {0};
132
133 rel = relation_open(relid, AccessShareLock);
134
135 /* Only some relkinds have a visibility map */
137
138 if (blkno < 0 || blkno > MaxBlockNumber)
140 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
141 errmsg("invalid block number")));
142
143 tupdesc = pg_visibility_tupdesc(false, true);
144
145 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
146 if (vmbuffer != InvalidBuffer)
147 ReleaseBuffer(vmbuffer);
148 values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
149 values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
150
151 /* Here we have to explicitly check rel size ... */
152 if (blkno < RelationGetNumberOfBlocks(rel))
153 {
154 buffer = ReadBuffer(rel, blkno);
156
157 page = BufferGetPage(buffer);
159
160 UnlockReleaseBuffer(buffer);
161 }
162 else
163 {
164 /* As with the vismap, silently return 0 for pages past EOF */
165 values[2] = BoolGetDatum(false);
166 }
167
169
171}
172
173/*
174 * Visibility map information for every block in a relation.
175 */
176Datum
178{
179 FuncCallContext *funcctx;
180 vbits *info;
181
182 if (SRF_IS_FIRSTCALL())
183 {
184 Oid relid = PG_GETARG_OID(0);
185 MemoryContext oldcontext;
186
187 funcctx = SRF_FIRSTCALL_INIT();
188 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
189 funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
190 /* collect_visibility_data will verify the relkind */
191 funcctx->user_fctx = collect_visibility_data(relid, false);
192 MemoryContextSwitchTo(oldcontext);
193 }
194
195 funcctx = SRF_PERCALL_SETUP();
196 info = (vbits *) funcctx->user_fctx;
197
198 if (info->next < info->count)
199 {
200 Datum values[3];
201 bool nulls[3] = {0};
202 HeapTuple tuple;
203
204 values[0] = Int64GetDatum(info->next);
205 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
206 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
207 info->next++;
208
209 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
210 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
211 }
212
213 SRF_RETURN_DONE(funcctx);
214}
215
216/*
217 * Visibility map information for every block in a relation, plus the page
218 * level information for each block.
219 */
220Datum
222{
223 FuncCallContext *funcctx;
224 vbits *info;
225
226 if (SRF_IS_FIRSTCALL())
227 {
228 Oid relid = PG_GETARG_OID(0);
229 MemoryContext oldcontext;
230
231 funcctx = SRF_FIRSTCALL_INIT();
232 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
233 funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
234 /* collect_visibility_data will verify the relkind */
235 funcctx->user_fctx = collect_visibility_data(relid, true);
236 MemoryContextSwitchTo(oldcontext);
237 }
238
239 funcctx = SRF_PERCALL_SETUP();
240 info = (vbits *) funcctx->user_fctx;
241
242 if (info->next < info->count)
243 {
244 Datum values[4];
245 bool nulls[4] = {0};
246 HeapTuple tuple;
247
248 values[0] = Int64GetDatum(info->next);
249 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
250 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
251 values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
252 info->next++;
253
254 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
255 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
256 }
257
258 SRF_RETURN_DONE(funcctx);
259}
260
261/*
262 * Count the number of all-visible and all-frozen pages in the visibility
263 * map for a particular relation.
264 */
265Datum
267{
268 Oid relid = PG_GETARG_OID(0);
269 Relation rel;
270 BlockNumber nblocks;
271 BlockNumber blkno;
272 Buffer vmbuffer = InvalidBuffer;
273 int64 all_visible = 0;
274 int64 all_frozen = 0;
275 TupleDesc tupdesc;
276 Datum values[2];
277 bool nulls[2] = {0};
278
279 rel = relation_open(relid, AccessShareLock);
280
281 /* Only some relkinds have a visibility map */
283
284 nblocks = RelationGetNumberOfBlocks(rel);
285
286 for (blkno = 0; blkno < nblocks; ++blkno)
287 {
288 int32 mapbits;
289
290 /* Make sure we are interruptible. */
292
293 /* Get map info. */
294 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
295 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
296 ++all_visible;
297 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
298 ++all_frozen;
299 }
300
301 /* Clean up. */
302 if (vmbuffer != InvalidBuffer)
303 ReleaseBuffer(vmbuffer);
305
306 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
307 elog(ERROR, "return type must be a row type");
308
309 values[0] = Int64GetDatum(all_visible);
310 values[1] = Int64GetDatum(all_frozen);
311
313}
314
315/*
316 * Return the TIDs of non-frozen tuples present in pages marked all-frozen
317 * in the visibility map. We hope no one will ever find any, but there could
318 * be bugs, database corruption, etc.
319 */
320Datum
322{
323 FuncCallContext *funcctx;
325
326 if (SRF_IS_FIRSTCALL())
327 {
328 Oid relid = PG_GETARG_OID(0);
329 MemoryContext oldcontext;
330
331 funcctx = SRF_FIRSTCALL_INIT();
332 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
333 /* collect_corrupt_items will verify the relkind */
334 funcctx->user_fctx = collect_corrupt_items(relid, false, true);
335 MemoryContextSwitchTo(oldcontext);
336 }
337
338 funcctx = SRF_PERCALL_SETUP();
339 items = (corrupt_items *) funcctx->user_fctx;
340
341 if (items->next < items->count)
342 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
343
344 SRF_RETURN_DONE(funcctx);
345}
346
347/*
348 * Return the TIDs of not-all-visible tuples in pages marked all-visible
349 * in the visibility map. We hope no one will ever find any, but there could
350 * be bugs, database corruption, etc.
351 */
352Datum
354{
355 FuncCallContext *funcctx;
357
358 if (SRF_IS_FIRSTCALL())
359 {
360 Oid relid = PG_GETARG_OID(0);
361 MemoryContext oldcontext;
362
363 funcctx = SRF_FIRSTCALL_INIT();
364 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
365 /* collect_corrupt_items will verify the relkind */
366 funcctx->user_fctx = collect_corrupt_items(relid, true, false);
367 MemoryContextSwitchTo(oldcontext);
368 }
369
370 funcctx = SRF_PERCALL_SETUP();
371 items = (corrupt_items *) funcctx->user_fctx;
372
373 if (items->next < items->count)
374 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
375
376 SRF_RETURN_DONE(funcctx);
377}
378
379/*
380 * Remove the visibility map fork for a relation. If there turn out to be
381 * any bugs in the visibility map code that require rebuilding the VM, this
382 * provides users with a way to do it that is cleaner than shutting down the
383 * server and removing files by hand.
384 *
385 * This is a cut-down version of RelationTruncate.
386 */
387Datum
389{
390 Oid relid = PG_GETARG_OID(0);
391 Relation rel;
392 ForkNumber fork;
393 BlockNumber block;
394 BlockNumber old_block;
395
397
398 /* Only some relkinds have a visibility map */
400
401 /* Forcibly reset cached file size */
403
404 /* Compute new and old size before entering critical section. */
406 block = visibilitymap_prepare_truncate(rel, 0);
407 old_block = BlockNumberIsValid(block) ? smgrnblocks(RelationGetSmgr(rel), fork) : 0;
408
409 /*
410 * WAL-logging, buffer dropping, file truncation must be atomic and all on
411 * one side of a checkpoint. See RelationTruncate() for discussion.
412 */
416
417 if (RelationNeedsWAL(rel))
418 {
419 XLogRecPtr lsn;
420 xl_smgr_truncate xlrec;
421
422 xlrec.blkno = 0;
423 xlrec.rlocator = rel->rd_locator;
424 xlrec.flags = SMGR_TRUNCATE_VM;
425
427 XLogRegisterData((char *) &xlrec, sizeof(xlrec));
428
429 lsn = XLogInsert(RM_SMGR_ID,
431 XLogFlush(lsn);
432 }
433
434 if (BlockNumberIsValid(block))
435 smgrtruncate(RelationGetSmgr(rel), &fork, 1, &old_block, &block);
436
439
440 /*
441 * Release the lock right away, not at commit time.
442 *
443 * It would be a problem to release the lock prior to commit if this
444 * truncate operation sends any transactional invalidation messages. Other
445 * backends would potentially be able to lock the relation without
446 * processing them in the window of time between when we release the lock
447 * here and when we sent the messages at our eventual commit. However,
448 * we're currently only sending a non-transactional smgr invalidation,
449 * which will have been posted to shared memory immediately from within
450 * smgr_truncate. Therefore, there should be no race here.
451 *
452 * The reason why it's desirable to release the lock early here is because
453 * of the possibility that someone will need to use this to blow away many
454 * visibility map forks at once. If we can't release the lock until
455 * commit time, the transaction doing this will accumulate
456 * AccessExclusiveLocks on all of those relations at the same time, which
457 * is undesirable. However, if this turns out to be unsafe we may have no
458 * choice...
459 */
461
462 /* Nothing to return. */
464}
465
466/*
467 * Helper function to construct whichever TupleDesc we need for a particular
468 * call.
469 */
470static TupleDesc
471pg_visibility_tupdesc(bool include_blkno, bool include_pd)
472{
473 TupleDesc tupdesc;
474 AttrNumber maxattr = 2;
475 AttrNumber a = 0;
476
477 if (include_blkno)
478 ++maxattr;
479 if (include_pd)
480 ++maxattr;
481 tupdesc = CreateTemplateTupleDesc(maxattr);
482 if (include_blkno)
483 TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
484 TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
485 TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
486 if (include_pd)
487 TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
488 Assert(a == maxattr);
489
490 return BlessTupleDesc(tupdesc);
491}
492
493/*
494 * Collect visibility data about a relation.
495 *
496 * Checks relkind of relid and will throw an error if the relation does not
497 * have a VM.
498 */
499static vbits *
500collect_visibility_data(Oid relid, bool include_pd)
501{
502 Relation rel;
503 BlockNumber nblocks;
504 vbits *info;
505 BlockNumber blkno;
506 Buffer vmbuffer = InvalidBuffer;
509 ReadStream *stream = NULL;
510
511 rel = relation_open(relid, AccessShareLock);
512
513 /* Only some relkinds have a visibility map */
515
516 nblocks = RelationGetNumberOfBlocks(rel);
517 info = palloc0(offsetof(vbits, bits) + nblocks);
518 info->next = 0;
519 info->count = nblocks;
520
521 /* Create a stream if reading main fork. */
522 if (include_pd)
523 {
524 p.current_blocknum = 0;
525 p.last_exclusive = nblocks;
527 bstrategy,
528 rel,
531 &p,
532 0);
533 }
534
535 for (blkno = 0; blkno < nblocks; ++blkno)
536 {
537 int32 mapbits;
538
539 /* Make sure we are interruptible. */
541
542 /* Get map info. */
543 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
544 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
545 info->bits[blkno] |= (1 << 0);
546 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
547 info->bits[blkno] |= (1 << 1);
548
549 /*
550 * Page-level data requires reading every block, so only get it if the
551 * caller needs it. Use a buffer access strategy, too, to prevent
552 * cache-trashing.
553 */
554 if (include_pd)
555 {
556 Buffer buffer;
557 Page page;
558
559 buffer = read_stream_next_buffer(stream, NULL);
561
562 page = BufferGetPage(buffer);
563 if (PageIsAllVisible(page))
564 info->bits[blkno] |= (1 << 2);
565
566 UnlockReleaseBuffer(buffer);
567 }
568 }
569
570 if (include_pd)
571 {
573 read_stream_end(stream);
574 }
575
576 /* Clean up. */
577 if (vmbuffer != InvalidBuffer)
578 ReleaseBuffer(vmbuffer);
580
581 return info;
582}
583
584/*
585 * The "strict" version of GetOldestNonRemovableTransactionId(). The
586 * pg_visibility check can tolerate false positives (don't report some of the
587 * errors), but can't tolerate false negatives (report false errors). Normally,
588 * horizons move forwards, but there are cases when it could move backward
589 * (see comment for ComputeXidHorizons()).
590 *
591 * This is why we have to implement our own function for xid horizon, which
592 * would be guaranteed to be newer or equal to any xid horizon computed before.
593 * We have to do the following to achieve this.
594 *
595 * 1. Ignore processes xmin's, because they consider connection to other
596 * databases that were ignored before.
597 * 2. Ignore KnownAssignedXids, as they are not database-aware. Although we
598 * now perform minimal checking on a standby by always using nextXid, this
599 * approach is better than nothing and will at least catch extremely broken
600 * cases where a xid is in the future.
601 * 3. Ignore walsender xmin, because it could go backward if some replication
602 * connections don't use replication slots.
603 *
604 * While it might seem like we could use KnownAssignedXids for shared
605 * catalogs, since shared catalogs rely on a global horizon rather than a
606 * database-specific one - there are potential edge cases. For example, a
607 * transaction may crash on the primary without writing a commit/abort record.
608 * This would lead to a situation where it appears to still be running on the
609 * standby, even though it has already ended on the primary. For this reason,
610 * it's safer to ignore KnownAssignedXids, even for shared catalogs.
611 *
612 * As a result, we're using only currently running xids to compute the horizon.
613 * Surely these would significantly sacrifice accuracy. But we have to do so
614 * to avoid reporting false errors.
615 */
616static TransactionId
618{
619 RunningTransactions runningTransactions;
620
621 if (RecoveryInProgress())
622 {
623 TransactionId result;
624
625 /* As we ignore KnownAssignedXids on standby, just pick nextXid */
626 LWLockAcquire(XidGenLock, LW_SHARED);
628 LWLockRelease(XidGenLock);
629 return result;
630 }
631 else if (rel == NULL || rel->rd_rel->relisshared)
632 {
633 /* Shared relation: take into account all running xids */
634 runningTransactions = GetRunningTransactionData();
635 LWLockRelease(ProcArrayLock);
636 LWLockRelease(XidGenLock);
637 return runningTransactions->oldestRunningXid;
638 }
639 else if (!RELATION_IS_LOCAL(rel))
640 {
641 /*
642 * Normal relation: take into account xids running within the current
643 * database
644 */
645 runningTransactions = GetRunningTransactionData();
646 LWLockRelease(ProcArrayLock);
647 LWLockRelease(XidGenLock);
648 return runningTransactions->oldestDatabaseRunningXid;
649 }
650 else
651 {
652 /*
653 * For temporary relations, ComputeXidHorizons() uses only
654 * TransamVariables->latestCompletedXid and MyProc->xid. These two
655 * shouldn't go backwards. So we're fine with this horizon.
656 */
658 }
659}
660
661/*
662 * Callback function to get next block for read stream object used in
663 * collect_corrupt_items() function.
664 */
665static BlockNumber
667 void *callback_private_data,
668 void *per_buffer_data)
669{
670 struct collect_corrupt_items_read_stream_private *p = callback_private_data;
671
673 {
674 bool check_frozen = false;
675 bool check_visible = false;
676
677 /* Make sure we are interruptible. */
679
681 check_frozen = true;
683 check_visible = true;
684 if (!check_visible && !check_frozen)
685 continue;
686
687 return p->current_blocknum++;
688 }
689
690 return InvalidBlockNumber;
691}
692
693/*
694 * Returns a list of items whose visibility map information does not match
695 * the status of the tuples on the page.
696 *
697 * If all_visible is passed as true, this will include all items which are
698 * on pages marked as all-visible in the visibility map but which do not
699 * seem to in fact be all-visible.
700 *
701 * If all_frozen is passed as true, this will include all items which are
702 * on pages marked as all-frozen but which do not seem to in fact be frozen.
703 *
704 * Checks relkind of relid and will throw an error if the relation does not
705 * have a VM.
706 */
707static corrupt_items *
709{
716 ReadStream *stream;
717 Buffer buffer;
718
720
721 /* Only some relkinds have a visibility map */
723
724 if (all_visible)
726
727 /*
728 * Guess an initial array size. We don't expect many corrupted tuples, so
729 * start with a small array. This function uses the "next" field to track
730 * the next offset where we can store an item (which is the same thing as
731 * the number of items found so far) and the "count" field to track the
732 * number of entries allocated. We'll repurpose these fields before
733 * returning.
734 */
735 items = palloc0(sizeof(corrupt_items));
736 items->next = 0;
737 items->count = 64;
738 items->tids = palloc(items->count * sizeof(ItemPointerData));
739
740 p.current_blocknum = 0;
742 p.rel = rel;
747 bstrategy,
748 rel,
751 &p,
752 0);
753
754 /* Loop over every block in the relation. */
755 while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
756 {
757 bool check_frozen = all_frozen;
758 bool check_visible = all_visible;
759 Page page;
760 OffsetNumber offnum,
761 maxoff;
762 BlockNumber blkno;
763
764 /* Make sure we are interruptible. */
766
768
769 page = BufferGetPage(buffer);
770 maxoff = PageGetMaxOffsetNumber(page);
771 blkno = BufferGetBlockNumber(buffer);
772
773 /*
774 * The visibility map bits might have changed while we were acquiring
775 * the page lock. Recheck to avoid returning spurious results.
776 */
777 if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
778 check_frozen = false;
779 if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
780 check_visible = false;
781 if (!check_visible && !check_frozen)
782 {
783 UnlockReleaseBuffer(buffer);
784 continue;
785 }
786
787 /* Iterate over each tuple on the page. */
788 for (offnum = FirstOffsetNumber;
789 offnum <= maxoff;
790 offnum = OffsetNumberNext(offnum))
791 {
792 HeapTupleData tuple;
793 ItemId itemid;
794
795 itemid = PageGetItemId(page, offnum);
796
797 /* Unused or redirect line pointers are of no interest. */
798 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
799 continue;
800
801 /* Dead line pointers are neither all-visible nor frozen. */
802 if (ItemIdIsDead(itemid))
803 {
804 ItemPointerSet(&(tuple.t_self), blkno, offnum);
806 continue;
807 }
808
809 /* Initialize a HeapTupleData structure for checks below. */
810 ItemPointerSet(&(tuple.t_self), blkno, offnum);
811 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
812 tuple.t_len = ItemIdGetLength(itemid);
813 tuple.t_tableOid = relid;
814
815 /*
816 * If we're checking whether the page is all-visible, we expect
817 * the tuple to be all-visible.
818 */
819 if (check_visible &&
820 !tuple_all_visible(&tuple, OldestXmin, buffer))
821 {
822 TransactionId RecomputedOldestXmin;
823
824 /*
825 * Time has passed since we computed OldestXmin, so it's
826 * possible that this tuple is all-visible in reality even
827 * though it doesn't appear so based on our
828 * previously-computed value. Let's compute a new value so we
829 * can be certain whether there is a problem.
830 *
831 * From a concurrency point of view, it sort of sucks to
832 * retake ProcArrayLock here while we're holding the buffer
833 * exclusively locked, but it should be safe against
834 * deadlocks, because surely
835 * GetStrictOldestNonRemovableTransactionId() should never
836 * take a buffer lock. And this shouldn't happen often, so
837 * it's worth being careful so as to avoid false positives.
838 */
839 RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
840
841 if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
843 else
844 {
845 OldestXmin = RecomputedOldestXmin;
846 if (!tuple_all_visible(&tuple, OldestXmin, buffer))
848 }
849 }
850
851 /*
852 * If we're checking whether the page is all-frozen, we expect the
853 * tuple to be in a state where it will never need freezing.
854 */
855 if (check_frozen)
856 {
859 }
860 }
861
862 UnlockReleaseBuffer(buffer);
863 }
864 read_stream_end(stream);
865
866 /* Clean up. */
867 if (vmbuffer != InvalidBuffer)
869 if (p.vmbuffer != InvalidBuffer)
872
873 /*
874 * Before returning, repurpose the fields to match caller's expectations.
875 * next is now the next item that should be read (rather than written) and
876 * count is now the number of items we wrote (rather than the number we
877 * allocated).
878 */
879 items->count = items->next;
880 items->next = 0;
881
882 return items;
883}
884
885/*
886 * Remember one corrupt item.
887 */
888static void
890{
891 /* enlarge output array if needed. */
892 if (items->next >= items->count)
893 {
894 items->count *= 2;
895 items->tids = repalloc(items->tids,
896 items->count * sizeof(ItemPointerData));
897 }
898 /* and add the new item */
899 items->tids[items->next++] = *tid;
900}
901
902/*
903 * Check whether a tuple is all-visible relative to a given OldestXmin value.
904 * The buffer should contain the tuple and should be locked and pinned.
905 */
906static bool
908{
910 TransactionId xmin;
911
912 state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
913 if (state != HEAPTUPLE_LIVE)
914 return false; /* all-visible implies live */
915
916 /*
917 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
918 * all-visible unless every tuple is hinted committed. However, those hint
919 * bits could be lost after a crash, so we can't be certain that they'll
920 * be set here. So just check the xmin.
921 */
922
923 xmin = HeapTupleHeaderGetXmin(tup->t_data);
924 if (!TransactionIdPrecedes(xmin, OldestXmin))
925 return false; /* xmin not old enough for all to see */
926
927 return true;
928}
929
930/*
931 * check_relation_relkind - convenience routine to check that relation
932 * is of the relkind supported by the callers
933 */
934static void
936{
937 if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
939 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
940 errmsg("relation \"%s\" is of wrong relation kind",
943}
int16 AttrNumber
Definition: attnum.h:21
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
#define MaxBlockNumber
Definition: block.h:35
static Datum values[MAXATTR]
Definition: bootstrap.c:151
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4866
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4883
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5100
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
@ BAS_BULKREAD
Definition: bufmgr.h:36
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:396
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
PageData * Page
Definition: bufpage.h:82
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
uint8_t uint8
Definition: c.h:486
#define Assert(condition)
Definition: c.h:815
int64_t int64
Definition: c.h:485
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:420
int32_t int32
Definition: c.h:484
uint32 TransactionId
Definition: c.h:609
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2258
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7669
HTSV_Result
Definition: heapam.h:133
@ HEAPTUPLE_LIVE
Definition: heapam.h:135
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
int a
Definition: isn.c:68
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define AccessShareLock
Definition: lockdefs.h:36
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static BlockNumber collect_corrupt_items_read_stream_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
static corrupt_items * collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
Datum pg_visibility_map_summary(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
Definition: pg_visibility.c:28
Datum pg_visibility_rel(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(pg_visibility_map)
struct corrupt_items corrupt_items
static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd)
static void check_relation_relkind(Relation rel)
static void record_corrupt_item(corrupt_items *items, ItemPointer tid)
static TransactionId GetStrictOldestNonRemovableTransactionId(Relation rel)
Datum pg_visibility_map(PG_FUNCTION_ARGS)
Definition: pg_visibility.c:81
struct vbits vbits
static vbits * collect_visibility_data(Oid relid, bool include_pd)
Datum pg_visibility_map_rel(PG_FUNCTION_ARGS)
Datum pg_check_visible(PG_FUNCTION_ARGS)
Datum pg_check_frozen(PG_FUNCTION_ARGS)
Datum pg_visibility(PG_FUNCTION_ARGS)
static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
Datum pg_truncate_visibility_map(PG_FUNCTION_ARGS)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
unsigned int Oid
Definition: postgres_ext.h:32
#define DELAY_CHKPT_START
Definition: proc.h:119
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:120
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:2005
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2689
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:605
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:551
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:846
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.c:171
#define READ_STREAM_FULL
Definition: read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
ForkNumber
Definition: relpath.h:56
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:60
@ MAIN_FORKNUM
Definition: relpath.h:58
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:677
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *old_nblocks, BlockNumber *nblocks)
Definition: smgr.c:729
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
PGPROC * MyProc
Definition: proc.c:66
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
void * user_fctx
Definition: funcapi.h:82
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
int delayChkptFlags
Definition: proc.h:240
RelFileLocator rd_locator
Definition: rel.h:57
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId oldestDatabaseRunningXid
Definition: standby.h:93
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46
FullTransactionId nextXid
Definition: transam.h:220
ItemPointer tids
Definition: pg_visibility.c:41
BlockNumber count
Definition: pg_visibility.c:40
BlockNumber next
Definition: pg_visibility.c:39
Definition: regguts.h:323
BlockNumber next
Definition: pg_visibility.c:32
uint8 bits[FLEXIBLE_ARRAY_MEMBER]
Definition: pg_visibility.c:34
BlockNumber count
Definition: pg_visibility.c:33
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
static ItemArray items
Definition: test_tidstore.c:48
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:164
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:798
TransamVariablesData * TransamVariables
Definition: varsup.c:34
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
bool RecoveryInProgress(void)
Definition: xlog.c:6334
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2802
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:82