PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
pg_visibility.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * pg_visibility.c
 *	  display visibility map information and page-level visibility bits
 *
 * Copyright (c) 2016-2025, PostgreSQL Global Development Group
 *
 *	  contrib/pg_visibility/pg_visibility.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "utils/rel.h"
27
29 .name = "pg_visibility",
30 .version = PG_VERSION
31);
32
33typedef struct vbits
34{
39
40typedef struct corrupt_items
41{
46
47/* for collect_corrupt_items_read_stream_next_block */
49{
56};
57
66
67static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
68static vbits *collect_visibility_data(Oid relid, bool include_pd);
69static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
70 bool all_frozen);
72static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
73 Buffer buffer);
74static void check_relation_relkind(Relation rel);
75
76/*
77 * Visibility map information for a single block of a relation.
78 *
79 * Note: the VM code will silently return zeroes for pages past the end
80 * of the map, so we allow probes up to MaxBlockNumber regardless of the
81 * actual relation size.
82 */
85{
86 Oid relid = PG_GETARG_OID(0);
87 int64 blkno = PG_GETARG_INT64(1);
88 int32 mapbits;
89 Relation rel;
90 Buffer vmbuffer = InvalidBuffer;
91 TupleDesc tupdesc;
92 Datum values[2];
93 bool nulls[2] = {0};
94
95 rel = relation_open(relid, AccessShareLock);
96
97 /* Only some relkinds have a visibility map */
99
100 if (blkno < 0 || blkno > MaxBlockNumber)
102 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
103 errmsg("invalid block number")));
104
105 tupdesc = pg_visibility_tupdesc(false, false);
106
107 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
108 if (vmbuffer != InvalidBuffer)
109 ReleaseBuffer(vmbuffer);
110 values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
111 values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
112
114
116}
117
118/*
119 * Visibility map information for a single block of a relation, plus the
120 * page-level information for the same block.
121 */
122Datum
124{
125 Oid relid = PG_GETARG_OID(0);
126 int64 blkno = PG_GETARG_INT64(1);
127 int32 mapbits;
128 Relation rel;
129 Buffer vmbuffer = InvalidBuffer;
130 Buffer buffer;
131 Page page;
132 TupleDesc tupdesc;
133 Datum values[3];
134 bool nulls[3] = {0};
135
136 rel = relation_open(relid, AccessShareLock);
137
138 /* Only some relkinds have a visibility map */
140
141 if (blkno < 0 || blkno > MaxBlockNumber)
143 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
144 errmsg("invalid block number")));
145
146 tupdesc = pg_visibility_tupdesc(false, true);
147
148 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
149 if (vmbuffer != InvalidBuffer)
150 ReleaseBuffer(vmbuffer);
151 values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
152 values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
153
154 /* Here we have to explicitly check rel size ... */
155 if (blkno < RelationGetNumberOfBlocks(rel))
156 {
157 buffer = ReadBuffer(rel, blkno);
159
160 page = BufferGetPage(buffer);
162
163 UnlockReleaseBuffer(buffer);
164 }
165 else
166 {
167 /* As with the vismap, silently return 0 for pages past EOF */
168 values[2] = BoolGetDatum(false);
169 }
170
172
174}
175
176/*
177 * Visibility map information for every block in a relation.
178 */
179Datum
181{
182 FuncCallContext *funcctx;
183 vbits *info;
184
185 if (SRF_IS_FIRSTCALL())
186 {
187 Oid relid = PG_GETARG_OID(0);
188 MemoryContext oldcontext;
189
190 funcctx = SRF_FIRSTCALL_INIT();
191 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
192 funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
193 /* collect_visibility_data will verify the relkind */
194 funcctx->user_fctx = collect_visibility_data(relid, false);
195 MemoryContextSwitchTo(oldcontext);
196 }
197
198 funcctx = SRF_PERCALL_SETUP();
199 info = (vbits *) funcctx->user_fctx;
200
201 if (info->next < info->count)
202 {
203 Datum values[3];
204 bool nulls[3] = {0};
205 HeapTuple tuple;
206
207 values[0] = Int64GetDatum(info->next);
208 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
209 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
210 info->next++;
211
212 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
213 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
214 }
215
216 SRF_RETURN_DONE(funcctx);
217}
218
219/*
220 * Visibility map information for every block in a relation, plus the page
221 * level information for each block.
222 */
223Datum
225{
226 FuncCallContext *funcctx;
227 vbits *info;
228
229 if (SRF_IS_FIRSTCALL())
230 {
231 Oid relid = PG_GETARG_OID(0);
232 MemoryContext oldcontext;
233
234 funcctx = SRF_FIRSTCALL_INIT();
235 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
236 funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
237 /* collect_visibility_data will verify the relkind */
238 funcctx->user_fctx = collect_visibility_data(relid, true);
239 MemoryContextSwitchTo(oldcontext);
240 }
241
242 funcctx = SRF_PERCALL_SETUP();
243 info = (vbits *) funcctx->user_fctx;
244
245 if (info->next < info->count)
246 {
247 Datum values[4];
248 bool nulls[4] = {0};
249 HeapTuple tuple;
250
251 values[0] = Int64GetDatum(info->next);
252 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
253 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
254 values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
255 info->next++;
256
257 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
258 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
259 }
260
261 SRF_RETURN_DONE(funcctx);
262}
263
264/*
265 * Count the number of all-visible and all-frozen pages in the visibility
266 * map for a particular relation.
267 */
268Datum
270{
271 Oid relid = PG_GETARG_OID(0);
272 Relation rel;
273 BlockNumber nblocks;
274 BlockNumber blkno;
275 Buffer vmbuffer = InvalidBuffer;
276 int64 all_visible = 0;
277 int64 all_frozen = 0;
278 TupleDesc tupdesc;
279 Datum values[2];
280 bool nulls[2] = {0};
281
282 rel = relation_open(relid, AccessShareLock);
283
284 /* Only some relkinds have a visibility map */
286
287 nblocks = RelationGetNumberOfBlocks(rel);
288
289 for (blkno = 0; blkno < nblocks; ++blkno)
290 {
291 int32 mapbits;
292
293 /* Make sure we are interruptible. */
295
296 /* Get map info. */
297 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
298 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
299 ++all_visible;
300 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
301 ++all_frozen;
302 }
303
304 /* Clean up. */
305 if (vmbuffer != InvalidBuffer)
306 ReleaseBuffer(vmbuffer);
308
309 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
310 elog(ERROR, "return type must be a row type");
311
312 values[0] = Int64GetDatum(all_visible);
313 values[1] = Int64GetDatum(all_frozen);
314
316}
317
318/*
319 * Return the TIDs of non-frozen tuples present in pages marked all-frozen
320 * in the visibility map. We hope no one will ever find any, but there could
321 * be bugs, database corruption, etc.
322 */
323Datum
325{
326 FuncCallContext *funcctx;
328
329 if (SRF_IS_FIRSTCALL())
330 {
331 Oid relid = PG_GETARG_OID(0);
332 MemoryContext oldcontext;
333
334 funcctx = SRF_FIRSTCALL_INIT();
335 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
336 /* collect_corrupt_items will verify the relkind */
337 funcctx->user_fctx = collect_corrupt_items(relid, false, true);
338 MemoryContextSwitchTo(oldcontext);
339 }
340
341 funcctx = SRF_PERCALL_SETUP();
342 items = (corrupt_items *) funcctx->user_fctx;
343
344 if (items->next < items->count)
345 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
346
347 SRF_RETURN_DONE(funcctx);
348}
349
350/*
351 * Return the TIDs of not-all-visible tuples in pages marked all-visible
352 * in the visibility map. We hope no one will ever find any, but there could
353 * be bugs, database corruption, etc.
354 */
355Datum
357{
358 FuncCallContext *funcctx;
360
361 if (SRF_IS_FIRSTCALL())
362 {
363 Oid relid = PG_GETARG_OID(0);
364 MemoryContext oldcontext;
365
366 funcctx = SRF_FIRSTCALL_INIT();
367 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
368 /* collect_corrupt_items will verify the relkind */
369 funcctx->user_fctx = collect_corrupt_items(relid, true, false);
370 MemoryContextSwitchTo(oldcontext);
371 }
372
373 funcctx = SRF_PERCALL_SETUP();
374 items = (corrupt_items *) funcctx->user_fctx;
375
376 if (items->next < items->count)
377 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
378
379 SRF_RETURN_DONE(funcctx);
380}
381
382/*
383 * Remove the visibility map fork for a relation. If there turn out to be
384 * any bugs in the visibility map code that require rebuilding the VM, this
385 * provides users with a way to do it that is cleaner than shutting down the
386 * server and removing files by hand.
387 *
388 * This is a cut-down version of RelationTruncate.
389 */
390Datum
392{
393 Oid relid = PG_GETARG_OID(0);
394 Relation rel;
395 ForkNumber fork;
396 BlockNumber block;
397 BlockNumber old_block;
398
400
401 /* Only some relkinds have a visibility map */
403
404 /* Forcibly reset cached file size */
406
407 /* Compute new and old size before entering critical section. */
409 block = visibilitymap_prepare_truncate(rel, 0);
410 old_block = BlockNumberIsValid(block) ? smgrnblocks(RelationGetSmgr(rel), fork) : 0;
411
412 /*
413 * WAL-logging, buffer dropping, file truncation must be atomic and all on
414 * one side of a checkpoint. See RelationTruncate() for discussion.
415 */
419
420 if (RelationNeedsWAL(rel))
421 {
422 XLogRecPtr lsn;
423 xl_smgr_truncate xlrec;
424
425 xlrec.blkno = 0;
426 xlrec.rlocator = rel->rd_locator;
427 xlrec.flags = SMGR_TRUNCATE_VM;
428
430 XLogRegisterData(&xlrec, sizeof(xlrec));
431
432 lsn = XLogInsert(RM_SMGR_ID,
434 XLogFlush(lsn);
435 }
436
437 if (BlockNumberIsValid(block))
438 smgrtruncate(RelationGetSmgr(rel), &fork, 1, &old_block, &block);
439
442
443 /*
444 * Release the lock right away, not at commit time.
445 *
446 * It would be a problem to release the lock prior to commit if this
447 * truncate operation sends any transactional invalidation messages. Other
448 * backends would potentially be able to lock the relation without
449 * processing them in the window of time between when we release the lock
450 * here and when we sent the messages at our eventual commit. However,
451 * we're currently only sending a non-transactional smgr invalidation,
452 * which will have been posted to shared memory immediately from within
453 * smgr_truncate. Therefore, there should be no race here.
454 *
455 * The reason why it's desirable to release the lock early here is because
456 * of the possibility that someone will need to use this to blow away many
457 * visibility map forks at once. If we can't release the lock until
458 * commit time, the transaction doing this will accumulate
459 * AccessExclusiveLocks on all of those relations at the same time, which
460 * is undesirable. However, if this turns out to be unsafe we may have no
461 * choice...
462 */
464
465 /* Nothing to return. */
467}
468
469/*
470 * Helper function to construct whichever TupleDesc we need for a particular
471 * call.
472 */
473static TupleDesc
474pg_visibility_tupdesc(bool include_blkno, bool include_pd)
475{
476 TupleDesc tupdesc;
477 AttrNumber maxattr = 2;
478 AttrNumber a = 0;
479
480 if (include_blkno)
481 ++maxattr;
482 if (include_pd)
483 ++maxattr;
484 tupdesc = CreateTemplateTupleDesc(maxattr);
485 if (include_blkno)
486 TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
487 TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
488 TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
489 if (include_pd)
490 TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
491 Assert(a == maxattr);
492
493 return BlessTupleDesc(tupdesc);
494}
495
496/*
497 * Collect visibility data about a relation.
498 *
499 * Checks relkind of relid and will throw an error if the relation does not
500 * have a VM.
501 */
502static vbits *
503collect_visibility_data(Oid relid, bool include_pd)
504{
505 Relation rel;
506 BlockNumber nblocks;
507 vbits *info;
508 BlockNumber blkno;
509 Buffer vmbuffer = InvalidBuffer;
512 ReadStream *stream = NULL;
513
514 rel = relation_open(relid, AccessShareLock);
515
516 /* Only some relkinds have a visibility map */
518
519 nblocks = RelationGetNumberOfBlocks(rel);
520 info = palloc0(offsetof(vbits, bits) + nblocks);
521 info->next = 0;
522 info->count = nblocks;
523
524 /* Create a stream if reading main fork. */
525 if (include_pd)
526 {
527 p.current_blocknum = 0;
528 p.last_exclusive = nblocks;
529
530 /*
531 * It is safe to use batchmode as block_range_read_stream_cb takes no
532 * locks.
533 */
536 bstrategy,
537 rel,
540 &p,
541 0);
542 }
543
544 for (blkno = 0; blkno < nblocks; ++blkno)
545 {
546 int32 mapbits;
547
548 /* Make sure we are interruptible. */
550
551 /* Get map info. */
552 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
553 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
554 info->bits[blkno] |= (1 << 0);
555 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
556 info->bits[blkno] |= (1 << 1);
557
558 /*
559 * Page-level data requires reading every block, so only get it if the
560 * caller needs it. Use a buffer access strategy, too, to prevent
561 * cache-trashing.
562 */
563 if (include_pd)
564 {
565 Buffer buffer;
566 Page page;
567
568 buffer = read_stream_next_buffer(stream, NULL);
570
571 page = BufferGetPage(buffer);
572 if (PageIsAllVisible(page))
573 info->bits[blkno] |= (1 << 2);
574
575 UnlockReleaseBuffer(buffer);
576 }
577 }
578
579 if (include_pd)
580 {
582 read_stream_end(stream);
583 }
584
585 /* Clean up. */
586 if (vmbuffer != InvalidBuffer)
587 ReleaseBuffer(vmbuffer);
589
590 return info;
591}
592
593/*
594 * The "strict" version of GetOldestNonRemovableTransactionId(). The
595 * pg_visibility check can tolerate false positives (don't report some of the
596 * errors), but can't tolerate false negatives (report false errors). Normally,
597 * horizons move forwards, but there are cases when it could move backward
598 * (see comment for ComputeXidHorizons()).
599 *
600 * This is why we have to implement our own function for xid horizon, which
601 * would be guaranteed to be newer or equal to any xid horizon computed before.
602 * We have to do the following to achieve this.
603 *
604 * 1. Ignore processes xmin's, because they consider connection to other
605 * databases that were ignored before.
606 * 2. Ignore KnownAssignedXids, as they are not database-aware. Although we
607 * now perform minimal checking on a standby by always using nextXid, this
608 * approach is better than nothing and will at least catch extremely broken
609 * cases where a xid is in the future.
610 * 3. Ignore walsender xmin, because it could go backward if some replication
611 * connections don't use replication slots.
612 *
613 * While it might seem like we could use KnownAssignedXids for shared
614 * catalogs, since shared catalogs rely on a global horizon rather than a
615 * database-specific one - there are potential edge cases. For example, a
616 * transaction may crash on the primary without writing a commit/abort record.
617 * This would lead to a situation where it appears to still be running on the
618 * standby, even though it has already ended on the primary. For this reason,
619 * it's safer to ignore KnownAssignedXids, even for shared catalogs.
620 *
621 * As a result, we're using only currently running xids to compute the horizon.
622 * Surely these would significantly sacrifice accuracy. But we have to do so
623 * to avoid reporting false errors.
624 */
625static TransactionId
627{
628 RunningTransactions runningTransactions;
629
630 if (RecoveryInProgress())
631 {
632 TransactionId result;
633
634 /* As we ignore KnownAssignedXids on standby, just pick nextXid */
635 LWLockAcquire(XidGenLock, LW_SHARED);
637 LWLockRelease(XidGenLock);
638 return result;
639 }
640 else if (rel == NULL || rel->rd_rel->relisshared)
641 {
642 /* Shared relation: take into account all running xids */
643 runningTransactions = GetRunningTransactionData();
644 LWLockRelease(ProcArrayLock);
645 LWLockRelease(XidGenLock);
646 return runningTransactions->oldestRunningXid;
647 }
648 else if (!RELATION_IS_LOCAL(rel))
649 {
650 /*
651 * Normal relation: take into account xids running within the current
652 * database
653 */
654 runningTransactions = GetRunningTransactionData();
655 LWLockRelease(ProcArrayLock);
656 LWLockRelease(XidGenLock);
657 return runningTransactions->oldestDatabaseRunningXid;
658 }
659 else
660 {
661 /*
662 * For temporary relations, ComputeXidHorizons() uses only
663 * TransamVariables->latestCompletedXid and MyProc->xid. These two
664 * shouldn't go backwards. So we're fine with this horizon.
665 */
667 }
668}
669
670/*
671 * Callback function to get next block for read stream object used in
672 * collect_corrupt_items() function.
673 */
674static BlockNumber
676 void *callback_private_data,
677 void *per_buffer_data)
678{
679 struct collect_corrupt_items_read_stream_private *p = callback_private_data;
680
682 {
683 bool check_frozen = false;
684 bool check_visible = false;
685
686 /* Make sure we are interruptible. */
688
690 check_frozen = true;
692 check_visible = true;
693 if (!check_visible && !check_frozen)
694 continue;
695
696 return p->current_blocknum++;
697 }
698
699 return InvalidBlockNumber;
700}
701
702/*
703 * Returns a list of items whose visibility map information does not match
704 * the status of the tuples on the page.
705 *
706 * If all_visible is passed as true, this will include all items which are
707 * on pages marked as all-visible in the visibility map but which do not
708 * seem to in fact be all-visible.
709 *
710 * If all_frozen is passed as true, this will include all items which are
711 * on pages marked as all-frozen but which do not seem to in fact be frozen.
712 *
713 * Checks relkind of relid and will throw an error if the relation does not
714 * have a VM.
715 */
716static corrupt_items *
718{
725 ReadStream *stream;
726 Buffer buffer;
727
729
730 /* Only some relkinds have a visibility map */
732
733 if (all_visible)
735
736 /*
737 * Guess an initial array size. We don't expect many corrupted tuples, so
738 * start with a small array. This function uses the "next" field to track
739 * the next offset where we can store an item (which is the same thing as
740 * the number of items found so far) and the "count" field to track the
741 * number of entries allocated. We'll repurpose these fields before
742 * returning.
743 */
744 items = palloc0(sizeof(corrupt_items));
745 items->next = 0;
746 items->count = 64;
747 items->tids = palloc(items->count * sizeof(ItemPointerData));
748
749 p.current_blocknum = 0;
751 p.rel = rel;
756 bstrategy,
757 rel,
760 &p,
761 0);
762
763 /* Loop over every block in the relation. */
764 while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
765 {
766 bool check_frozen = all_frozen;
767 bool check_visible = all_visible;
768 Page page;
769 OffsetNumber offnum,
770 maxoff;
771 BlockNumber blkno;
772
773 /* Make sure we are interruptible. */
775
777
778 page = BufferGetPage(buffer);
779 maxoff = PageGetMaxOffsetNumber(page);
780 blkno = BufferGetBlockNumber(buffer);
781
782 /*
783 * The visibility map bits might have changed while we were acquiring
784 * the page lock. Recheck to avoid returning spurious results.
785 */
786 if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
787 check_frozen = false;
788 if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
789 check_visible = false;
790 if (!check_visible && !check_frozen)
791 {
792 UnlockReleaseBuffer(buffer);
793 continue;
794 }
795
796 /* Iterate over each tuple on the page. */
797 for (offnum = FirstOffsetNumber;
798 offnum <= maxoff;
799 offnum = OffsetNumberNext(offnum))
800 {
801 HeapTupleData tuple;
802 ItemId itemid;
803
804 itemid = PageGetItemId(page, offnum);
805
806 /* Unused or redirect line pointers are of no interest. */
807 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
808 continue;
809
810 /* Dead line pointers are neither all-visible nor frozen. */
811 if (ItemIdIsDead(itemid))
812 {
813 ItemPointerSet(&(tuple.t_self), blkno, offnum);
815 continue;
816 }
817
818 /* Initialize a HeapTupleData structure for checks below. */
819 ItemPointerSet(&(tuple.t_self), blkno, offnum);
820 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
821 tuple.t_len = ItemIdGetLength(itemid);
822 tuple.t_tableOid = relid;
823
824 /*
825 * If we're checking whether the page is all-visible, we expect
826 * the tuple to be all-visible.
827 */
828 if (check_visible &&
829 !tuple_all_visible(&tuple, OldestXmin, buffer))
830 {
831 TransactionId RecomputedOldestXmin;
832
833 /*
834 * Time has passed since we computed OldestXmin, so it's
835 * possible that this tuple is all-visible in reality even
836 * though it doesn't appear so based on our
837 * previously-computed value. Let's compute a new value so we
838 * can be certain whether there is a problem.
839 *
840 * From a concurrency point of view, it sort of sucks to
841 * retake ProcArrayLock here while we're holding the buffer
842 * exclusively locked, but it should be safe against
843 * deadlocks, because surely
844 * GetStrictOldestNonRemovableTransactionId() should never
845 * take a buffer lock. And this shouldn't happen often, so
846 * it's worth being careful so as to avoid false positives.
847 */
848 RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
849
850 if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
852 else
853 {
854 OldestXmin = RecomputedOldestXmin;
855 if (!tuple_all_visible(&tuple, OldestXmin, buffer))
857 }
858 }
859
860 /*
861 * If we're checking whether the page is all-frozen, we expect the
862 * tuple to be in a state where it will never need freezing.
863 */
864 if (check_frozen)
865 {
868 }
869 }
870
871 UnlockReleaseBuffer(buffer);
872 }
873 read_stream_end(stream);
874
875 /* Clean up. */
876 if (vmbuffer != InvalidBuffer)
878 if (p.vmbuffer != InvalidBuffer)
881
882 /*
883 * Before returning, repurpose the fields to match caller's expectations.
884 * next is now the next item that should be read (rather than written) and
885 * count is now the number of items we wrote (rather than the number we
886 * allocated).
887 */
888 items->count = items->next;
889 items->next = 0;
890
891 return items;
892}
893
894/*
895 * Remember one corrupt item.
896 */
897static void
899{
900 /* enlarge output array if needed. */
901 if (items->next >= items->count)
902 {
903 items->count *= 2;
904 items->tids = repalloc(items->tids,
905 items->count * sizeof(ItemPointerData));
906 }
907 /* and add the new item */
908 items->tids[items->next++] = *tid;
909}
910
911/*
912 * Check whether a tuple is all-visible relative to a given OldestXmin value.
913 * The buffer should contain the tuple and should be locked and pinned.
914 */
915static bool
917{
919 TransactionId xmin;
920
921 state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
922 if (state != HEAPTUPLE_LIVE)
923 return false; /* all-visible implies live */
924
925 /*
926 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
927 * all-visible unless every tuple is hinted committed. However, those hint
928 * bits could be lost after a crash, so we can't be certain that they'll
929 * be set here. So just check the xmin.
930 */
931
932 xmin = HeapTupleHeaderGetXmin(tup->t_data);
933 if (!TransactionIdPrecedes(xmin, OldestXmin))
934 return false; /* xmin not old enough for all to see */
935
936 return true;
937}
938
939/*
940 * check_relation_relkind - convenience routine to check that relation
941 * is of the relkind supported by the callers
942 */
943static void
945{
946 if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
948 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
949 errmsg("relation \"%s\" is of wrong relation kind",
952}
int16 AttrNumber
Definition: attnum.h:21
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
#define MaxBlockNumber
Definition: block.h:35
static Datum values[MAXATTR]
Definition: bootstrap.c:151
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4161
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5303
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5320
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5537
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:751
@ BAS_BULKREAD
Definition: bufmgr.h:37
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:197
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:280
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:414
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
PageData * Page
Definition: bufpage.h:82
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
uint8_t uint8
Definition: c.h:500
int64_t int64
Definition: c.h:499
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
int32_t int32
Definition: c.h:498
uint32 TransactionId
Definition: c.h:623
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:149
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2260
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
Assert(PointerIsAligned(start, uint64))
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7766
HTSV_Result
Definition: heapam.h:123
@ HEAPTUPLE_LIVE
Definition: heapam.h:125
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:324
int a
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define AccessShareLock
Definition: lockdefs.h:36
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
@ LW_SHARED
Definition: lwlock.h:115
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:2167
void * palloc0(Size size)
Definition: mcxt.c:1970
void * palloc(Size size)
Definition: mcxt.c:1940
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static BlockNumber collect_corrupt_items_read_stream_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
static corrupt_items * collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
Datum pg_visibility_map_summary(PG_FUNCTION_ARGS)
Datum pg_visibility_rel(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(pg_visibility_map)
struct corrupt_items corrupt_items
static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd)
static void check_relation_relkind(Relation rel)
static void record_corrupt_item(corrupt_items *items, ItemPointer tid)
static TransactionId GetStrictOldestNonRemovableTransactionId(Relation rel)
Datum pg_visibility_map(PG_FUNCTION_ARGS)
Definition: pg_visibility.c:84
struct vbits vbits
static vbits * collect_visibility_data(Oid relid, bool include_pd)
Datum pg_visibility_map_rel(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC_EXT(.name="pg_visibility",.version=PG_VERSION)
Datum pg_check_visible(PG_FUNCTION_ARGS)
Datum pg_check_frozen(PG_FUNCTION_ARGS)
Datum pg_visibility(PG_FUNCTION_ARGS)
static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
Datum pg_truncate_visibility_map(PG_FUNCTION_ARGS)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
unsigned int Oid
Definition: postgres_ext.h:30
#define DELAY_CHKPT_START
Definition: proc.h:120
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:121
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:2005
RunningTransactions GetRunningTransactionData(void)
Definition: procarray.c:2689
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:770
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:716
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1055
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.c:162
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
#define READ_STREAM_FULL
Definition: read_stream.h:43
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:659
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:578
#define RelationGetRelationName(relation)
Definition: rel.h:550
#define RelationNeedsWAL(relation)
Definition: rel.h:639
ForkNumber
Definition: relpath.h:56
@ VISIBILITYMAP_FORKNUM
Definition: relpath.h:60
@ MAIN_FORKNUM
Definition: relpath.h:58
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:819
void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *old_nblocks, BlockNumber *nblocks)
Definition: smgr.c:875
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
PGPROC * MyProc
Definition: proc.c:67
#define SMGR_TRUNCATE_VM
Definition: storage_xlog.h:41
#define XLOG_SMGR_TRUNCATE
Definition: storage_xlog.h:31
void * user_fctx
Definition: funcapi.h:82
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
int delayChkptFlags
Definition: proc.h:241
RelFileLocator rd_locator
Definition: rel.h:57
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId oldestDatabaseRunningXid
Definition: standby.h:93
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:47
FullTransactionId nextXid
Definition: transam.h:220
ItemPointer tids
Definition: pg_visibility.c:44
BlockNumber count
Definition: pg_visibility.c:43
BlockNumber next
Definition: pg_visibility.c:42
Definition: regguts.h:323
BlockNumber next
Definition: pg_visibility.c:35
uint8 bits[FLEXIBLE_ARRAY_MEMBER]
Definition: pg_visibility.c:37
BlockNumber count
Definition: pg_visibility.c:36
RelFileLocator rlocator
Definition: storage_xlog.h:49
BlockNumber blkno
Definition: storage_xlog.h:48
static ItemArray items
Definition: test_tidstore.c:48
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define XidFromFullTransactionId(x)
Definition: transam.h:48
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:175
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:835
TransamVariablesData * TransamVariables
Definition: varsup.c:34
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
BlockNumber visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
const char * name
bool RecoveryInProgress(void)
Definition: xlog.c:6522
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLR_SPECIAL_REL_UPDATE
Definition: xlogrecord.h:82