/*-------------------------------------------------------------------------
 *
 * pg_visibility.c
 *	  display visibility map information and page-level visibility bits
 *
 * Copyright (c) 2016-2026, PostgreSQL Global Development Group
 *
 *	  contrib/pg_visibility/pg_visibility.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "utils/rel.h"

PG_MODULE_MAGIC_EXT(
					.name = "pg_visibility",
					.version = PG_VERSION
);

typedef struct vbits
{
	BlockNumber next;
	BlockNumber count;
	uint8		bits[FLEXIBLE_ARRAY_MEMBER];
} vbits;

typedef struct corrupt_items
{
	BlockNumber next;
	BlockNumber count;
	ItemPointer tids;
} corrupt_items;

/* for collect_corrupt_items_read_stream_next_block */
struct collect_corrupt_items_read_stream_private
{
	bool		all_frozen;
	bool		all_visible;
	BlockNumber current_blocknum;
	BlockNumber last_exclusive;
	Relation	rel;
	Buffer		vmbuffer;
};

PG_FUNCTION_INFO_V1(pg_visibility_map);
PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
PG_FUNCTION_INFO_V1(pg_visibility);
PG_FUNCTION_INFO_V1(pg_visibility_rel);
PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
PG_FUNCTION_INFO_V1(pg_check_frozen);
PG_FUNCTION_INFO_V1(pg_check_visible);
PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);

static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
static vbits *collect_visibility_data(Oid relid, bool include_pd);
static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
											bool all_frozen);
static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
							  Buffer buffer);
static void check_relation_relkind(Relation rel);

/*
 * Visibility map information for a single block of a relation.
 *
 * Note: the VM code will silently return zeroes for pages past the end
 * of the map, so we allow probes up to MaxBlockNumber regardless of the
 * actual relation size.
 */
Datum
pg_visibility_map(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		blkno = PG_GETARG_INT64(1);
	int32		mapbits;
	Relation	rel;
	Buffer		vmbuffer = InvalidBuffer;
	TupleDesc	tupdesc;
	Datum		values[2];
	bool		nulls[2] = {0};

	rel = relation_open(relid, AccessShareLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	if (blkno < 0 || blkno > MaxBlockNumber)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid block number")));

	tupdesc = pg_visibility_tupdesc(false, false);

	mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
	values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);

	relation_close(rel, AccessShareLock);

	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
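
/*
 * Illustrative usage, not part of the upstream file: the extension script
 * exposes this C function at the SQL level as pg_visibility_map(regclass,
 * bigint), returning (all_visible, all_frozen).  A minimal sketch, assuming
 * a hypothetical table named "mytable":
 *
 *   CREATE EXTENSION pg_visibility;
 *   SELECT all_visible, all_frozen
 *     FROM pg_visibility_map('mytable'::regclass, 0);
 */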

/*
 * Visibility map information for a single block of a relation, plus the
 * page-level information for the same block.
 */
Datum
pg_visibility(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		blkno = PG_GETARG_INT64(1);
	int32		mapbits;
	Relation	rel;
	Buffer		vmbuffer = InvalidBuffer;
	Buffer		buffer;
	Page		page;
	TupleDesc	tupdesc;
	Datum		values[3];
	bool		nulls[3] = {0};

	rel = relation_open(relid, AccessShareLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	if (blkno < 0 || blkno > MaxBlockNumber)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid block number")));

	tupdesc = pg_visibility_tupdesc(false, true);

	mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
	values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);

	/* Here we have to explicitly check rel size ... */
	if (blkno < RelationGetNumberOfBlocks(rel))
	{
		buffer = ReadBuffer(rel, blkno);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buffer);
		values[2] = BoolGetDatum(PageIsAllVisible(page));

		UnlockReleaseBuffer(buffer);
	}
	else
	{
		/* As with the vismap, silently return 0 for pages past EOF */
		values[2] = BoolGetDatum(false);
	}

	relation_close(rel, AccessShareLock);

	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
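
/*
 * Illustrative usage (sketch, not from the upstream file): the SQL-level
 * wrapper pg_visibility(regclass, bigint) adds the page-level bit, e.g.,
 * with a hypothetical table "mytable":
 *
 *   SELECT all_visible, all_frozen, pd_all_visible
 *     FROM pg_visibility('mytable'::regclass, 0);
 */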

/*
 * Visibility map information for every block in a relation.
 */
Datum
pg_visibility_map_rel(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	vbits	   *info;

	if (SRF_IS_FIRSTCALL())
	{
		Oid			relid = PG_GETARG_OID(0);
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
		/* collect_visibility_data will verify the relkind */
		funcctx->user_fctx = collect_visibility_data(relid, false);
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	info = (vbits *) funcctx->user_fctx;

	if (info->next < info->count)
	{
		Datum		values[3];
		bool		nulls[3] = {0};
		HeapTuple	tuple;

		values[0] = Int64GetDatum(info->next);
		values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
		values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
		info->next++;

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
	}

	SRF_RETURN_DONE(funcctx);
}

/*
 * Visibility map information for every block in a relation, plus the page
 * level information for each block.
 */
Datum
pg_visibility_rel(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	vbits	   *info;

	if (SRF_IS_FIRSTCALL())
	{
		Oid			relid = PG_GETARG_OID(0);
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
		/* collect_visibility_data will verify the relkind */
		funcctx->user_fctx = collect_visibility_data(relid, true);
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	info = (vbits *) funcctx->user_fctx;

	if (info->next < info->count)
	{
		Datum		values[4];
		bool		nulls[4] = {0};
		HeapTuple	tuple;

		values[0] = Int64GetDatum(info->next);
		values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
		values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
		values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
		info->next++;

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
	}

	SRF_RETURN_DONE(funcctx);
}
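
/*
 * Illustrative usage (sketch, not from the upstream file): the one-argument
 * SQL overloads map onto these set-returning variants and emit one row per
 * block, e.g., for a hypothetical table "mytable":
 *
 *   SELECT * FROM pg_visibility_map('mytable'::regclass); -- blkno, all_visible, all_frozen
 *   SELECT * FROM pg_visibility('mytable'::regclass);     -- ... plus pd_all_visible
 */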

/*
 * Count the number of all-visible and all-frozen pages in the visibility
 * map for a particular relation.
 */
Datum
pg_visibility_map_summary(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	Relation	rel;
	BlockNumber all_visible = 0;
	BlockNumber all_frozen = 0;
	TupleDesc	tupdesc;
	Datum		values[2];
	bool		nulls[2] = {0};

	rel = relation_open(relid, AccessShareLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	visibilitymap_count(rel, &all_visible, &all_frozen);

	relation_close(rel, AccessShareLock);

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	values[0] = Int64GetDatum((int64) all_visible);
	values[1] = Int64GetDatum((int64) all_frozen);

	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
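
/*
 * Illustrative usage (sketch, not from the upstream file), with a
 * hypothetical table "mytable":
 *
 *   SELECT all_visible, all_frozen
 *     FROM pg_visibility_map_summary('mytable'::regclass);
 */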

/*
 * Return the TIDs of non-frozen tuples present in pages marked all-frozen
 * in the visibility map.  We hope no one will ever find any, but there could
 * be bugs, database corruption, etc.
 */
Datum
pg_check_frozen(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	corrupt_items *items;

	if (SRF_IS_FIRSTCALL())
	{
		Oid			relid = PG_GETARG_OID(0);
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		/* collect_corrupt_items will verify the relkind */
		funcctx->user_fctx = collect_corrupt_items(relid, false, true);
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	items = (corrupt_items *) funcctx->user_fctx;

	if (items->next < items->count)
		SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));

	SRF_RETURN_DONE(funcctx);
}

/*
 * Return the TIDs of not-all-visible tuples in pages marked all-visible
 * in the visibility map.  We hope no one will ever find any, but there could
 * be bugs, database corruption, etc.
 */
Datum
pg_check_visible(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	corrupt_items *items;

	if (SRF_IS_FIRSTCALL())
	{
		Oid			relid = PG_GETARG_OID(0);
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		/* collect_corrupt_items will verify the relkind */
		funcctx->user_fctx = collect_corrupt_items(relid, true, false);
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	items = (corrupt_items *) funcctx->user_fctx;

	if (items->next < items->count)
		SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));

	SRF_RETURN_DONE(funcctx);
}
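
/*
 * Illustrative usage (sketch, not from the upstream file): both checking
 * functions return a set of TIDs, which should be empty unless the
 * visibility map disagrees with the heap.  With a hypothetical table
 * "mytable":
 *
 *   SELECT * FROM pg_check_frozen('mytable'::regclass);
 *   SELECT * FROM pg_check_visible('mytable'::regclass);
 */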

/*
 * Remove the visibility map fork for a relation.  If there turn out to be
 * any bugs in the visibility map code that require rebuilding the VM, this
 * provides users with a way to do it that is cleaner than shutting down the
 * server and removing files by hand.
 *
 * This is a cut-down version of RelationTruncate.
 */
Datum
pg_truncate_visibility_map(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	Relation	rel;
	ForkNumber	fork;
	BlockNumber block;
	BlockNumber old_block;

	rel = relation_open(relid, AccessExclusiveLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	/* Forcibly reset cached file size */
	RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;

	/* Compute new and old size before entering critical section. */
	fork = VISIBILITYMAP_FORKNUM;
	block = visibilitymap_prepare_truncate(rel, 0);
	old_block = BlockNumberIsValid(block) ? smgrnblocks(RelationGetSmgr(rel), fork) : 0;

	/*
	 * WAL-logging, buffer dropping, file truncation must be atomic and all on
	 * one side of a checkpoint.  See RelationTruncate() for discussion.
	 */
	Assert((MyProc->delayChkptFlags & (DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE)) == 0);
	MyProc->delayChkptFlags |= DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE;
	START_CRIT_SECTION();

	if (RelationNeedsWAL(rel))
	{
		XLogRecPtr	lsn;
		xl_smgr_truncate xlrec;

		xlrec.blkno = 0;
		xlrec.rlocator = rel->rd_locator;
		xlrec.flags = SMGR_TRUNCATE_VM;

		XLogBeginInsert();
		XLogRegisterData(&xlrec, sizeof(xlrec));

		lsn = XLogInsert(RM_SMGR_ID,
						 XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
		XLogFlush(lsn);
	}

	if (BlockNumberIsValid(block))
		smgrtruncate(RelationGetSmgr(rel), &fork, 1, &old_block, &block);

	END_CRIT_SECTION();
	MyProc->delayChkptFlags &= ~(DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE);

	/*
	 * Release the lock right away, not at commit time.
	 *
	 * It would be a problem to release the lock prior to commit if this
	 * truncate operation sends any transactional invalidation messages.  Other
	 * backends would potentially be able to lock the relation without
	 * processing them in the window of time between when we release the lock
	 * here and when we send the messages at our eventual commit.  However,
	 * we're currently only sending a non-transactional smgr invalidation,
	 * which will have been posted to shared memory immediately from within
	 * smgr_truncate.  Therefore, there should be no race here.
	 *
	 * The reason why it's desirable to release the lock early here is because
	 * of the possibility that someone will need to use this to blow away many
	 * visibility map forks at once.  If we can't release the lock until
	 * commit time, the transaction doing this will accumulate
	 * AccessExclusiveLocks on all of those relations at the same time, which
	 * is undesirable.  However, if this turns out to be unsafe we may have no
	 * choice...
	 */
	relation_close(rel, AccessExclusiveLock);

	/* Nothing to return. */
	PG_RETURN_VOID();
}
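
/*
 * Illustrative usage (sketch, not from the upstream file): after discarding
 * the VM fork, a subsequent VACUUM rebuilds the map.  With a hypothetical
 * table "mytable":
 *
 *   SELECT pg_truncate_visibility_map('mytable'::regclass);
 *   VACUUM mytable;
 */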

/*
 * Helper function to construct whichever TupleDesc we need for a particular
 * call.
 */
static TupleDesc
pg_visibility_tupdesc(bool include_blkno, bool include_pd)
{
	TupleDesc	tupdesc;
	AttrNumber	maxattr = 2;
	AttrNumber	a = 0;

	if (include_blkno)
		++maxattr;
	if (include_pd)
		++maxattr;
	tupdesc = CreateTemplateTupleDesc(maxattr);
	if (include_blkno)
		TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
	TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
	if (include_pd)
		TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
	Assert(a == maxattr);

	return BlessTupleDesc(tupdesc);
}
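
/*
 * For reference (editorial summary, not part of the upstream file), the four
 * flag combinations used by the callers above produce these column sets:
 *
 *   include_blkno  include_pd   columns
 *   false          false        (all_visible, all_frozen)
 *   false          true         (all_visible, all_frozen, pd_all_visible)
 *   true           false        (blkno, all_visible, all_frozen)
 *   true           true         (blkno, all_visible, all_frozen, pd_all_visible)
 */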

/*
 * Collect visibility data about a relation.
 *
 * Checks relkind of relid and will throw an error if the relation does not
 * have a VM.
 */
static vbits *
collect_visibility_data(Oid relid, bool include_pd)
{
	Relation	rel;
	BlockNumber nblocks;
	vbits	   *info;
	BlockNumber blkno;
	Buffer		vmbuffer = InvalidBuffer;
	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
	BlockRangeReadStreamPrivate p;
	ReadStream *stream = NULL;

	rel = relation_open(relid, AccessShareLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	nblocks = RelationGetNumberOfBlocks(rel);
	info = palloc0(offsetof(vbits, bits) + nblocks);
	info->next = 0;
	info->count = nblocks;

	/* Create a stream if reading main fork. */
	if (include_pd)
	{
		p.current_blocknum = 0;
		p.last_exclusive = nblocks;

		/*
		 * It is safe to use batchmode as block_range_read_stream_cb takes no
		 * locks.
		 */
		stream = read_stream_begin_relation(READ_STREAM_FULL |
											READ_STREAM_USE_BATCHING,
											bstrategy,
											rel,
											MAIN_FORKNUM,
											block_range_read_stream_cb,
											&p,
											0);
	}

	for (blkno = 0; blkno < nblocks; ++blkno)
	{
		int32		mapbits;

		/* Make sure we are interruptible. */
		CHECK_FOR_INTERRUPTS();

		/* Get map info. */
		mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
			info->bits[blkno] |= (1 << 0);
		if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
			info->bits[blkno] |= (1 << 1);

		/*
		 * Page-level data requires reading every block, so only get it if the
		 * caller needs it.  Use a buffer access strategy, too, to prevent
		 * cache-trashing.
		 */
		if (include_pd)
		{
			Buffer		buffer;
			Page		page;

			buffer = read_stream_next_buffer(stream, NULL);
			LockBuffer(buffer, BUFFER_LOCK_SHARE);

			page = BufferGetPage(buffer);
			if (PageIsAllVisible(page))
				info->bits[blkno] |= (1 << 2);

			UnlockReleaseBuffer(buffer);
		}
	}

	if (include_pd)
	{
		Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
		read_stream_end(stream);
	}

	/* Clean up. */
	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	relation_close(rel, AccessShareLock);

	return info;
}
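
/*
 * A minimal decoding sketch (not part of the upstream file) for the per-block
 * byte filled in above; the same bit positions are consumed by
 * pg_visibility_map_rel() and pg_visibility_rel():
 *
 *   bool all_visible    = (info->bits[blkno] & (1 << 0)) != 0;
 *   bool all_frozen     = (info->bits[blkno] & (1 << 1)) != 0;
 *   bool pd_all_visible = (info->bits[blkno] & (1 << 2)) != 0;
 */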

/*
 * The "strict" version of GetOldestNonRemovableTransactionId().  The
 * pg_visibility check can tolerate false positives (don't report some of the
 * errors), but can't tolerate false negatives (report false errors).  Normally,
 * horizons move forwards, but there are cases when it could move backward
 * (see comment for ComputeXidHorizons()).
 *
 * This is why we have to implement our own function for xid horizon, which
 * would be guaranteed to be newer or equal to any xid horizon computed before.
 * We have to do the following to achieve this.
 *
 * 1. Ignore processes xmin's, because they consider connection to other
 * databases that were ignored before.
 * 2. Ignore KnownAssignedXids, as they are not database-aware.  Although we
 * now perform minimal checking on a standby by always using nextXid, this
 * approach is better than nothing and will at least catch extremely broken
 * cases where a xid is in the future.
 * 3. Ignore walsender xmin, because it could go backward if some replication
 * connections don't use replication slots.
 *
 * While it might seem like we could use KnownAssignedXids for shared
 * catalogs, since shared catalogs rely on a global horizon rather than a
 * database-specific one - there are potential edge cases.  For example, a
 * transaction may crash on the primary without writing a commit/abort record.
 * This would lead to a situation where it appears to still be running on the
 * standby, even though it has already ended on the primary.  For this reason,
 * it's safer to ignore KnownAssignedXids, even for shared catalogs.
 *
 * As a result, we're using only currently running xids to compute the horizon.
 * Surely these would significantly sacrifice accuracy.  But we have to do so
 * to avoid reporting false errors.
 */
static TransactionId
GetStrictOldestNonRemovableTransactionId(Relation rel)
{
	RunningTransactions runningTransactions;

	if (RecoveryInProgress())
	{
		TransactionId result;

		/* As we ignore KnownAssignedXids on standby, just pick nextXid */
		LWLockAcquire(XidGenLock, LW_SHARED);
		result = XidFromFullTransactionId(TransamVariables->nextXid);
		LWLockRelease(XidGenLock);
		return result;
	}
	else if (rel == NULL || rel->rd_rel->relisshared)
	{
		/* Shared relation: take into account all running xids */
		runningTransactions = GetRunningTransactionData();
		LWLockRelease(ProcArrayLock);
		LWLockRelease(XidGenLock);
		return runningTransactions->oldestRunningXid;
	}
	else if (!RELATION_IS_LOCAL(rel))
	{
		/*
		 * Normal relation: take into account xids running within the current
		 * database
		 */
		runningTransactions = GetRunningTransactionData();
		LWLockRelease(ProcArrayLock);
		LWLockRelease(XidGenLock);
		return runningTransactions->oldestDatabaseRunningXid;
	}
	else
	{
		/*
		 * For temporary relations, ComputeXidHorizons() uses only
		 * TransamVariables->latestCompletedXid and MyProc->xid.  These two
		 * shouldn't go backwards.  So we're fine with this horizon.
		 */
		return GetOldestNonRemovableTransactionId(rel);
	}
}

/*
 * Callback function to get next block for read stream object used in
 * collect_corrupt_items() function.
 */
static BlockNumber
collect_corrupt_items_read_stream_next_block(ReadStream *stream,
											 void *callback_private_data,
											 void *per_buffer_data)
{
	struct collect_corrupt_items_read_stream_private *p = callback_private_data;

	for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++)
	{
		bool		check_frozen = false;
		bool		check_visible = false;

		/* Make sure we are interruptible. */
		CHECK_FOR_INTERRUPTS();

		if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer))
			check_frozen = true;
		if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer))
			check_visible = true;
		if (!check_visible && !check_frozen)
			continue;

		return p->current_blocknum++;
	}

	return InvalidBlockNumber;
}

/*
 * Returns a list of items whose visibility map information does not match
 * the status of the tuples on the page.
 *
 * If all_visible is passed as true, this will include all items which are
 * on pages marked as all-visible in the visibility map but which do not
 * seem to in fact be all-visible.
 *
 * If all_frozen is passed as true, this will include all items which are
 * on pages marked as all-frozen but which do not seem to in fact be frozen.
 *
 * Checks relkind of relid and will throw an error if the relation does not
 * have a VM.
 */
static corrupt_items *
collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
{
	Relation	rel;
	corrupt_items *items;
	Buffer		vmbuffer = InvalidBuffer;
	TransactionId OldestXmin = InvalidTransactionId;
	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
	struct collect_corrupt_items_read_stream_private p;
	ReadStream *stream;
	Buffer		buffer;

	rel = relation_open(relid, AccessShareLock);

	/* Only some relkinds have a visibility map */
	check_relation_relkind(rel);

	if (all_visible)
		OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);

	/*
	 * Guess an initial array size.  We don't expect many corrupted tuples, so
	 * start with a small array.  This function uses the "next" field to track
	 * the next offset where we can store an item (which is the same thing as
	 * the number of items found so far) and the "count" field to track the
	 * number of entries allocated.  We'll repurpose these fields before
	 * returning.
	 */
	items = palloc0_object(corrupt_items);
	items->next = 0;
	items->count = 64;
	items->tids = palloc(items->count * sizeof(ItemPointerData));

	p.current_blocknum = 0;
	p.last_exclusive = RelationGetNumberOfBlocks(rel);
	p.rel = rel;
	p.vmbuffer = InvalidBuffer;
	p.all_frozen = all_frozen;
	p.all_visible = all_visible;
	stream = read_stream_begin_relation(READ_STREAM_FULL,
										bstrategy,
										rel,
										MAIN_FORKNUM,
										collect_corrupt_items_read_stream_next_block,
										&p,
										0);

	/* Loop over every block in the relation. */
	while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
	{
		bool		check_frozen = all_frozen;
		bool		check_visible = all_visible;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		BlockNumber blkno;

		/* Make sure we are interruptible. */
		CHECK_FOR_INTERRUPTS();

		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buffer);
		maxoff = PageGetMaxOffsetNumber(page);
		blkno = BufferGetBlockNumber(buffer);

		/*
		 * The visibility map bits might have changed while we were acquiring
		 * the page lock.  Recheck to avoid returning spurious results.
		 */
		if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
			check_frozen = false;
		if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
			check_visible = false;
		if (!check_visible && !check_frozen)
		{
			UnlockReleaseBuffer(buffer);
			continue;
		}

		/* Iterate over each tuple on the page. */
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			HeapTupleData tuple;
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused or redirect line pointers are of no interest. */
			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
				continue;

			/* Dead line pointers are neither all-visible nor frozen. */
			if (ItemIdIsDead(itemid))
			{
				ItemPointerSet(&(tuple.t_self), blkno, offnum);
				record_corrupt_item(items, &tuple.t_self);
				continue;
			}

			/* Initialize a HeapTupleData structure for checks below. */
			ItemPointerSet(&(tuple.t_self), blkno, offnum);
			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);
			tuple.t_tableOid = relid;

			/*
			 * If we're checking whether the page is all-visible, we expect
			 * the tuple to be all-visible.
			 */
			if (check_visible &&
				!tuple_all_visible(&tuple, OldestXmin, buffer))
			{
				TransactionId RecomputedOldestXmin;

				/*
				 * Time has passed since we computed OldestXmin, so it's
				 * possible that this tuple is all-visible in reality even
				 * though it doesn't appear so based on our
				 * previously-computed value.  Let's compute a new value so we
				 * can be certain whether there is a problem.
				 *
				 * From a concurrency point of view, it sort of sucks to
				 * retake ProcArrayLock here while we're holding the buffer
				 * locked in shared mode, but it should be safe against
				 * deadlocks, because surely
				 * GetStrictOldestNonRemovableTransactionId() should never
				 * take a buffer lock.  And this shouldn't happen often, so
				 * it's worth being careful so as to avoid false positives.
				 */
				RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);

				if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
					record_corrupt_item(items, &tuple.t_self);
				else
				{
					OldestXmin = RecomputedOldestXmin;
					if (!tuple_all_visible(&tuple, OldestXmin, buffer))
						record_corrupt_item(items, &tuple.t_self);
				}
			}

			/*
			 * If we're checking whether the page is all-frozen, we expect the
			 * tuple to be in a state where it will never need freezing.
			 */
			if (check_frozen)
			{
				if (heap_tuple_needs_eventual_freeze(tuple.t_data))
					record_corrupt_item(items, &tuple.t_self);
			}
		}

		UnlockReleaseBuffer(buffer);
	}
	read_stream_end(stream);

	/* Clean up. */
	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	if (p.vmbuffer != InvalidBuffer)
		ReleaseBuffer(p.vmbuffer);
	relation_close(rel, AccessShareLock);

	/*
	 * Before returning, repurpose the fields to match caller's expectations.
	 * next is now the next item that should be read (rather than written) and
	 * count is now the number of items we wrote (rather than the number we
	 * allocated).
	 */
	items->count = items->next;
	items->next = 0;

	return items;
}

/*
 * Remember one corrupt item.
 */
static void
record_corrupt_item(corrupt_items *items, ItemPointer tid)
{
	/* enlarge output array if needed. */
	if (items->next >= items->count)
	{
		items->count *= 2;
		items->tids = repalloc(items->tids,
							   items->count * sizeof(ItemPointerData));
	}
	/* and add the new item */
	items->tids[items->next++] = *tid;
}

/*
 * Check whether a tuple is all-visible relative to a given OldestXmin value.
 * The buffer should contain the tuple and should be locked and pinned.
 */
static bool
tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
{
	HTSV_Result state;
	TransactionId xmin;

	state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
	if (state != HEAPTUPLE_LIVE)
		return false;			/* all-visible implies live */

	/*
	 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
	 * all-visible unless every tuple is hinted committed.  However, those
	 * hint bits could be lost after a crash, so we can't be certain that
	 * they'll be set here.  So just check the xmin.
	 */

	xmin = HeapTupleHeaderGetXmin(tup->t_data);
	if (!TransactionIdPrecedes(xmin, OldestXmin))
		return false;			/* xmin not old enough for all to see */

	return true;
}

/*
 * check_relation_relkind - convenience routine to check that relation
 * is of the relkind supported by the callers
 */
static void
check_relation_relkind(Relation rel)
{
	if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("relation \"%s\" is of wrong relation kind",
						RelationGetRelationName(rel)),
				 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
}