PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
heap_surgery.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * heap_surgery.c
 *	  Functions to perform surgery on a damaged heap table.
 *
 * Copyright (c) 2020-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pg_surgery/heap_surgery.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/relation.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/pg_am_d.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/acl.h"
#include "utils/array.h"
#include "utils/rel.h"
25
27 .name = "pg_surgery",
28 .version = PG_VERSION
29);
30
31/* Options to forcefully change the state of a heap tuple. */
33{
37
40
41static int32 tidcmp(const void *a, const void *b);
43 HeapTupleForceOption heap_force_opt);
44static void sanity_check_tid_array(ArrayType *ta, int *ntids);
45static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
46 OffsetNumber *next_start_ptr);
47
48/*-------------------------------------------------------------------------
49 * heap_force_kill()
50 *
51 * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
52 * given TID array.
53 *
54 * Usage: SELECT heap_force_kill(regclass, tid[]);
55 *-------------------------------------------------------------------------
56 */
59{
61}
62
63/*-------------------------------------------------------------------------
64 * heap_force_freeze()
65 *
66 * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
67 * given TID array.
68 *
69 * Usage: SELECT heap_force_freeze(regclass, tid[]);
70 *-------------------------------------------------------------------------
71 */
74{
76}
77
78/*-------------------------------------------------------------------------
79 * heap_force_common()
80 *
81 * Common code for heap_force_kill and heap_force_freeze
82 *-------------------------------------------------------------------------
83 */
84static Datum
86{
87 Oid relid = PG_GETARG_OID(0);
89 ItemPointer tids;
90 int ntids,
91 nblocks;
92 Relation rel;
93 OffsetNumber curr_start_ptr,
94 next_start_ptr;
95 bool include_this_tid[MaxHeapTuplesPerPage];
96
99 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
100 errmsg("recovery is in progress"),
101 errhint("Heap surgery functions cannot be executed during recovery.")));
102
103 /* Check inputs. */
104 sanity_check_tid_array(ta, &ntids);
105
106 rel = relation_open(relid, RowExclusiveLock);
107
108 /*
109 * Check target relation.
110 */
111 if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
113 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
114 errmsg("cannot operate on relation \"%s\"",
117
118 if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
120 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
121 errmsg("only heap AM is supported")));
122
123 /* Must be owner of the table or superuser. */
124 if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
126 get_relkind_objtype(rel->rd_rel->relkind),
128
129 tids = ((ItemPointer) ARR_DATA_PTR(ta));
130
131 /*
132 * If there is more than one TID in the array, sort them so that we can
133 * easily fetch all the TIDs belonging to one particular page from the
134 * array.
135 */
136 if (ntids > 1)
137 qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
138
139 curr_start_ptr = next_start_ptr = 0;
140 nblocks = RelationGetNumberOfBlocks(rel);
141
142 /*
143 * Loop, performing the necessary actions for each block.
144 */
145 while (next_start_ptr != ntids)
146 {
147 Buffer buf;
148 Buffer vmbuf = InvalidBuffer;
149 Page page;
150 BlockNumber blkno;
151 OffsetNumber curoff;
152 OffsetNumber maxoffset;
153 int i;
154 bool did_modify_page = false;
155 bool did_modify_vm = false;
156
158
159 /*
160 * Find all the TIDs belonging to one particular page starting from
161 * next_start_ptr and process them one by one.
162 */
163 blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
164
165 /* Check whether the block number is valid. */
166 if (blkno >= nblocks)
167 {
168 /* Update the current_start_ptr before moving to the next page. */
169 curr_start_ptr = next_start_ptr;
170
172 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
173 errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
174 blkno, RelationGetRelationName(rel))));
175 continue;
176 }
177
178 buf = ReadBuffer(rel, blkno);
180
181 page = BufferGetPage(buf);
182
183 maxoffset = PageGetMaxOffsetNumber(page);
184
185 /*
186 * Figure out which TIDs we are going to process and which ones we are
187 * going to skip.
188 */
189 memset(include_this_tid, 0, sizeof(include_this_tid));
190 for (i = curr_start_ptr; i < next_start_ptr; i++)
191 {
193 ItemId itemid;
194
195 /* Check whether the offset number is valid. */
196 if (offno == InvalidOffsetNumber || offno > maxoffset)
197 {
199 errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
200 blkno, offno, RelationGetRelationName(rel)));
201 continue;
202 }
203
204 itemid = PageGetItemId(page, offno);
205
206 /* Only accept an item ID that is used. */
207 if (ItemIdIsRedirected(itemid))
208 {
210 errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
211 blkno, offno, RelationGetRelationName(rel),
212 ItemIdGetRedirect(itemid)));
213 continue;
214 }
215 else if (ItemIdIsDead(itemid))
216 {
218 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
219 blkno, offno, RelationGetRelationName(rel))));
220 continue;
221 }
222 else if (!ItemIdIsUsed(itemid))
223 {
225 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
226 blkno, offno, RelationGetRelationName(rel))));
227 continue;
228 }
229
230 /* Mark it for processing. */
232 include_this_tid[offno] = true;
233 }
234
235 /*
236 * Before entering the critical section, pin the visibility map page
237 * if it appears to be necessary.
238 */
239 if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
240 visibilitymap_pin(rel, blkno, &vmbuf);
241
242 /* No ereport(ERROR) from here until all the changes are logged. */
244
245 for (curoff = FirstOffsetNumber; curoff <= maxoffset;
246 curoff = OffsetNumberNext(curoff))
247 {
248 ItemId itemid;
249
250 if (!include_this_tid[curoff])
251 continue;
252
253 itemid = PageGetItemId(page, curoff);
254 Assert(ItemIdIsNormal(itemid));
255
256 did_modify_page = true;
257
258 if (heap_force_opt == HEAP_FORCE_KILL)
259 {
260 ItemIdSetDead(itemid);
261
262 /*
263 * If the page is marked all-visible, we must clear
264 * PD_ALL_VISIBLE flag on the page header and an all-visible
265 * bit on the visibility map corresponding to the page.
266 */
267 if (PageIsAllVisible(page))
268 {
270 visibilitymap_clear(rel, blkno, vmbuf,
272 did_modify_vm = true;
273 }
274 }
275 else
276 {
277 HeapTupleHeader htup;
278
279 Assert(heap_force_opt == HEAP_FORCE_FREEZE);
280
281 htup = (HeapTupleHeader) PageGetItem(page, itemid);
282
283 /*
284 * Reset all visibility-related fields of the tuple. This
285 * logic should mimic heap_execute_freeze_tuple(), but we
286 * choose to reset xmin and ctid just to be sure that no
287 * potentially-garbled data is left behind.
288 */
289 ItemPointerSet(&htup->t_ctid, blkno, curoff);
292 if (htup->t_infomask & HEAP_MOVED)
293 {
294 if (htup->t_infomask & HEAP_MOVED_OFF)
296 else
298 }
299
300 /*
301 * Clear all the visibility-related bits of this tuple and
302 * mark it as frozen. Also, get rid of HOT_UPDATED and
303 * KEYS_UPDATES bits.
304 */
305 htup->t_infomask &= ~HEAP_XACT_MASK;
307 htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
308 htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
309 }
310 }
311
312 /*
313 * If the page was modified, only then, we mark the buffer dirty or do
314 * the WAL logging.
315 */
316 if (did_modify_page)
317 {
318 /* Mark buffer dirty before we write WAL. */
320
321 /* XLOG stuff */
322 if (RelationNeedsWAL(rel))
323 log_newpage_buffer(buf, true);
324 }
325
326 /* WAL log the VM page if it was modified. */
327 if (did_modify_vm && RelationNeedsWAL(rel))
328 log_newpage_buffer(vmbuf, false);
329
331
333
334 if (vmbuf != InvalidBuffer)
335 ReleaseBuffer(vmbuf);
336
337 /* Update the current_start_ptr before moving to the next page. */
338 curr_start_ptr = next_start_ptr;
339 }
340
342
343 pfree(ta);
344
346}
347
348/*-------------------------------------------------------------------------
349 * tidcmp()
350 *
351 * Compare two item pointers, return -1, 0, or +1.
352 *
353 * See ItemPointerCompare for details.
354 * ------------------------------------------------------------------------
355 */
356static int32
357tidcmp(const void *a, const void *b)
358{
359 ItemPointer iptr1 = ((const ItemPointer) a);
360 ItemPointer iptr2 = ((const ItemPointer) b);
361
362 return ItemPointerCompare(iptr1, iptr2);
363}
364
365/*-------------------------------------------------------------------------
366 * sanity_check_tid_array()
367 *
368 * Perform sanity checks on the given tid array, and set *ntids to the
369 * number of items in the array.
370 * ------------------------------------------------------------------------
371 */
372static void
374{
375 if (ARR_HASNULL(ta) && array_contains_nulls(ta))
377 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
378 errmsg("array must not contain nulls")));
379
380 if (ARR_NDIM(ta) > 1)
382 (errcode(ERRCODE_DATA_EXCEPTION),
383 errmsg("argument must be empty or one-dimensional array")));
384
385 *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
386}
387
388/*-------------------------------------------------------------------------
389 * find_tids_one_page()
390 *
391 * Find all the tids residing in the same page as tids[next_start_ptr], and
392 * update next_start_ptr so that it points to the first tid in the next page.
393 *
394 * NOTE: The input tids[] array must be sorted.
395 * ------------------------------------------------------------------------
396 */
397static BlockNumber
398find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
399{
400 int i;
401 BlockNumber prev_blkno,
402 blkno;
403
404 prev_blkno = blkno = InvalidBlockNumber;
405
406 for (i = *next_start_ptr; i < ntids; i++)
407 {
408 ItemPointerData tid = tids[i];
409
411
412 if (i == *next_start_ptr)
413 prev_blkno = blkno;
414
415 if (prev_blkno != blkno)
416 break;
417 }
418
419 *next_start_ptr = i;
420 return prev_blkno;
421}
@ ACLCHECK_NOT_OWNER
Definition: acl.h:185
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:2639
bool object_ownercheck(Oid classid, Oid objectid, Oid roleid)
Definition: aclchk.c:4075
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:264
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
#define ARR_HASNULL(a)
Definition: array.h:291
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3767
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5303
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5320
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2945
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5617
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:751
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:280
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:414
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
PageData * Page
Definition: bufpage.h:82
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
int32_t int32
Definition: c.h:498
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Assert(PointerIsAligned(start, uint64))
Datum heap_force_freeze(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:73
PG_FUNCTION_INFO_V1(heap_force_kill)
Datum heap_force_kill(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:58
static int32 tidcmp(const void *a, const void *b)
Definition: heap_surgery.c:357
static BlockNumber find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
Definition: heap_surgery.c:398
static Datum heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
Definition: heap_surgery.c:85
PG_MODULE_MAGIC_EXT(.name="pg_surgery",.version=PG_VERSION)
HeapTupleForceOption
Definition: heap_surgery.c:33
@ HEAP_FORCE_KILL
Definition: heap_surgery.c:34
@ HEAP_FORCE_FREEZE
Definition: heap_surgery.c:35
static void sanity_check_tid_array(ArrayType *ta, int *ntids)
Definition: heap_surgery.c:373
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_MOVED
Definition: htup_details.h:213
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:451
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:331
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:383
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static OffsetNumber ItemPointerGetOffsetNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:114
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define RowExclusiveLock
Definition: lockdefs.h:38
void pfree(void *pointer)
Definition: mcxt.c:2147
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
Oid GetUserId(void)
Definition: miscinit.c:520
ObjectType get_relkind_objtype(char relkind)
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static char * buf
Definition: pg_test_fsync.c:72
#define qsort(a, b, c, d)
Definition: port.h:479
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:30
#define RelationGetRelid(relation)
Definition: rel.h:516
#define RelationGetRelationName(relation)
Definition: rel.h:550
#define RelationNeedsWAL(relation)
Definition: rel.h:639
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
ItemPointerData t_ctid
Definition: htup_details.h:161
Form_pg_class rd_rel
Definition: rel.h:111
#define FrozenTransactionId
Definition: transam.h:33
#define InvalidTransactionId
Definition: transam.h:31
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
const char * name
bool RecoveryInProgress(void)
Definition: xlog.c:6522
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237