PostgreSQL Source Code  git master
heap_surgery.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heap_surgery.c
4  * Functions to perform surgery on the damaged heap table.
5  *
6  * Copyright (c) 2020-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * contrib/pg_surgery/heap_surgery.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/heapam.h"
16 #include "access/visibilitymap.h"
17 #include "access/xloginsert.h"
18 #include "catalog/pg_am_d.h"
19 #include "catalog/pg_proc_d.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "utils/acl.h"
23 #include "utils/array.h"
24 #include "utils/rel.h"
25 
27 
28 /* Options to forcefully change the state of a heap tuple. */
30 {
34 
37 
38 static int32 tidcmp(const void *a, const void *b);
40  HeapTupleForceOption heap_force_opt);
41 static void sanity_check_tid_array(ArrayType *ta, int *ntids);
42 static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
43  OffsetNumber *next_start_ptr);
44 
45 /*-------------------------------------------------------------------------
46  * heap_force_kill()
47  *
48  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
49  * given TID array.
50  *
51  * Usage: SELECT heap_force_kill(regclass, tid[]);
52  *-------------------------------------------------------------------------
53  */
54 Datum
56 {
58 }
59 
60 /*-------------------------------------------------------------------------
61  * heap_force_freeze()
62  *
63  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
64  * given TID array.
65  *
66  * Usage: SELECT heap_force_freeze(regclass, tid[]);
67  *-------------------------------------------------------------------------
68  */
69 Datum
71 {
73 }
74 
75 /*-------------------------------------------------------------------------
76  * heap_force_common()
77  *
78  * Common code for heap_force_kill and heap_force_freeze
79  *-------------------------------------------------------------------------
80  */
81 static Datum
83 {
84  Oid relid = PG_GETARG_OID(0);
86  ItemPointer tids;
87  int ntids,
88  nblocks;
89  Relation rel;
90  OffsetNumber curr_start_ptr,
91  next_start_ptr;
92  bool include_this_tid[MaxHeapTuplesPerPage];
93 
94  if (RecoveryInProgress())
95  ereport(ERROR,
96  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
97  errmsg("recovery is in progress"),
98  errhint("Heap surgery functions cannot be executed during recovery.")));
99 
100  /* Check inputs. */
101  sanity_check_tid_array(ta, &ntids);
102 
103  rel = relation_open(relid, RowExclusiveLock);
104 
105  /*
106  * Check target relation.
107  */
108  if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
109  ereport(ERROR,
110  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
111  errmsg("cannot operate on relation \"%s\"",
113  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
114 
115  if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
116  ereport(ERROR,
117  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
118  errmsg("only heap AM is supported")));
119 
120  /* Must be owner of the table or superuser. */
121  if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
123  get_relkind_objtype(rel->rd_rel->relkind),
125 
126  tids = ((ItemPointer) ARR_DATA_PTR(ta));
127 
128  /*
129  * If there is more than one TID in the array, sort them so that we can
130  * easily fetch all the TIDs belonging to one particular page from the
131  * array.
132  */
133  if (ntids > 1)
134  qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
135 
136  curr_start_ptr = next_start_ptr = 0;
137  nblocks = RelationGetNumberOfBlocks(rel);
138 
139  /*
140  * Loop, performing the necessary actions for each block.
141  */
142  while (next_start_ptr != ntids)
143  {
144  Buffer buf;
145  Buffer vmbuf = InvalidBuffer;
146  Page page;
147  BlockNumber blkno;
148  OffsetNumber curoff;
149  OffsetNumber maxoffset;
150  int i;
151  bool did_modify_page = false;
152  bool did_modify_vm = false;
153 
155 
156  /*
157  * Find all the TIDs belonging to one particular page starting from
158  * next_start_ptr and process them one by one.
159  */
160  blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
161 
162  /* Check whether the block number is valid. */
163  if (blkno >= nblocks)
164  {
165  /* Update curr_start_ptr before moving to the next page. */
166  curr_start_ptr = next_start_ptr;
167 
168  ereport(NOTICE,
169  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
170  errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
171  blkno, RelationGetRelationName(rel))));
172  continue;
173  }
174 
175  buf = ReadBuffer(rel, blkno);
177 
178  page = BufferGetPage(buf);
179 
180  maxoffset = PageGetMaxOffsetNumber(page);
181 
182  /*
183  * Figure out which TIDs we are going to process and which ones we are
184  * going to skip.
185  */
186  memset(include_this_tid, 0, sizeof(include_this_tid));
187  for (i = curr_start_ptr; i < next_start_ptr; i++)
188  {
190  ItemId itemid;
191 
192  /* Check whether the offset number is valid. */
193  if (offno == InvalidOffsetNumber || offno > maxoffset)
194  {
195  ereport(NOTICE,
196  errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
197  blkno, offno, RelationGetRelationName(rel)));
198  continue;
199  }
200 
201  itemid = PageGetItemId(page, offno);
202 
203  /* Only accept an item ID that is used. */
204  if (ItemIdIsRedirected(itemid))
205  {
206  ereport(NOTICE,
207  errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
208  blkno, offno, RelationGetRelationName(rel),
209  ItemIdGetRedirect(itemid)));
210  continue;
211  }
212  else if (ItemIdIsDead(itemid))
213  {
214  ereport(NOTICE,
215  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
216  blkno, offno, RelationGetRelationName(rel))));
217  continue;
218  }
219  else if (!ItemIdIsUsed(itemid))
220  {
221  ereport(NOTICE,
222  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
223  blkno, offno, RelationGetRelationName(rel))));
224  continue;
225  }
226 
227  /* Mark it for processing. */
228  Assert(offno < MaxHeapTuplesPerPage);
229  include_this_tid[offno] = true;
230  }
231 
232  /*
233  * Before entering the critical section, pin the visibility map page
234  * if it appears to be necessary.
235  */
236  if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
237  visibilitymap_pin(rel, blkno, &vmbuf);
238 
239  /* No ereport(ERROR) from here until all the changes are logged. */
241 
242  for (curoff = FirstOffsetNumber; curoff <= maxoffset;
243  curoff = OffsetNumberNext(curoff))
244  {
245  ItemId itemid;
246 
247  if (!include_this_tid[curoff])
248  continue;
249 
250  itemid = PageGetItemId(page, curoff);
251  Assert(ItemIdIsNormal(itemid));
252 
253  did_modify_page = true;
254 
255  if (heap_force_opt == HEAP_FORCE_KILL)
256  {
257  ItemIdSetDead(itemid);
258 
259  /*
260  * If the page is marked all-visible, we must clear
261  * PD_ALL_VISIBLE flag on the page header and an all-visible
262  * bit on the visibility map corresponding to the page.
263  */
264  if (PageIsAllVisible(page))
265  {
266  PageClearAllVisible(page);
267  visibilitymap_clear(rel, blkno, vmbuf,
269  did_modify_vm = true;
270  }
271  }
272  else
273  {
274  HeapTupleHeader htup;
275 
276  Assert(heap_force_opt == HEAP_FORCE_FREEZE);
277 
278  htup = (HeapTupleHeader) PageGetItem(page, itemid);
279 
280  /*
281  * Reset all visibility-related fields of the tuple. This
282  * logic should mimic heap_execute_freeze_tuple(), but we
283  * choose to reset xmin and ctid just to be sure that no
284  * potentially-garbled data is left behind.
285  */
286  ItemPointerSet(&htup->t_ctid, blkno, curoff);
289  if (htup->t_infomask & HEAP_MOVED)
290  {
291  if (htup->t_infomask & HEAP_MOVED_OFF)
293  else
295  }
296 
297  /*
298  * Clear all the visibility-related bits of this tuple and
299  * mark it as frozen. Also, get rid of HOT_UPDATED and
300  * KEYS_UPDATED bits.
301  */
302  htup->t_infomask &= ~HEAP_XACT_MASK;
304  htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
305  htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
306  }
307  }
308 
309  /*
310  * Mark the buffer dirty and do the WAL logging only if the page was
311  * actually modified.
312  */
313  if (did_modify_page)
314  {
315  /* Mark buffer dirty before we write WAL. */
317 
318  /* XLOG stuff */
319  if (RelationNeedsWAL(rel))
320  log_newpage_buffer(buf, true);
321  }
322 
323  /* WAL log the VM page if it was modified. */
324  if (did_modify_vm && RelationNeedsWAL(rel))
325  log_newpage_buffer(vmbuf, false);
326 
328 
330 
331  if (vmbuf != InvalidBuffer)
332  ReleaseBuffer(vmbuf);
333 
334  /* Update curr_start_ptr before moving to the next page. */
335  curr_start_ptr = next_start_ptr;
336  }
337 
339 
340  pfree(ta);
341 
342  PG_RETURN_VOID();
343 }
344 
345 /*-------------------------------------------------------------------------
346  * tidcmp()
347  *
348  * Compare two item pointers, return -1, 0, or +1.
349  *
350  * See ItemPointerCompare for details.
351  * ------------------------------------------------------------------------
352  */
353 static int32
354 tidcmp(const void *a, const void *b)
355 {
356  ItemPointer iptr1 = ((const ItemPointer) a);
357  ItemPointer iptr2 = ((const ItemPointer) b);
358 
359  return ItemPointerCompare(iptr1, iptr2);
360 }
361 
362 /*-------------------------------------------------------------------------
363  * sanity_check_tid_array()
364  *
365  * Perform sanity checks on the given tid array, and set *ntids to the
366  * number of items in the array.
367  * ------------------------------------------------------------------------
368  */
369 static void
371 {
372  if (ARR_HASNULL(ta) && array_contains_nulls(ta))
373  ereport(ERROR,
374  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
375  errmsg("array must not contain nulls")));
376 
377  if (ARR_NDIM(ta) > 1)
378  ereport(ERROR,
379  (errcode(ERRCODE_DATA_EXCEPTION),
380  errmsg("argument must be empty or one-dimensional array")));
381 
382  *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
383 }
384 
385 /*-------------------------------------------------------------------------
386  * find_tids_one_page()
387  *
388  * Find all the tids residing in the same page as tids[next_start_ptr], and
389  * update next_start_ptr so that it points to the first tid in the next page.
390  *
391  * NOTE: The input tids[] array must be sorted.
392  * ------------------------------------------------------------------------
393  */
394 static BlockNumber
395 find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
396 {
397  int i;
398  BlockNumber prev_blkno,
399  blkno;
400 
401  prev_blkno = blkno = InvalidBlockNumber;
402 
403  for (i = *next_start_ptr; i < ntids; i++)
404  {
405  ItemPointerData tid = tids[i];
406 
407  blkno = ItemPointerGetBlockNumberNoCheck(&tid);
408 
409  if (i == *next_start_ptr)
410  prev_blkno = blkno;
411 
412  if (prev_blkno != blkno)
413  break;
414  }
415 
416  *next_start_ptr = i;
417  return prev_blkno;
418 }
@ ACLCHECK_NOT_OWNER
Definition: acl.h:185
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:2703
bool object_ownercheck(Oid classid, Oid objectid, Oid roleid)
Definition: aclchk.c:4145
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:264
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
#define ARR_HASNULL(a)
Definition: array.h:291
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3755
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5238
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
signed int int32
Definition: c.h:497
#define Assert(condition)
Definition: c.h:861
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define NOTICE
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Datum heap_force_freeze(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:70
PG_FUNCTION_INFO_V1(heap_force_kill)
Datum heap_force_kill(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:55
static int32 tidcmp(const void *a, const void *b)
Definition: heap_surgery.c:354
PG_MODULE_MAGIC
Definition: heap_surgery.c:26
static BlockNumber find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
Definition: heap_surgery.c:395
static Datum heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
Definition: heap_surgery.c:82
HeapTupleForceOption
Definition: heap_surgery.c:30
@ HEAP_FORCE_KILL
Definition: heap_surgery.c:31
@ HEAP_FORCE_FREEZE
Definition: heap_surgery.c:32
static void sanity_check_tid_array(ArrayType *ta, int *ntids)
Definition: heap_surgery.c:370
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderSetXvac(tup, xid)
Definition: htup_details.h:419
#define HEAP_MOVED
Definition: htup_details.h:213
#define HEAP_XACT_MASK
Definition: htup_details.h:215
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static OffsetNumber ItemPointerGetOffsetNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:114
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define RowExclusiveLock
Definition: lockdefs.h:38
void pfree(void *pointer)
Definition: mcxt.c:1521
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Oid GetUserId(void)
Definition: miscinit.c:514
ObjectType get_relkind_objtype(char relkind)
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static char * buf
Definition: pg_test_fsync.c:73
#define qsort(a, b, c, d)
Definition: port.h:447
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:47
ItemPointerData t_ctid
Definition: htup_details.h:161
Form_pg_class rd_rel
Definition: rel.h:111
#define FrozenTransactionId
Definition: transam.h:33
#define InvalidTransactionId
Definition: transam.h:31
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool RecoveryInProgress(void)
Definition: xlog.c:6333
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237