PostgreSQL Source Code  git master
heap_surgery.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heap_surgery.c
4  * Functions to perform surgery on a damaged heap table.
5  *
6  * Copyright (c) 2020-2022, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * contrib/pg_surgery/heap_surgery.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/heapam.h"
16 #include "access/visibilitymap.h"
17 #include "access/xloginsert.h"
18 #include "catalog/pg_am_d.h"
19 #include "catalog/pg_proc_d.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "utils/acl.h"
23 #include "utils/rel.h"
24 
26 
27 /* Options to forcefully change the state of a heap tuple. */
29 {
33 
36 
37 static int32 tidcmp(const void *a, const void *b);
39  HeapTupleForceOption heap_force_opt);
40 static void sanity_check_tid_array(ArrayType *ta, int *ntids);
41 static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
42  OffsetNumber *next_start_ptr);
43 
44 /*-------------------------------------------------------------------------
45  * heap_force_kill()
46  *
47  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
48  * given TID array.
49  *
50  * Usage: SELECT heap_force_kill(regclass, tid[]);
51  *-------------------------------------------------------------------------
52  */
53 Datum
55 {
57 }
58 
59 /*-------------------------------------------------------------------------
60  * heap_force_freeze()
61  *
62  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
63  * given TID array.
64  *
65  * Usage: SELECT heap_force_freeze(regclass, tid[]);
66  *-------------------------------------------------------------------------
67  */
68 Datum
70 {
72 }
73 
74 /*-------------------------------------------------------------------------
75  * heap_force_common()
76  *
77  * Common code for heap_force_kill and heap_force_freeze
78  *-------------------------------------------------------------------------
79  */
80 static Datum
82 {
83  Oid relid = PG_GETARG_OID(0);
85  ItemPointer tids;
86  int ntids,
87  nblocks;
88  Relation rel;
89  OffsetNumber curr_start_ptr,
90  next_start_ptr;
91  bool include_this_tid[MaxHeapTuplesPerPage];
92 
93  if (RecoveryInProgress())
94  ereport(ERROR,
95  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
96  errmsg("recovery is in progress"),
97  errhint("heap surgery functions cannot be executed during recovery.")));
98 
99  /* Check inputs. */
100  sanity_check_tid_array(ta, &ntids);
101 
102  rel = relation_open(relid, RowExclusiveLock);
103 
104  /*
105  * Check target relation.
106  */
107  if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
108  ereport(ERROR,
109  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
110  errmsg("cannot operate on relation \"%s\"",
112  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
113 
114  if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
115  ereport(ERROR,
116  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
117  errmsg("only heap AM is supported")));
118 
119  /* Must be owner of the table or superuser. */
122  get_relkind_objtype(rel->rd_rel->relkind),
124 
125  tids = ((ItemPointer) ARR_DATA_PTR(ta));
126 
127  /*
128  * If there is more than one TID in the array, sort them so that we can
129  * easily fetch all the TIDs belonging to one particular page from the
130  * array.
131  */
132  if (ntids > 1)
133  qsort((void *) tids, ntids, sizeof(ItemPointerData), tidcmp);
134 
135  curr_start_ptr = next_start_ptr = 0;
136  nblocks = RelationGetNumberOfBlocks(rel);
137 
138  /*
139  * Loop, performing the necessary actions for each block.
140  */
141  while (next_start_ptr != ntids)
142  {
143  Buffer buf;
144  Buffer vmbuf = InvalidBuffer;
145  Page page;
146  BlockNumber blkno;
147  OffsetNumber curoff;
148  OffsetNumber maxoffset;
149  int i;
150  bool did_modify_page = false;
151  bool did_modify_vm = false;
152 
154 
155  /*
156  * Find all the TIDs belonging to one particular page starting from
157  * next_start_ptr and process them one by one.
158  */
159  blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
160 
161  /* Check whether the block number is valid. */
162  if (blkno >= nblocks)
163  {
164  /* Update curr_start_ptr before moving to the next page. */
165  curr_start_ptr = next_start_ptr;
166 
167  ereport(NOTICE,
168  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
169  errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
170  blkno, RelationGetRelationName(rel))));
171  continue;
172  }
173 
174  buf = ReadBuffer(rel, blkno);
176 
177  page = BufferGetPage(buf);
178 
179  maxoffset = PageGetMaxOffsetNumber(page);
180 
181  /*
182  * Figure out which TIDs we are going to process and which ones we are
183  * going to skip.
184  */
185  memset(include_this_tid, 0, sizeof(include_this_tid));
186  for (i = curr_start_ptr; i < next_start_ptr; i++)
187  {
189  ItemId itemid;
190 
191  /* Check whether the offset number is valid. */
192  if (offno == InvalidOffsetNumber || offno > maxoffset)
193  {
194  ereport(NOTICE,
195  errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
196  blkno, offno, RelationGetRelationName(rel)));
197  continue;
198  }
199 
200  itemid = PageGetItemId(page, offno);
201 
202  /* Only accept a normal item ID: skip redirected, dead, and unused ones. */
203  if (ItemIdIsRedirected(itemid))
204  {
205  ereport(NOTICE,
206  errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
207  blkno, offno, RelationGetRelationName(rel),
208  ItemIdGetRedirect(itemid)));
209  continue;
210  }
211  else if (ItemIdIsDead(itemid))
212  {
213  ereport(NOTICE,
214  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
215  blkno, offno, RelationGetRelationName(rel))));
216  continue;
217  }
218  else if (!ItemIdIsUsed(itemid))
219  {
220  ereport(NOTICE,
221  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
222  blkno, offno, RelationGetRelationName(rel))));
223  continue;
224  }
225 
226  /* Mark it for processing. */
227  Assert(offno < MaxHeapTuplesPerPage);
228  include_this_tid[offno] = true;
229  }
230 
231  /*
232  * Before entering the critical section, pin the visibility map page
233  * if it appears to be necessary.
234  */
235  if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
236  visibilitymap_pin(rel, blkno, &vmbuf);
237 
238  /* No ereport(ERROR) from here until all the changes are logged. */
240 
241  for (curoff = FirstOffsetNumber; curoff <= maxoffset;
242  curoff = OffsetNumberNext(curoff))
243  {
244  ItemId itemid;
245 
246  if (!include_this_tid[curoff])
247  continue;
248 
249  itemid = PageGetItemId(page, curoff);
250  Assert(ItemIdIsNormal(itemid));
251 
252  did_modify_page = true;
253 
254  if (heap_force_opt == HEAP_FORCE_KILL)
255  {
256  ItemIdSetDead(itemid);
257 
258  /*
259  * If the page is marked all-visible, we must clear
260  * PD_ALL_VISIBLE flag on the page header and an all-visible
261  * bit on the visibility map corresponding to the page.
262  */
263  if (PageIsAllVisible(page))
264  {
265  PageClearAllVisible(page);
266  visibilitymap_clear(rel, blkno, vmbuf,
268  did_modify_vm = true;
269  }
270  }
271  else
272  {
273  HeapTupleHeader htup;
274 
275  Assert(heap_force_opt == HEAP_FORCE_FREEZE);
276 
277  htup = (HeapTupleHeader) PageGetItem(page, itemid);
278 
279  /*
280  * Reset all visibility-related fields of the tuple. This
281  * logic should mimic heap_execute_freeze_tuple(), but we
282  * choose to reset xmin and ctid just to be sure that no
283  * potentially-garbled data is left behind.
284  */
285  ItemPointerSet(&htup->t_ctid, blkno, curoff);
288  if (htup->t_infomask & HEAP_MOVED)
289  {
290  if (htup->t_infomask & HEAP_MOVED_OFF)
292  else
294  }
295 
296  /*
297  * Clear all the visibility-related bits of this tuple and
298  * mark it as frozen. Also, get rid of HOT_UPDATED and
299  * KEYS_UPDATED bits.
300  */
301  htup->t_infomask &= ~HEAP_XACT_MASK;
303  htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
304  htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
305  }
306  }
307 
308  /*
309  * Mark the buffer dirty and do the WAL logging only if the page was
310  * actually modified.
311  */
312  if (did_modify_page)
313  {
314  /* Mark buffer dirty before we write WAL. */
316 
317  /* XLOG stuff */
318  if (RelationNeedsWAL(rel))
319  log_newpage_buffer(buf, true);
320  }
321 
322  /* WAL log the VM page if it was modified. */
323  if (did_modify_vm && RelationNeedsWAL(rel))
324  log_newpage_buffer(vmbuf, false);
325 
327 
329 
330  if (vmbuf != InvalidBuffer)
331  ReleaseBuffer(vmbuf);
332 
333  /* Update curr_start_ptr before moving to the next page. */
334  curr_start_ptr = next_start_ptr;
335  }
336 
338 
339  pfree(ta);
340 
341  PG_RETURN_VOID();
342 }
343 
344 /*-------------------------------------------------------------------------
345  * tidcmp()
346  *
347  * Compare two item pointers, return -1, 0, or +1.
348  *
349  * See ItemPointerCompare for details.
350  * ------------------------------------------------------------------------
351  */
352 static int32
353 tidcmp(const void *a, const void *b)
354 {
355  ItemPointer iptr1 = ((const ItemPointer) a);
356  ItemPointer iptr2 = ((const ItemPointer) b);
357 
358  return ItemPointerCompare(iptr1, iptr2);
359 }
360 
361 /*-------------------------------------------------------------------------
362  * sanity_check_tid_array()
363  *
364  * Perform sanity checks on the given tid array, and set *ntids to the
365  * number of items in the array.
366  * ------------------------------------------------------------------------
367  */
368 static void
370 {
371  if (ARR_HASNULL(ta) && array_contains_nulls(ta))
372  ereport(ERROR,
373  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
374  errmsg("array must not contain nulls")));
375 
376  if (ARR_NDIM(ta) > 1)
377  ereport(ERROR,
378  (errcode(ERRCODE_DATA_EXCEPTION),
379  errmsg("argument must be empty or one-dimensional array")));
380 
381  *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
382 }
383 
384 /*-------------------------------------------------------------------------
385  * find_tids_one_page()
386  *
387  * Find all the tids residing in the same page as tids[next_start_ptr], and
388  * update next_start_ptr so that it points to the first tid in the next page.
389  *
390  * NOTE: The input tids[] array must be sorted.
391  * ------------------------------------------------------------------------
392  */
393 static BlockNumber
394 find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
395 {
396  int i;
397  BlockNumber prev_blkno,
398  blkno;
399 
400  prev_blkno = blkno = InvalidBlockNumber;
401 
402  for (i = *next_start_ptr; i < ntids; i++)
403  {
404  ItemPointerData tid = tids[i];
405 
406  blkno = ItemPointerGetBlockNumberNoCheck(&tid);
407 
408  if (i == *next_start_ptr)
409  prev_blkno = blkno;
410 
411  if (prev_blkno != blkno)
412  break;
413  }
414 
415  *next_start_ptr = i;
416  return prev_blkno;
417 }
@ ACLCHECK_NOT_OWNER
Definition: acl.h:184
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:5171
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3512
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:257
#define ARR_NDIM(a)
Definition: array.h:283
#define ARR_DATA_PTR(a)
Definition: array.h:315
#define ARR_DIMS(a)
Definition: array.h:287
#define ARR_HASNULL(a)
Definition: array.h:284
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3558
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:76
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3915
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3938
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1573
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4213
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:702
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:216
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
Pointer Page
Definition: bufpage.h:78
#define PageIsAllVisible(page)
Definition: bufpage.h:384
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:356
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:234
#define PageGetItem(page, itemId)
Definition: bufpage.h:339
#define PageClearAllVisible(page)
Definition: bufpage.h:388
signed int int32
Definition: c.h:429
int errhint(const char *fmt,...)
Definition: elog.c:1151
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define NOTICE
Definition: elog.h:29
#define ereport(elevel,...)
Definition: elog.h:143
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Datum heap_force_freeze(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:69
PG_FUNCTION_INFO_V1(heap_force_kill)
Datum heap_force_kill(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:54
static int32 tidcmp(const void *a, const void *b)
Definition: heap_surgery.c:353
PG_MODULE_MAGIC
Definition: heap_surgery.c:25
static BlockNumber find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
Definition: heap_surgery.c:394
static Datum heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
Definition: heap_surgery.c:81
HeapTupleForceOption
Definition: heap_surgery.c:29
@ HEAP_FORCE_KILL
Definition: heap_surgery.c:30
@ HEAP_FORCE_FREEZE
Definition: heap_surgery.c:31
static void sanity_check_tid_array(ArrayType *ta, int *ntids)
Definition: heap_surgery.c:369
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_MOVED_OFF
Definition: htup_details.h:210
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:205
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:274
#define HEAP_HOT_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:314
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:375
#define HeapTupleHeaderSetXvac(tup, xid)
Definition: htup_details.h:418
#define HEAP_MOVED
Definition: htup_details.h:212
#define HEAP_XACT_MASK
Definition: htup_details.h:214
#define HEAP_XMAX_INVALID
Definition: htup_details.h:207
#define MaxHeapTuplesPerPage
Definition: htup_details.h:568
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
#define ItemPointerGetBlockNumberNoCheck(pointer)
Definition: itemptr.h:89
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define ItemPointerGetOffsetNumberNoCheck(pointer)
Definition: itemptr.h:108
Assert(fmt[strlen(fmt) - 1] !='\n')
#define RowExclusiveLock
Definition: lockdefs.h:38
void pfree(void *pointer)
Definition: mcxt.c:1175
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
Oid GetUserId(void)
Definition: miscinit.c:492
ObjectType get_relkind_objtype(char relkind)
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
int errdetail_relkind_not_supported(char relkind)
Definition: pg_class.c:24
static char * buf
Definition: pg_test_fsync.c:67
#define qsort(a, b, c, d)
Definition: port.h:495
uintptr_t Datum
Definition: postgres.h:411
unsigned int Oid
Definition: postgres_ext.h:31
#define RelationGetRelid(relation)
Definition: rel.h:489
#define RelationGetRelationName(relation)
Definition: rel.h:523
#define RelationNeedsWAL(relation)
Definition: rel.h:613
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:48
ItemPointerData t_ctid
Definition: htup_details.h:160
Form_pg_class rd_rel
Definition: rel.h:109
#define FrozenTransactionId
Definition: transam.h:33
#define InvalidTransactionId
Definition: transam.h:31
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
#define VISIBILITYMAP_VALID_BITS
bool RecoveryInProgress(void)
Definition: xlog.c:5753
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1177