PostgreSQL Source Code  git master
heap_surgery.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heap_surgery.c
4  * Functions to perform surgery on the damaged heap table.
5  *
6  * Copyright (c) 2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * contrib/pg_surgery/heap_surgery.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/heapam.h"
16 #include "access/visibilitymap.h"
17 #include "catalog/pg_am_d.h"
18 #include "catalog/pg_proc_d.h"
19 #include "miscadmin.h"
20 #include "storage/bufmgr.h"
21 #include "utils/acl.h"
22 #include "utils/rel.h"
23 
25 
26 /* Options to forcefully change the state of a heap tuple. */
28 {
32 
35 
36 static int32 tidcmp(const void *a, const void *b);
38  HeapTupleForceOption heap_force_opt);
39 static void sanity_check_tid_array(ArrayType *ta, int *ntids);
40 static void sanity_check_relation(Relation rel);
41 static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
42  OffsetNumber *next_start_ptr);
43 
44 /*-------------------------------------------------------------------------
45  * heap_force_kill()
46  *
47  * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
48  * given TID array.
49  *
50  * Usage: SELECT heap_force_kill(regclass, tid[]);
51  *-------------------------------------------------------------------------
52  */
53 Datum
55 {
57 }
58 
59 /*-------------------------------------------------------------------------
60  * heap_force_freeze()
61  *
62  * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
63  * given TID array.
64  *
65  * Usage: SELECT heap_force_freeze(regclass, tid[]);
66  *-------------------------------------------------------------------------
67  */
68 Datum
70 {
72 }
73 
74 /*-------------------------------------------------------------------------
75  * heap_force_common()
76  *
77  * Common code for heap_force_kill and heap_force_freeze
78  *-------------------------------------------------------------------------
79  */
80 static Datum
82 {
83  Oid relid = PG_GETARG_OID(0);
85  ItemPointer tids;
86  int ntids,
87  nblocks;
88  Relation rel;
89  OffsetNumber curr_start_ptr,
90  next_start_ptr;
91  bool include_this_tid[MaxHeapTuplesPerPage];
92 
93  if (RecoveryInProgress())
94  ereport(ERROR,
95  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
96  errmsg("recovery is in progress"),
97  errhint("heap surgery functions cannot be executed during recovery.")));
98 
99  /* Check inputs. */
100  sanity_check_tid_array(ta, &ntids);
101 
102  rel = relation_open(relid, RowExclusiveLock);
103 
104  /* Check target relation. */
106 
107  tids = ((ItemPointer) ARR_DATA_PTR(ta));
108 
109  /*
110  * If there is more than one TID in the array, sort them so that we can
111  * easily fetch all the TIDs belonging to one particular page from the
112  * array.
113  */
114  if (ntids > 1)
115  qsort((void *) tids, ntids, sizeof(ItemPointerData), tidcmp);
116 
117  curr_start_ptr = next_start_ptr = 0;
118  nblocks = RelationGetNumberOfBlocks(rel);
119 
120  /*
121  * Loop, performing the necessary actions for each block.
122  */
123  while (next_start_ptr != ntids)
124  {
125  Buffer buf;
126  Buffer vmbuf = InvalidBuffer;
127  Page page;
128  BlockNumber blkno;
129  OffsetNumber curoff;
130  OffsetNumber maxoffset;
131  int i;
132  bool did_modify_page = false;
133  bool did_modify_vm = false;
134 
136 
137  /*
138  * Find all the TIDs belonging to one particular page starting from
139  * next_start_ptr and process them one by one.
140  */
141  blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
142 
143  /* Check whether the block number is valid. */
144  if (blkno >= nblocks)
145  {
146  /* Update curr_start_ptr before moving to the next page. */
147  curr_start_ptr = next_start_ptr;
148 
149  ereport(NOTICE,
150  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
151  errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
152  blkno, RelationGetRelationName(rel))));
153  continue;
154  }
155 
156  buf = ReadBuffer(rel, blkno);
158 
159  page = BufferGetPage(buf);
160 
161  maxoffset = PageGetMaxOffsetNumber(page);
162 
163  /*
164  * Figure out which TIDs we are going to process and which ones we are
165  * going to skip.
166  */
167  memset(include_this_tid, 0, sizeof(include_this_tid));
168  for (i = curr_start_ptr; i < next_start_ptr; i++)
169  {
171  ItemId itemid;
172 
173  /* Check whether the offset number is valid. */
174  if (offno == InvalidOffsetNumber || offno > maxoffset)
175  {
176  ereport(NOTICE,
177  errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
178  blkno, offno, RelationGetRelationName(rel)));
179  continue;
180  }
181 
182  itemid = PageGetItemId(page, offno);
183 
184  /* Only accept an item ID that is used. */
185  if (ItemIdIsRedirected(itemid))
186  {
187  ereport(NOTICE,
188  errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
189  blkno, offno, RelationGetRelationName(rel),
190  ItemIdGetRedirect(itemid)));
191  continue;
192  }
193  else if (ItemIdIsDead(itemid))
194  {
195  ereport(NOTICE,
196  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
197  blkno, offno, RelationGetRelationName(rel))));
198  continue;
199  }
200  else if (!ItemIdIsUsed(itemid))
201  {
202  ereport(NOTICE,
203  (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
204  blkno, offno, RelationGetRelationName(rel))));
205  continue;
206  }
207 
208  /* Mark it for processing. */
209  Assert(offno < MaxHeapTuplesPerPage);
210  include_this_tid[offno] = true;
211  }
212 
213  /*
214  * Before entering the critical section, pin the visibility map page
215  * if it appears to be necessary.
216  */
217  if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
218  visibilitymap_pin(rel, blkno, &vmbuf);
219 
220  /* No ereport(ERROR) from here until all the changes are logged. */
222 
223  for (curoff = FirstOffsetNumber; curoff <= maxoffset;
224  curoff = OffsetNumberNext(curoff))
225  {
226  ItemId itemid;
227 
228  if (!include_this_tid[curoff])
229  continue;
230 
231  itemid = PageGetItemId(page, curoff);
232  Assert(ItemIdIsNormal(itemid));
233 
234  did_modify_page = true;
235 
236  if (heap_force_opt == HEAP_FORCE_KILL)
237  {
238  ItemIdSetDead(itemid);
239 
240  /*
241  * If the page is marked all-visible, we must clear
242  * PD_ALL_VISIBLE flag on the page header and an all-visible
243  * bit on the visibility map corresponding to the page.
244  */
245  if (PageIsAllVisible(page))
246  {
247  PageClearAllVisible(page);
248  visibilitymap_clear(rel, blkno, vmbuf,
250  did_modify_vm = true;
251  }
252  }
253  else
254  {
255  HeapTupleHeader htup;
256 
257  Assert(heap_force_opt == HEAP_FORCE_FREEZE);
258 
259  htup = (HeapTupleHeader) PageGetItem(page, itemid);
260 
261  /*
262  * Reset all visibility-related fields of the tuple. This
263  * logic should mimic heap_execute_freeze_tuple(), but we
264  * choose to reset xmin and ctid just to be sure that no
265  * potentially-garbled data is left behind.
266  */
267  ItemPointerSet(&htup->t_ctid, blkno, curoff);
270  if (htup->t_infomask & HEAP_MOVED)
271  {
272  if (htup->t_infomask & HEAP_MOVED_OFF)
274  else
276  }
277 
278  /*
279  * Clear all the visibility-related bits of this tuple and
280  * mark it as frozen. Also, get rid of HOT_UPDATED and
281  * KEYS_UPDATED bits.
282  */
283  htup->t_infomask &= ~HEAP_XACT_MASK;
285  htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
286  htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
287  }
288  }
289 
290  /*
291  * Mark the buffer dirty and do the WAL logging only if the page was
292  * actually modified.
293  */
294  if (did_modify_page)
295  {
296  /* Mark buffer dirty before we write WAL. */
297  MarkBufferDirty(buf);
298 
299  /* XLOG stuff */
300  if (RelationNeedsWAL(rel))
301  log_newpage_buffer(buf, true);
302  }
303 
304  /* WAL log the VM page if it was modified. */
305  if (did_modify_vm && RelationNeedsWAL(rel))
306  log_newpage_buffer(vmbuf, false);
307 
309 
310  UnlockReleaseBuffer(buf);
311 
312  if (vmbuf != InvalidBuffer)
313  ReleaseBuffer(vmbuf);
314 
315  /* Update curr_start_ptr before moving to the next page. */
316  curr_start_ptr = next_start_ptr;
317  }
318 
320 
321  pfree(ta);
322 
323  PG_RETURN_VOID();
324 }
325 
326 /*-------------------------------------------------------------------------
327  * tidcmp()
328  *
329  * Compare two item pointers, return -1, 0, or +1.
330  *
331  * See ItemPointerCompare for details.
332  * ------------------------------------------------------------------------
333  */
334 static int32
335 tidcmp(const void *a, const void *b)
336 {
337  ItemPointer iptr1 = ((const ItemPointer) a);
338  ItemPointer iptr2 = ((const ItemPointer) b);
339 
340  return ItemPointerCompare(iptr1, iptr2);
341 }
342 
343 /*-------------------------------------------------------------------------
344  * sanity_check_tid_array()
345  *
346  * Perform sanity checks on the given tid array, and set *ntids to the
347  * number of items in the array.
348  * ------------------------------------------------------------------------
349  */
350 static void
352 {
353  if (ARR_HASNULL(ta) && array_contains_nulls(ta))
354  ereport(ERROR,
355  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
356  errmsg("array must not contain nulls")));
357 
358  if (ARR_NDIM(ta) > 1)
359  ereport(ERROR,
360  (errcode(ERRCODE_DATA_EXCEPTION),
361  errmsg("argument must be empty or one-dimensional array")));
362 
363  *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
364 }
365 
366 /*-------------------------------------------------------------------------
367  * sanity_check_relation()
368  *
369  * Perform sanity checks on the given relation.
370  * ------------------------------------------------------------------------
371  */
372 static void
374 {
375  if (rel->rd_rel->relkind != RELKIND_RELATION &&
376  rel->rd_rel->relkind != RELKIND_MATVIEW &&
377  rel->rd_rel->relkind != RELKIND_TOASTVALUE)
378  ereport(ERROR,
379  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
380  errmsg("\"%s\" is not a table, materialized view, or TOAST table",
381  RelationGetRelationName(rel))));
382 
383  if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
384  ereport(ERROR,
385  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
386  errmsg("only heap AM is supported")));
387 
388  /* Must be owner of the table or superuser. */
391  get_relkind_objtype(rel->rd_rel->relkind),
393 }
394 
395 /*-------------------------------------------------------------------------
396  * find_tids_one_page()
397  *
398  * Find all the tids residing in the same page as tids[next_start_ptr], and
399  * update next_start_ptr so that it points to the first tid in the next page.
400  *
401  * NOTE: The input tids[] array must be sorted.
402  * ------------------------------------------------------------------------
403  */
404 static BlockNumber
405 find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
406 {
407  int i;
408  BlockNumber prev_blkno,
409  blkno;
410 
411  prev_blkno = blkno = InvalidBlockNumber;
412 
413  for (i = *next_start_ptr; i < ntids; i++)
414  {
415  ItemPointerData tid = tids[i];
416 
417  blkno = ItemPointerGetBlockNumberNoCheck(&tid);
418 
419  if (i == *next_start_ptr)
420  prev_blkno = blkno;
421 
422  if (prev_blkno != blkno)
423  break;
424  }
425 
426  *next_start_ptr = i;
427  return prev_blkno;
428 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
#define ItemPointerGetOffsetNumberNoCheck(pointer)
Definition: itemptr.h:108
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3807
Datum heap_force_kill(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:54
int errhint(const char *fmt,...)
Definition: elog.c:1068
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1090
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define PageIsAllVisible(page)
Definition: bufpage.h:385
Oid GetUserId(void)
Definition: miscinit.c:476
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:252
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:205
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
#define InvalidBuffer
Definition: buf.h:25
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
int errcode(int sqlerrcode)
Definition: elog.c:610
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3511
static void sanity_check_relation(Relation rel)
Definition: heap_surgery.c:373
PG_FUNCTION_INFO_V1(heap_force_kill)
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
bool RecoveryInProgress(void)
Definition: xlog.c:8076
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define HeapTupleHeaderSetXvac(tup, xid)
Definition: htup_details.h:423
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
Datum heap_force_freeze(PG_FUNCTION_ARGS)
Definition: heap_surgery.c:69
signed int int32
Definition: c.h:363
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define VISIBILITYMAP_VALID_BITS
Definition: visibilitymap.h:28
static void sanity_check_tid_array(ArrayType *ta, int *ntids)
Definition: heap_surgery.c:351
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
Definition: aclchk.c:3294
void pfree(void *pointer)
Definition: mcxt.c:1057
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3534
#define ERROR
Definition: elog.h:43
#define HEAP_XMAX_INVALID
Definition: htup_details.h:207
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:48
#define ARR_DIMS(a)
Definition: array.h:282
ItemPointerData t_ctid
Definition: htup_details.h:160
#define ARR_DATA_PTR(a)
Definition: array.h:310
static char * buf
Definition: pg_test_fsync.c:68
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define FirstOffsetNumber
Definition: off.h:27
#define RowExclusiveLock
Definition: lockdefs.h:38
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:380
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define ARR_HASNULL(a)
Definition: array.h:279
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
PG_MODULE_MAGIC
Definition: heap_surgery.c:24
static int32 tidcmp(const void *a, const void *b)
Definition: heap_surgery.c:335
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:352
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:278
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
#define InvalidOffsetNumber
Definition: off.h:26
#define HEAP_MOVED
Definition: htup_details.h:216
#define ereport(elevel,...)
Definition: elog.h:144
#define NOTICE
Definition: elog.h:37
#define PG_RETURN_VOID()
Definition: fmgr.h:348
#define PageClearAllVisible(page)
Definition: bufpage.h:389
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define HEAP_MOVED_OFF
Definition: htup_details.h:210
#define Assert(condition)
Definition: c.h:746
#define FrozenTransactionId
Definition: transam.h:33
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4687
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static BlockNumber find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
Definition: heap_surgery.c:405
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:607
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define InvalidBlockNumber
Definition: block.h:33
#define RelationNeedsWAL(relation)
Definition: rel.h:562
#define ARR_NDIM(a)
Definition: array.h:278
static Datum heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
Definition: heap_surgery.c:81
int errmsg(const char *fmt,...)
Definition: elog.c:821
#define ItemPointerGetBlockNumberNoCheck(pointer)
Definition: itemptr.h:89
int i
ObjectType get_relkind_objtype(char relkind)
HeapTupleForceOption
Definition: heap_surgery.c:27
#define HEAP_HOT_UPDATED
Definition: htup_details.h:280
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define HEAP_XACT_MASK
Definition: htup_details.h:218
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define qsort(a, b, c, d)
Definition: port.h:497
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3550
int Buffer
Definition: buf.h:23
#define RelationGetRelid(relation)
Definition: rel.h:456
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:319