PostgreSQL Source Code  git master
bufpage.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.c
4  * POSTGRES standard buffer page code.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/storage/page/bufpage.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/htup_details.h"
18 #include "access/itup.h"
19 #include "access/xlog.h"
20 #include "pgstat.h"
21 #include "storage/checksum.h"
22 #include "utils/memdebug.h"
23 #include "utils/memutils.h"
24 
25 
/*
 * GUC variable: when true, PageIsVerifiedExtended() accepts a page whose
 * checksum does not match as long as its header looks sane.
 */
bool		ignore_checksum_failure = false;
28 
29 
30 /* ----------------------------------------------------------------
31  * Page support functions
32  * ----------------------------------------------------------------
33  */
34 
35 /*
36  * PageInit
37  * Initializes the contents of a page.
38  * Note that we don't calculate an initial checksum here; that's not done
39  * until it's time to write.
40  */
41 void
42 PageInit(Page page, Size pageSize, Size specialSize)
43 {
44  PageHeader p = (PageHeader) page;
45 
46  specialSize = MAXALIGN(specialSize);
47 
48  Assert(pageSize == BLCKSZ);
49  Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 
51  /* Make sure all fields of page are zero, as well as unused space */
52  MemSet(p, 0, pageSize);
53 
54  p->pd_flags = 0;
56  p->pd_upper = pageSize - specialSize;
57  p->pd_special = pageSize - specialSize;
59  /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
60 }
61 
62 
63 /*
64  * PageIsVerifiedExtended
65  * Check that the page header and checksum (if any) appear valid.
66  *
67  * This is called when a page has just been read in from disk. The idea is
68  * to cheaply detect trashed pages before we go nuts following bogus line
69  * pointers, testing invalid transaction identifiers, etc.
70  *
71  * It turns out to be necessary to allow zeroed pages here too. Even though
72  * this routine is *not* called when deliberately adding a page to a relation,
73  * there are scenarios in which a zeroed page might be found in a table.
74  * (Example: a backend extends a relation, then crashes before it can write
75  * any WAL entry about the new page. The kernel will already have the
76  * zeroed page in the file, and it will stay that way after restart.) So we
77  * allow zeroed pages here, and are careful that the page access macros
78  * treat such a page as empty and without free space. Eventually, VACUUM
79  * will clean up such a page and make it usable.
80  *
81  * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
82  * a checksum failure.
83  *
84  * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
85  * to pgstat.
86  */
87 bool
88 PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
89 {
90  PageHeader p = (PageHeader) page;
91  size_t *pagebytes;
92  int i;
93  bool checksum_failure = false;
94  bool header_sane = false;
95  bool all_zeroes = false;
96  uint16 checksum = 0;
97 
98  /*
99  * Don't verify page data unless the page passes basic non-zero test
100  */
101  if (!PageIsNew(page))
102  {
103  if (DataChecksumsEnabled())
104  {
105  checksum = pg_checksum_page((char *) page, blkno);
106 
107  if (checksum != p->pd_checksum)
108  checksum_failure = true;
109  }
110 
111  /*
112  * The following checks don't prove the header is correct, only that
113  * it looks sane enough to allow into the buffer pool. Later usage of
114  * the block can still reveal problems, which is why we offer the
115  * checksum option.
116  */
117  if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
118  p->pd_lower <= p->pd_upper &&
119  p->pd_upper <= p->pd_special &&
120  p->pd_special <= BLCKSZ &&
121  p->pd_special == MAXALIGN(p->pd_special))
122  header_sane = true;
123 
124  if (header_sane && !checksum_failure)
125  return true;
126  }
127 
128  /* Check all-zeroes case */
129  all_zeroes = true;
130  pagebytes = (size_t *) page;
131  for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
132  {
133  if (pagebytes[i] != 0)
134  {
135  all_zeroes = false;
136  break;
137  }
138  }
139 
140  if (all_zeroes)
141  return true;
142 
143  /*
144  * Throw a WARNING if the checksum fails, but only after we've checked for
145  * the all-zeroes case.
146  */
147  if (checksum_failure)
148  {
149  if ((flags & PIV_LOG_WARNING) != 0)
152  errmsg("page verification failed, calculated checksum %u but expected %u",
153  checksum, p->pd_checksum)));
154 
155  if ((flags & PIV_REPORT_STAT) != 0)
157 
158  if (header_sane && ignore_checksum_failure)
159  return true;
160  }
161 
162  return false;
163 }
164 
165 
166 /*
167  * PageAddItemExtended
168  *
169  * Add an item to a page. Return value is the offset at which it was
170  * inserted, or InvalidOffsetNumber if the item is not inserted for any
171  * reason. A WARNING is issued indicating the reason for the refusal.
172  *
173  * offsetNumber must be either InvalidOffsetNumber to specify finding a
174  * free line pointer, or a value between FirstOffsetNumber and one past
175  * the last existing item, to specify using that particular line pointer.
176  *
177  * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
178  * the item at the specified offsetNumber, which must be either a
179  * currently-unused line pointer, or one past the last existing item.
180  *
181  * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
182  * the item at the specified offsetNumber, moving existing items later
183  * in the array to make room.
184  *
185  * If offsetNumber is not valid, then assign a slot by finding the first
186  * one that is both unused and deallocated.
187  *
188  * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
189  * MaxHeapTuplesPerPage line pointers on the page.
190  *
191  * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
192  */
195  Item item,
196  Size size,
197  OffsetNumber offsetNumber,
198  int flags)
199 {
200  PageHeader phdr = (PageHeader) page;
201  Size alignedSize;
202  int lower;
203  int upper;
204  ItemId itemId;
205  OffsetNumber limit;
206  bool needshuffle = false;
207 
208  /*
209  * Be wary about corrupted page pointers
210  */
211  if (phdr->pd_lower < SizeOfPageHeaderData ||
212  phdr->pd_lower > phdr->pd_upper ||
213  phdr->pd_upper > phdr->pd_special ||
214  phdr->pd_special > BLCKSZ)
215  ereport(PANIC,
217  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
218  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
219 
220  /*
221  * Select offsetNumber to place the new item at
222  */
224 
225  /* was offsetNumber passed in? */
226  if (OffsetNumberIsValid(offsetNumber))
227  {
228  /* yes, check it */
229  if ((flags & PAI_OVERWRITE) != 0)
230  {
231  if (offsetNumber < limit)
232  {
233  itemId = PageGetItemId(page, offsetNumber);
234  if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
235  {
236  elog(WARNING, "will not overwrite a used ItemId");
237  return InvalidOffsetNumber;
238  }
239  }
240  }
241  else
242  {
243  if (offsetNumber < limit)
244  needshuffle = true; /* need to move existing linp's */
245  }
246  }
247  else
248  {
249  /* offsetNumber was not passed in, so find a free slot */
250  /* if no free slot, we'll put it at limit (1st open slot) */
251  if (PageHasFreeLinePointers(page))
252  {
253  /*
254  * Scan line pointer array to locate a "recyclable" (unused)
255  * ItemId.
256  *
257  * Always use earlier items first. PageTruncateLinePointerArray
258  * can only truncate unused items when they appear as a contiguous
259  * group at the end of the line pointer array.
260  */
261  for (offsetNumber = FirstOffsetNumber;
262  offsetNumber < limit; /* limit is maxoff+1 */
263  offsetNumber++)
264  {
265  itemId = PageGetItemId(page, offsetNumber);
266 
267  /*
268  * We check for no storage as well, just to be paranoid;
269  * unused items should never have storage. Assert() that the
270  * invariant is respected too.
271  */
272  Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
273 
274  if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
275  break;
276  }
277  if (offsetNumber >= limit)
278  {
279  /* the hint is wrong, so reset it */
281  }
282  }
283  else
284  {
285  /* don't bother searching if hint says there's no free slot */
286  offsetNumber = limit;
287  }
288  }
289 
290  /* Reject placing items beyond the first unused line pointer */
291  if (offsetNumber > limit)
292  {
293  elog(WARNING, "specified item offset is too large");
294  return InvalidOffsetNumber;
295  }
296 
297  /* Reject placing items beyond heap boundary, if heap */
298  if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
299  {
300  elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
301  return InvalidOffsetNumber;
302  }
303 
304  /*
305  * Compute new lower and upper pointers for page, see if it'll fit.
306  *
307  * Note: do arithmetic as signed ints, to avoid mistakes if, say,
308  * alignedSize > pd_upper.
309  */
310  if (offsetNumber == limit || needshuffle)
311  lower = phdr->pd_lower + sizeof(ItemIdData);
312  else
313  lower = phdr->pd_lower;
314 
315  alignedSize = MAXALIGN(size);
316 
317  upper = (int) phdr->pd_upper - (int) alignedSize;
318 
319  if (lower > upper)
320  return InvalidOffsetNumber;
321 
322  /*
323  * OK to insert the item. First, shuffle the existing pointers if needed.
324  */
325  itemId = PageGetItemId(page, offsetNumber);
326 
327  if (needshuffle)
328  memmove(itemId + 1, itemId,
329  (limit - offsetNumber) * sizeof(ItemIdData));
330 
331  /* set the line pointer */
332  ItemIdSetNormal(itemId, upper, size);
333 
334  /*
335  * Items normally contain no uninitialized bytes. Core bufpage consumers
336  * conform, but this is not a necessary coding rule; a new index AM could
337  * opt to depart from it. However, data type input functions and other
338  * C-language functions that synthesize datums should initialize all
339  * bytes; datumIsEqual() relies on this. Testing here, along with the
340  * similar check in printtup(), helps to catch such mistakes.
341  *
342  * Values of the "name" type retrieved via index-only scans may contain
343  * uninitialized bytes; see comment in btrescan(). Valgrind will report
344  * this as an error, but it is safe to ignore.
345  */
347 
348  /* copy the item's data onto the page */
349  memcpy((char *) page + upper, item, size);
350 
351  /* adjust page header */
352  phdr->pd_lower = (LocationIndex) lower;
353  phdr->pd_upper = (LocationIndex) upper;
354 
355  return offsetNumber;
356 }
357 
358 
359 /*
360  * PageGetTempPage
361  * Get a temporary page in local memory for special processing.
362  * The returned page is not initialized at all; caller must do that.
363  */
364 Page
366 {
367  Size pageSize;
368  Page temp;
369 
370  pageSize = PageGetPageSize(page);
371  temp = (Page) palloc(pageSize);
372 
373  return temp;
374 }
375 
376 /*
377  * PageGetTempPageCopy
378  * Get a temporary page in local memory for special processing.
379  * The page is initialized by copying the contents of the given page.
380  */
381 Page
383 {
384  Size pageSize;
385  Page temp;
386 
387  pageSize = PageGetPageSize(page);
388  temp = (Page) palloc(pageSize);
389 
390  memcpy(temp, page, pageSize);
391 
392  return temp;
393 }
394 
395 /*
396  * PageGetTempPageCopySpecial
397  * Get a temporary page in local memory for special processing.
398  * The page is PageInit'd with the same special-space size as the
399  * given page, and the special space is copied from the given page.
400  */
401 Page
403 {
404  Size pageSize;
405  Page temp;
406 
407  pageSize = PageGetPageSize(page);
408  temp = (Page) palloc(pageSize);
409 
410  PageInit(temp, pageSize, PageGetSpecialSize(page));
411  memcpy(PageGetSpecialPointer(temp),
412  PageGetSpecialPointer(page),
413  PageGetSpecialSize(page));
414 
415  return temp;
416 }
417 
418 /*
419  * PageRestoreTempPage
420  * Copy temporary page back to permanent page after special processing
421  * and release the temporary page.
422  */
423 void
424 PageRestoreTempPage(Page tempPage, Page oldPage)
425 {
426  Size pageSize;
427 
428  pageSize = PageGetPageSize(tempPage);
429  memcpy((char *) oldPage, (char *) tempPage, pageSize);
430 
431  pfree(tempPage);
432 }
433 
434 /*
435  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
436  */
437 typedef struct itemIdCompactData
438 {
439  uint16 offsetindex; /* linp array index */
440  int16 itemoff; /* page offset of item data */
441  uint16 alignedlen; /* MAXALIGN(item data len) */
444 
445 /*
446  * After removing or marking some line pointers unused, move the tuples to
447  * remove the gaps caused by the removed items and reorder them back into
448  * reverse line pointer order in the page.
449  *
450  * This function can often be fairly hot, so it pays to take some measures to
451  * make it as optimal as possible.
452  *
453  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
454  * descending order of itemoff. When this is true we can just memmove()
455  * tuples towards the end of the page. This is quite a common case as it's
456  * the order that tuples are initially inserted into pages. When we call this
457  * function to defragment the tuples in the page then any new line pointers
458  * added to the page will keep that presorted order, so hitting this case is
459  * still very common for tables that are commonly updated.
460  *
461  * When the 'itemidbase' array is not presorted then we're unable to just
462  * memmove() tuples around freely. Doing so could cause us to overwrite the
463  * memory belonging to a tuple we've not moved yet. In this case, we copy all
464  * the tuples that need to be moved into a temporary buffer. We can then
465  * simply memcpy() out of that temp buffer back into the page at the correct
466  * location. Tuples are copied back into the page in the same order as the
467  * 'itemidbase' array, so we end up reordering the tuples back into reverse
468  * line pointer order. This will increase the chances of hitting the
469  * presorted case the next time around.
470  *
471  * Callers must ensure that nitems is > 0
 *
 * 'itemidbase' is an array of 'nitems' entries, each giving an item's line
 * pointer index (offsetindex), its current page offset (itemoff) and its
 * MAXALIGN'd length (alignedlen).  'page' is the page to compact in place.
 *
 * On return, the lp_off of every item that was relocated has been updated
 * to its new offset, and the page's pd_upper is set to the start of the
 * compacted tuple data.  Leading items that already sit flush against
 * pd_special are skipped and their line pointers are not touched (in the
 * non-presorted branch this skip only happens when nitems is at least a
 * quarter of the line pointer count).
472  */
473 static void
474 compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
475 {
476  PageHeader phdr = (PageHeader) page;
477  Offset upper;
478  Offset copy_tail;
479  Offset copy_head;
480  itemIdCompact itemidptr;
481  int i;
482 
483  /* Code within will not work correctly if nitems == 0 */
484  Assert(nitems > 0);
485 
486  if (presorted)
487  {
488 
489 #ifdef USE_ASSERT_CHECKING
490  {
491  /*
492  * Verify we've not gotten any new callers that are incorrectly
493  * passing a true presorted value.
494  */
495  Offset lastoff = phdr->pd_special;
496 
497  for (i = 0; i < nitems; i++)
498  {
499  itemidptr = &itemidbase[i];
500 
501  Assert(lastoff > itemidptr->itemoff);
502 
503  lastoff = itemidptr->itemoff;
504  }
505  }
506 #endif /* USE_ASSERT_CHECKING */
507 
508  /*
509  * 'itemidbase' is already in the optimal order, i.e, lower item
510  * pointers have a higher offset. This allows us to memmove() the
511  * tuples up to the end of the page without having to worry about
512  * overwriting other tuples that have not been moved yet.
513  *
514  * There's a good chance that there are tuples already right at the
515  * end of the page that we can simply skip over because they're
516  * already in the correct location within the page. We'll do that
517  * first...
518  */
519  upper = phdr->pd_special;
520  i = 0;
521  do
522  {
523  itemidptr = &itemidbase[i];
524  if (upper != itemidptr->itemoff + itemidptr->alignedlen)
525  break;
526  upper -= itemidptr->alignedlen;
527 
528  i++;
529  } while (i < nitems);
530 
531  /*
532  * Now that we've found the first tuple that needs to be moved, we can
533  * do the tuple compactification. We try and make the least number of
534  * memmove() calls and only call memmove() when there's a gap. When
535  * we see a gap we just move all tuples after the gap up until the
536  * point of the last move operation.
537  */
 /*
  * copy_head..copy_tail is the pending run of contiguous source bytes that
  * has been accounted for in 'upper' but not yet moved; it starts empty.
  */
538  copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
539  for (; i < nitems; i++)
540  {
541  ItemId lp;
542 
543  itemidptr = &itemidbase[i];
544  lp = PageGetItemId(page, itemidptr->offsetindex + 1);
545 
546  if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
547  {
548  memmove((char *) page + upper,
549  page + copy_head,
550  copy_tail - copy_head);
551 
552  /*
553  * We've now moved all tuples already seen, but not the
554  * current tuple, so we set the copy_tail to the end of this
555  * tuple so it can be moved in another iteration of the loop.
556  */
557  copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
558  }
559  /* shift the target offset down by the length of this tuple */
560  upper -= itemidptr->alignedlen;
561  /* point the copy_head to the start of this tuple */
562  copy_head = itemidptr->itemoff;
563 
564  /* update the line pointer to reference the new offset */
565  lp->lp_off = upper;
566  }
567 
568  /* move the remaining tuples. */
569  memmove((char *) page + upper,
570  page + copy_head,
571  copy_tail - copy_head);
572  }
573  else
574  {
575  PGAlignedBlock scratch;
576  char *scratchptr = scratch.data;
577 
578  /*
579  * Non-presorted case: The tuples in the itemidbase array may be in
580  * any order. So, in order to move these to the end of the page we
581  * must make a temp copy of each tuple that needs to be moved before
582  * we copy them back into the page at the new offset.
583  *
584  * If a large percentage of tuples have been pruned (>75%) then we'll
585  * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
586  * just do a single memcpy() for all tuples that need to be moved.
587  * When so many tuples have been removed there's likely to be a lot of
588  * gaps and it's unlikely that many non-movable tuples remain at the
589  * end of the page.
590  */
 /*
  * In both sub-cases tuples are stored in the scratch buffer at the same
  * offsets they occupy in the page, so page offsets index scratchptr
  * directly in the copy-back loop below.
  */
591  if (nitems < PageGetMaxOffsetNumber(page) / 4)
592  {
593  i = 0;
594  do
595  {
596  itemidptr = &itemidbase[i];
597  memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
598  itemidptr->alignedlen);
599  i++;
600  } while (i < nitems);
601 
602  /* Set things up for the compactification code below */
603  i = 0;
604  itemidptr = &itemidbase[0];
605  upper = phdr->pd_special;
606  }
607  else
608  {
609  upper = phdr->pd_special;
610 
611  /*
612  * Many tuples are likely to already be in the correct location.
613  * There's no need to copy these into the temp buffer. Instead
614  * we'll just skip forward in the itemidbase array to the position
615  * that we do need to move tuples from so that the code below just
616  * leaves these ones alone.
617  */
618  i = 0;
619  do
620  {
621  itemidptr = &itemidbase[i];
622  if (upper != itemidptr->itemoff + itemidptr->alignedlen)
623  break;
624  upper -= itemidptr->alignedlen;
625 
626  i++;
627  } while (i < nitems);
628 
629  /* Copy all tuples that need to be moved into the temp buffer */
630  memcpy(scratchptr + phdr->pd_upper,
631  page + phdr->pd_upper,
632  upper - phdr->pd_upper);
633  }
634 
635  /*
636  * Do the tuple compactification. itemidptr is already pointing to
637  * the first tuple that we're going to move. Here we collapse the
638  * memcpy calls for adjacent tuples into a single call. This is done
639  * by delaying the memcpy call until we find a gap that needs to be
640  * closed.
641  */
642  copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
643  for (; i < nitems; i++)
644  {
645  ItemId lp;
646 
647  itemidptr = &itemidbase[i];
648  lp = PageGetItemId(page, itemidptr->offsetindex + 1);
649 
650  /* copy pending tuples when we detect a gap */
651  if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
652  {
653  memcpy((char *) page + upper,
654  scratchptr + copy_head,
655  copy_tail - copy_head);
656 
657  /*
658  * We've now copied all tuples already seen, but not the
659  * current tuple, so we set the copy_tail to the end of this
660  * tuple.
661  */
662  copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
663  }
664  /* shift the target offset down by the length of this tuple */
665  upper -= itemidptr->alignedlen;
666  /* point the copy_head to the start of this tuple */
667  copy_head = itemidptr->itemoff;
668 
669  /* update the line pointer to reference the new offset */
670  lp->lp_off = upper;
671  }
672 
673  /* Copy the remaining chunk */
674  memcpy((char *) page + upper,
675  scratchptr + copy_head,
676  copy_tail - copy_head);
677  }
678 
 /* the compacted tuple data now begins at 'upper' */
679  phdr->pd_upper = upper;
680 }
681 
682 /*
683  * PageRepairFragmentation
684  *
685  * Frees fragmented space on a heap page following pruning.
686  *
687  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
688  *
689  * This routine removes unused line pointers from the end of the line pointer
690  * array. This is possible when dead heap-only tuples get removed by pruning,
691  * especially when there were HOT chains with several tuples each beforehand.
692  *
693  * Caller had better have a full cleanup lock on page's buffer. As a side
694  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
695  * needed. Caller might also need to account for a reduction in the length of
696  * the line pointer array following array truncation.
697  */
698 void
700 {
701  Offset pd_lower = ((PageHeader) page)->pd_lower;
702  Offset pd_upper = ((PageHeader) page)->pd_upper;
703  Offset pd_special = ((PageHeader) page)->pd_special;
704  Offset last_offset;
706  itemIdCompact itemidptr;
707  ItemId lp;
708  int nline,
709  nstorage,
710  nunused;
711  OffsetNumber finalusedlp = InvalidOffsetNumber;
712  int i;
713  Size totallen;
714  bool presorted = true; /* For now */
715 
716  /*
717  * It's worth the trouble to be more paranoid here than in most places,
718  * because we are about to reshuffle data in (what is usually) a shared
719  * disk buffer. If we aren't careful then corrupted pointers, lengths,
720  * etc could cause us to clobber adjacent disk buffers, spreading the data
721  * loss further. So, check everything.
722  */
723  if (pd_lower < SizeOfPageHeaderData ||
724  pd_lower > pd_upper ||
725  pd_upper > pd_special ||
726  pd_special > BLCKSZ ||
727  pd_special != MAXALIGN(pd_special))
728  ereport(ERROR,
730  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
731  pd_lower, pd_upper, pd_special)));
732 
733  /*
734  * Run through the line pointer array and collect data about live items.
735  */
736  nline = PageGetMaxOffsetNumber(page);
737  itemidptr = itemidbase;
738  nunused = totallen = 0;
739  last_offset = pd_special;
740  for (i = FirstOffsetNumber; i <= nline; i++)
741  {
742  lp = PageGetItemId(page, i);
743  if (ItemIdIsUsed(lp))
744  {
745  if (ItemIdHasStorage(lp))
746  {
747  itemidptr->offsetindex = i - 1;
748  itemidptr->itemoff = ItemIdGetOffset(lp);
749 
750  if (last_offset > itemidptr->itemoff)
751  last_offset = itemidptr->itemoff;
752  else
753  presorted = false;
754 
755  if (unlikely(itemidptr->itemoff < (int) pd_upper ||
756  itemidptr->itemoff >= (int) pd_special))
757  ereport(ERROR,
759  errmsg("corrupted line pointer: %u",
760  itemidptr->itemoff)));
761  itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
762  totallen += itemidptr->alignedlen;
763  itemidptr++;
764  }
765 
766  finalusedlp = i; /* Could be the final non-LP_UNUSED item */
767  }
768  else
769  {
770  /* Unused entries should have lp_len = 0, but make sure */
771  Assert(!ItemIdHasStorage(lp));
772  ItemIdSetUnused(lp);
773  nunused++;
774  }
775  }
776 
777  nstorage = itemidptr - itemidbase;
778  if (nstorage == 0)
779  {
780  /* Page is completely empty, so just reset it quickly */
781  ((PageHeader) page)->pd_upper = pd_special;
782  }
783  else
784  {
785  /* Need to compact the page the hard way */
786  if (totallen > (Size) (pd_special - pd_lower))
787  ereport(ERROR,
789  errmsg("corrupted item lengths: total %u, available space %u",
790  (unsigned int) totallen, pd_special - pd_lower)));
791 
792  compactify_tuples(itemidbase, nstorage, page, presorted);
793  }
794 
795  if (finalusedlp != nline)
796  {
797  /* The last line pointer is not the last used line pointer */
798  int nunusedend = nline - finalusedlp;
799 
800  Assert(nunused >= nunusedend && nunusedend > 0);
801 
802  /* remove trailing unused line pointers from the count */
803  nunused -= nunusedend;
804  /* truncate the line pointer array */
805  ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
806  }
807 
808  /* Set hint bit for PageAddItemExtended */
809  if (nunused > 0)
811  else
813 }
814 
815 /*
816  * PageTruncateLinePointerArray
817  *
818  * Removes unused line pointers at the end of the line pointer array.
819  *
820  * This routine is usable for heap pages only. It is called by VACUUM during
821  * its second pass over the heap. We expect at least one LP_UNUSED line
822  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
823  * it just set to LP_UNUSED then it should not call here).
824  *
825  * We avoid truncating the line pointer array to 0 items, if necessary by
826  * leaving behind a single remaining LP_UNUSED item. This is a little
827  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
828  * page behind.
829  *
830  * Caller can have either an exclusive lock or a full cleanup lock on page's
831  * buffer. The page's PD_HAS_FREE_LINES hint bit will be set or unset based
832  * on whether or not we leave behind any remaining LP_UNUSED items.
833  */
834 void
836 {
837  PageHeader phdr = (PageHeader) page;
838  bool countdone = false,
839  sethint = false;
840  int nunusedend = 0;
841 
842  /* Scan line pointer array back-to-front */
843  for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
844  {
845  ItemId lp = PageGetItemId(page, i);
846 
847  if (!countdone && i > FirstOffsetNumber)
848  {
849  /*
850  * Still determining which line pointers from the end of the array
851  * will be truncated away. Either count another line pointer as
852  * safe to truncate, or notice that it's not safe to truncate
853  * additional line pointers (stop counting line pointers).
854  */
855  if (!ItemIdIsUsed(lp))
856  nunusedend++;
857  else
858  countdone = true;
859  }
860  else
861  {
862  /*
863  * Once we've stopped counting we still need to figure out if
864  * there are any remaining LP_UNUSED line pointers somewhere more
865  * towards the front of the array.
866  */
867  if (!ItemIdIsUsed(lp))
868  {
869  /*
870  * This is an unused line pointer that we won't be truncating
871  * away -- so there is at least one. Set hint on page.
872  */
873  sethint = true;
874  break;
875  }
876  }
877  }
878 
879  if (nunusedend > 0)
880  {
881  phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
882 
883 #ifdef CLOBBER_FREED_MEMORY
884  memset((char *) page + phdr->pd_lower, 0x7F,
885  sizeof(ItemIdData) * nunusedend);
886 #endif
887  }
888  else
889  Assert(sethint);
890 
891  /* Set hint bit for PageAddItemExtended */
892  if (sethint)
894  else
896 }
897 
898 /*
899  * PageGetFreeSpace
900  * Returns the size of the free (allocatable) space on a page,
901  * reduced by the space needed for a new line pointer.
902  *
903  * Note: this should usually only be used on index pages. Use
904  * PageGetHeapFreeSpace on heap pages.
905  */
906 Size
908 {
909  int space;
910 
911  /*
912  * Use signed arithmetic here so that we behave sensibly if pd_lower >
913  * pd_upper.
914  */
915  space = (int) ((PageHeader) page)->pd_upper -
916  (int) ((PageHeader) page)->pd_lower;
917 
918  if (space < (int) sizeof(ItemIdData))
919  return 0;
920  space -= sizeof(ItemIdData);
921 
922  return (Size) space;
923 }
924 
925 /*
926  * PageGetFreeSpaceForMultipleTuples
927  * Returns the size of the free (allocatable) space on a page,
928  * reduced by the space needed for multiple new line pointers.
929  *
930  * Note: this should usually only be used on index pages. Use
931  * PageGetHeapFreeSpace on heap pages.
932  */
933 Size
935 {
936  int space;
937 
938  /*
939  * Use signed arithmetic here so that we behave sensibly if pd_lower >
940  * pd_upper.
941  */
942  space = (int) ((PageHeader) page)->pd_upper -
943  (int) ((PageHeader) page)->pd_lower;
944 
945  if (space < (int) (ntups * sizeof(ItemIdData)))
946  return 0;
947  space -= ntups * sizeof(ItemIdData);
948 
949  return (Size) space;
950 }
951 
952 /*
953  * PageGetExactFreeSpace
954  * Returns the size of the free (allocatable) space on a page,
955  * without any consideration for adding/removing line pointers.
956  */
957 Size
959 {
960  int space;
961 
962  /*
963  * Use signed arithmetic here so that we behave sensibly if pd_lower >
964  * pd_upper.
965  */
966  space = (int) ((PageHeader) page)->pd_upper -
967  (int) ((PageHeader) page)->pd_lower;
968 
969  if (space < 0)
970  return 0;
971 
972  return (Size) space;
973 }
974 
975 
976 /*
977  * PageGetHeapFreeSpace
978  * Returns the size of the free (allocatable) space on a page,
979  * reduced by the space needed for a new line pointer.
980  *
981  * The difference between this and PageGetFreeSpace is that this will return
982  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
983  * and none are free. We use this to enforce that no more than
984  * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
985  * no more tuples than that could fit anyway, in the presence of redirected
986  * or dead line pointers it'd be possible to have too many line pointers.
987  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
988  * on the number of line pointers, we make this extra check.)
989  */
990 Size
992 {
993  Size space;
994 
995  space = PageGetFreeSpace(page);
996  if (space > 0)
997  {
998  OffsetNumber offnum,
999  nline;
1000 
1001  /*
1002  * Are there already MaxHeapTuplesPerPage line pointers in the page?
1003  */
1004  nline = PageGetMaxOffsetNumber(page);
1005  if (nline >= MaxHeapTuplesPerPage)
1006  {
1007  if (PageHasFreeLinePointers(page))
1008  {
1009  /*
1010  * Since this is just a hint, we must confirm that there is
1011  * indeed a free line pointer
1012  */
1013  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1014  {
1015  ItemId lp = PageGetItemId(page, offnum);
1016 
1017  if (!ItemIdIsUsed(lp))
1018  break;
1019  }
1020 
1021  if (offnum > nline)
1022  {
1023  /*
1024  * The hint is wrong, but we can't clear it here since we
1025  * don't have the ability to mark the page dirty.
1026  */
1027  space = 0;
1028  }
1029  }
1030  else
1031  {
1032  /*
1033  * Although the hint might be wrong, PageAddItem will believe
1034  * it anyway, so we must believe it too.
1035  */
1036  space = 0;
1037  }
1038  }
1039  }
1040  return space;
1041 }
1042 
1043 
1044 /*
1045  * PageIndexTupleDelete
1046  *
1047  * This routine does the work of removing a tuple from an index page.
1048  *
1049  * Unlike heap pages, we compact out the line pointer for the removed tuple.
1050  */
1051 void
1053 {
1054  PageHeader phdr = (PageHeader) page;
1055  char *addr;
1056  ItemId tup;
1057  Size size;
1058  unsigned offset;
1059  int nbytes;
1060  int offidx;
1061  int nline;
1062 
1063  /*
1064  * As with PageRepairFragmentation, paranoia seems justified.
1065  */
1066  if (phdr->pd_lower < SizeOfPageHeaderData ||
1067  phdr->pd_lower > phdr->pd_upper ||
1068  phdr->pd_upper > phdr->pd_special ||
1069  phdr->pd_special > BLCKSZ ||
1070  phdr->pd_special != MAXALIGN(phdr->pd_special))
1071  ereport(ERROR,
1073  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1074  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1075 
1076  nline = PageGetMaxOffsetNumber(page);
1077  if ((int) offnum <= 0 || (int) offnum > nline)
1078  elog(ERROR, "invalid index offnum: %u", offnum);
1079 
1080  /* change offset number to offset index */
1081  offidx = offnum - 1;
1082 
1083  tup = PageGetItemId(page, offnum);
1084  Assert(ItemIdHasStorage(tup));
1085  size = ItemIdGetLength(tup);
1086  offset = ItemIdGetOffset(tup);
1087 
1088  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
1089  offset != MAXALIGN(offset))
1090  ereport(ERROR,
1092  errmsg("corrupted line pointer: offset = %u, size = %u",
1093  offset, (unsigned int) size)));
1094 
1095  /* Amount of space to actually be deleted */
1096  size = MAXALIGN(size);
1097 
1098  /*
1099  * First, we want to get rid of the pd_linp entry for the index tuple. We
1100  * copy all subsequent linp's back one slot in the array. We don't use
1101  * PageGetItemId, because we are manipulating the _array_, not individual
1102  * linp's.
1103  */
1104  nbytes = phdr->pd_lower -
1105  ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
1106 
1107  if (nbytes > 0)
1108  memmove((char *) &(phdr->pd_linp[offidx]),
1109  (char *) &(phdr->pd_linp[offidx + 1]),
1110  nbytes);
1111 
1112  /*
1113  * Now move everything between the old upper bound (beginning of tuple
1114  * space) and the beginning of the deleted tuple forward, so that space in
1115  * the middle of the page is left free. If we've just deleted the tuple
1116  * at the beginning of tuple space, then there's no need to do the copy.
1117  */
1118 
1119  /* beginning of tuple space */
1120  addr = (char *) page + phdr->pd_upper;
1121 
1122  if (offset > phdr->pd_upper)
1123  memmove(addr + size, addr, offset - phdr->pd_upper);
1124 
1125  /* adjust free space boundary pointers */
1126  phdr->pd_upper += size;
1127  phdr->pd_lower -= sizeof(ItemIdData);
1128 
1129  /*
1130  * Finally, we need to adjust the linp entries that remain.
1131  *
1132  * Anything that used to be before the deleted tuple's data was moved
1133  * forward by the size of the deleted tuple.
1134  */
1135  if (!PageIsEmpty(page))
1136  {
1137  int i;
1138 
1139  nline--; /* there's one less than when we started */
1140  for (i = 1; i <= nline; i++)
1141  {
1142  ItemId ii = PageGetItemId(page, i);
1143 
1144  Assert(ItemIdHasStorage(ii));
1145  if (ItemIdGetOffset(ii) <= offset)
1146  ii->lp_off += size;
1147  }
1148  }
1149 }
1150 
1151 
1152 /*
1153  * PageIndexMultiDelete
1154  *
1155  * This routine handles the case of deleting multiple tuples from an
1156  * index page at once. It is considerably faster than a loop around
1157  * PageIndexTupleDelete ... however, the caller *must* supply the array
1158  * of item numbers to be deleted in item number order!
1159  */
1160 void
1162 {
1163  PageHeader phdr = (PageHeader) page;
1164  Offset pd_lower = phdr->pd_lower;
1165  Offset pd_upper = phdr->pd_upper;
1166  Offset pd_special = phdr->pd_special;
1167  Offset last_offset;
1169  ItemIdData newitemids[MaxIndexTuplesPerPage];
1170  itemIdCompact itemidptr;
1171  ItemId lp;
1172  int nline,
1173  nused;
1174  Size totallen;
1175  Size size;
1176  unsigned offset;
1177  int nextitm;
1178  OffsetNumber offnum;
1179  bool presorted = true; /* For now */
1180 
1182 
1183  /*
1184  * If there aren't very many items to delete, then retail
1185  * PageIndexTupleDelete is the best way. Delete the items in reverse
1186  * order so we don't have to think about adjusting item numbers for
1187  * previous deletions.
1188  *
1189  * TODO: tune the magic number here
1190  */
1191  if (nitems <= 2)
1192  {
1193  while (--nitems >= 0)
1194  PageIndexTupleDelete(page, itemnos[nitems]);
1195  return;
1196  }
1197 
1198  /*
1199  * As with PageRepairFragmentation, paranoia seems justified.
1200  */
1201  if (pd_lower < SizeOfPageHeaderData ||
1202  pd_lower > pd_upper ||
1203  pd_upper > pd_special ||
1204  pd_special > BLCKSZ ||
1205  pd_special != MAXALIGN(pd_special))
1206  ereport(ERROR,
1208  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1209  pd_lower, pd_upper, pd_special)));
1210 
1211  /*
1212  * Scan the line pointer array and build a list of just the ones we are
1213  * going to keep. Notice we do not modify the page yet, since we are
1214  * still validity-checking.
1215  */
1216  nline = PageGetMaxOffsetNumber(page);
1217  itemidptr = itemidbase;
1218  totallen = 0;
1219  nused = 0;
1220  nextitm = 0;
1221  last_offset = pd_special;
1222  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1223  {
1224  lp = PageGetItemId(page, offnum);
1225  Assert(ItemIdHasStorage(lp));
1226  size = ItemIdGetLength(lp);
1227  offset = ItemIdGetOffset(lp);
1228  if (offset < pd_upper ||
1229  (offset + size) > pd_special ||
1230  offset != MAXALIGN(offset))
1231  ereport(ERROR,
1233  errmsg("corrupted line pointer: offset = %u, size = %u",
1234  offset, (unsigned int) size)));
1235 
1236  if (nextitm < nitems && offnum == itemnos[nextitm])
1237  {
1238  /* skip item to be deleted */
1239  nextitm++;
1240  }
1241  else
1242  {
1243  itemidptr->offsetindex = nused; /* where it will go */
1244  itemidptr->itemoff = offset;
1245 
1246  if (last_offset > itemidptr->itemoff)
1247  last_offset = itemidptr->itemoff;
1248  else
1249  presorted = false;
1250 
1251  itemidptr->alignedlen = MAXALIGN(size);
1252  totallen += itemidptr->alignedlen;
1253  newitemids[nused] = *lp;
1254  itemidptr++;
1255  nused++;
1256  }
1257  }
1258 
1259  /* this will catch invalid or out-of-order itemnos[] */
1260  if (nextitm != nitems)
1261  elog(ERROR, "incorrect index offsets supplied");
1262 
1263  if (totallen > (Size) (pd_special - pd_lower))
1264  ereport(ERROR,
1266  errmsg("corrupted item lengths: total %u, available space %u",
1267  (unsigned int) totallen, pd_special - pd_lower)));
1268 
1269  /*
1270  * Looks good. Overwrite the line pointers with the copy, from which we've
1271  * removed all the unused items.
1272  */
1273  memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
1274  phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
1275 
1276  /* and compactify the tuple data */
1277  if (nused > 0)
1278  compactify_tuples(itemidbase, nused, page, presorted);
1279  else
1280  phdr->pd_upper = pd_special;
1281 }
1282 
1283 
1284 /*
1285  * PageIndexTupleDeleteNoCompact
1286  *
1287  * Remove the specified tuple from an index page, but set its line pointer
1288  * to "unused" instead of compacting it out, except that it can be removed
1289  * if it's the last line pointer on the page.
1290  *
1291  * This is used for index AMs that require that existing TIDs of live tuples
1292  * remain unchanged, and are willing to allow unused line pointers instead.
1293  */
1294 void
1296 {
1297  PageHeader phdr = (PageHeader) page;
1298  char *addr;
1299  ItemId tup;
1300  Size size;
1301  unsigned offset;
1302  int nline;
1303 
1304  /*
1305  * As with PageRepairFragmentation, paranoia seems justified.
1306  */
1307  if (phdr->pd_lower < SizeOfPageHeaderData ||
1308  phdr->pd_lower > phdr->pd_upper ||
1309  phdr->pd_upper > phdr->pd_special ||
1310  phdr->pd_special > BLCKSZ ||
1311  phdr->pd_special != MAXALIGN(phdr->pd_special))
1312  ereport(ERROR,
1314  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1315  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1316 
1317  nline = PageGetMaxOffsetNumber(page);
1318  if ((int) offnum <= 0 || (int) offnum > nline)
1319  elog(ERROR, "invalid index offnum: %u", offnum);
1320 
1321  tup = PageGetItemId(page, offnum);
1322  Assert(ItemIdHasStorage(tup));
1323  size = ItemIdGetLength(tup);
1324  offset = ItemIdGetOffset(tup);
1325 
1326  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
1327  offset != MAXALIGN(offset))
1328  ereport(ERROR,
1330  errmsg("corrupted line pointer: offset = %u, size = %u",
1331  offset, (unsigned int) size)));
1332 
1333  /* Amount of space to actually be deleted */
1334  size = MAXALIGN(size);
1335 
1336  /*
1337  * Either set the line pointer to "unused", or zap it if it's the last
1338  * one. (Note: it's possible that the next-to-last one(s) are already
1339  * unused, but we do not trouble to try to compact them out if so.)
1340  */
1341  if ((int) offnum < nline)
1342  ItemIdSetUnused(tup);
1343  else
1344  {
1345  phdr->pd_lower -= sizeof(ItemIdData);
1346  nline--; /* there's one less than when we started */
1347  }
1348 
1349  /*
1350  * Now move everything between the old upper bound (beginning of tuple
1351  * space) and the beginning of the deleted tuple forward, so that space in
1352  * the middle of the page is left free. If we've just deleted the tuple
1353  * at the beginning of tuple space, then there's no need to do the copy.
1354  */
1355 
1356  /* beginning of tuple space */
1357  addr = (char *) page + phdr->pd_upper;
1358 
1359  if (offset > phdr->pd_upper)
1360  memmove(addr + size, addr, offset - phdr->pd_upper);
1361 
1362  /* adjust free space boundary pointer */
1363  phdr->pd_upper += size;
1364 
1365  /*
1366  * Finally, we need to adjust the linp entries that remain.
1367  *
1368  * Anything that used to be before the deleted tuple's data was moved
1369  * forward by the size of the deleted tuple.
1370  */
1371  if (!PageIsEmpty(page))
1372  {
1373  int i;
1374 
1375  for (i = 1; i <= nline; i++)
1376  {
1377  ItemId ii = PageGetItemId(page, i);
1378 
1379  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1380  ii->lp_off += size;
1381  }
1382  }
1383 }
1384 
1385 
1386 /*
1387  * PageIndexTupleOverwrite
1388  *
1389  * Replace a specified tuple on an index page.
1390  *
1391  * The new tuple is placed exactly where the old one had been, shifting
1392  * other tuples' data up or down as needed to keep the page compacted.
1393  * This is better than deleting and reinserting the tuple, because it
1394  * avoids any data shifting when the tuple size doesn't change; and
1395  * even when it does, we avoid moving the line pointers around.
1396  * This could be used by an index AM that doesn't want to unset the
1397  * LP_DEAD bit when it happens to be set. It could conceivably also be
1398  * used by an index AM that cares about the physical order of tuples as
1399  * well as their logical/ItemId order.
1400  *
1401  * If there's insufficient space for the new tuple, return false. Other
1402  * errors represent data-corruption problems, so we just elog.
1403  */
1404 bool
1406  Item newtup, Size newsize)
1407 {
1408  PageHeader phdr = (PageHeader) page;
1409  ItemId tupid;
1410  int oldsize;
1411  unsigned offset;
1412  Size alignednewsize;
1413  int size_diff;
1414  int itemcount;
1415 
1416  /*
1417  * As with PageRepairFragmentation, paranoia seems justified.
1418  */
1419  if (phdr->pd_lower < SizeOfPageHeaderData ||
1420  phdr->pd_lower > phdr->pd_upper ||
1421  phdr->pd_upper > phdr->pd_special ||
1422  phdr->pd_special > BLCKSZ ||
1423  phdr->pd_special != MAXALIGN(phdr->pd_special))
1424  ereport(ERROR,
1426  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1427  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1428 
1429  itemcount = PageGetMaxOffsetNumber(page);
1430  if ((int) offnum <= 0 || (int) offnum > itemcount)
1431  elog(ERROR, "invalid index offnum: %u", offnum);
1432 
1433  tupid = PageGetItemId(page, offnum);
1434  Assert(ItemIdHasStorage(tupid));
1435  oldsize = ItemIdGetLength(tupid);
1436  offset = ItemIdGetOffset(tupid);
1437 
1438  if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
1439  offset != MAXALIGN(offset))
1440  ereport(ERROR,
1442  errmsg("corrupted line pointer: offset = %u, size = %u",
1443  offset, (unsigned int) oldsize)));
1444 
1445  /*
1446  * Determine actual change in space requirement, check for page overflow.
1447  */
1448  oldsize = MAXALIGN(oldsize);
1449  alignednewsize = MAXALIGN(newsize);
1450  if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
1451  return false;
1452 
1453  /*
1454  * Relocate existing data and update line pointers, unless the new tuple
1455  * is the same size as the old (after alignment), in which case there's
1456  * nothing to do. Notice that what we have to relocate is data before the
1457  * target tuple, not data after, so it's convenient to express size_diff
1458  * as the amount by which the tuple's size is decreasing, making it the
1459  * delta to add to pd_upper and affected line pointers.
1460  */
1461  size_diff = oldsize - (int) alignednewsize;
1462  if (size_diff != 0)
1463  {
1464  char *addr = (char *) page + phdr->pd_upper;
1465  int i;
1466 
1467  /* relocate all tuple data before the target tuple */
1468  memmove(addr + size_diff, addr, offset - phdr->pd_upper);
1469 
1470  /* adjust free space boundary pointer */
1471  phdr->pd_upper += size_diff;
1472 
1473  /* adjust affected line pointers too */
1474  for (i = FirstOffsetNumber; i <= itemcount; i++)
1475  {
1476  ItemId ii = PageGetItemId(page, i);
1477 
1478  /* Allow items without storage; currently only BRIN needs that */
1479  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1480  ii->lp_off += size_diff;
1481  }
1482  }
1483 
1484  /* Update the item's tuple length without changing its lp_flags field */
1485  tupid->lp_off = offset + size_diff;
1486  tupid->lp_len = newsize;
1487 
1488  /* Copy new tuple data onto page */
1489  memcpy(PageGetItem(page, tupid), newtup, newsize);
1490 
1491  return true;
1492 }
1493 
1494 
1495 /*
1496  * Set checksum for a page in shared buffers.
1497  *
1498  * If checksums are disabled, or if the page is not initialized, just return
1499  * the input. Otherwise, we must make a copy of the page before calculating
1500  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1501  * from making the final checksum invalid. It doesn't matter if we include or
1502  * exclude hints during the copy, as long as we write a valid page and
1503  * associated checksum.
1504  *
1505  * Returns a pointer to the block-sized data that needs to be written. Uses
1506  * statically-allocated memory, so the caller must immediately write the
1507  * returned page and not refer to it again.
1508  */
1509 char *
1511 {
1512  static char *pageCopy = NULL;
1513 
1514  /* If we don't need a checksum, just return the passed-in data */
1515  if (PageIsNew(page) || !DataChecksumsEnabled())
1516  return (char *) page;
1517 
1518  /*
1519  * We allocate the copy space once and use it over on each subsequent
1520  * call. The point of palloc'ing here, rather than having a static char
1521  * array, is first to ensure adequate alignment for the checksumming code
1522  * and second to avoid wasting space in processes that never call this.
1523  */
1524  if (pageCopy == NULL)
1526  BLCKSZ,
1528  0);
1529 
1530  memcpy(pageCopy, (char *) page, BLCKSZ);
1531  ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1532  return pageCopy;
1533 }
1534 
1535 /*
1536  * Set checksum for a page in private memory.
1537  *
1538  * This must only be used when we know that no other process can be modifying
1539  * the page buffer.
1540  */
1541 void
1543 {
1544  /* If we don't need a checksum, just return */
1545  if (PageIsNew(page) || !DataChecksumsEnabled())
1546  return;
1547 
1548  ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
1549 }
uint32 BlockNumber
Definition: block.h:31
struct itemIdCompactData itemIdCompactData
static void compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
Definition: bufpage.c:474
bool ignore_checksum_failure
Definition: bufpage.c:27
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
Definition: bufpage.c:934
itemIdCompactData * itemIdCompact
Definition: bufpage.c:443
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags)
Definition: bufpage.c:194
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1052
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
Page PageGetTempPageCopy(Page page)
Definition: bufpage.c:382
Page PageGetTempPage(Page page)
Definition: bufpage.c:365
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1295
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
#define PD_VALID_FLAG_BITS
Definition: bufpage.h:188
PageHeaderData * PageHeader
Definition: bufpage.h:170
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static void PageSetHasFreeLinePointers(Page page)
Definition: bufpage.h:399
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
#define PIV_LOG_WARNING
Definition: bufpage.h:465
static Size PageGetPageSize(Page page)
Definition: bufpage.h:273
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
#define PG_PAGE_LAYOUT_VERSION
Definition: bufpage.h:202
static char * PageGetSpecialPointer(Page page)
Definition: bufpage.h:336
static void PageClearHasFreeLinePointers(Page page)
Definition: bufpage.h:404
static bool PageHasFreeLinePointers(Page page)
Definition: bufpage.h:394
uint16 LocationIndex
Definition: bufpage.h:87
#define PIV_REPORT_STAT
Definition: bufpage.h:466
static void PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
Definition: bufpage.h:296
#define PAI_IS_HEAP
Definition: bufpage.h:462
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PAI_OVERWRITE
Definition: bufpage.h:461
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:313
unsigned short uint16
Definition: c.h:492
signed short int16
Definition: c.h:480
#define MAXALIGN(LEN)
Definition: c.h:798
#define unlikely(x)
Definition: c.h:298
#define MemSet(start, val, len)
Definition: c.h:1007
signed int Offset
Definition: c.h:611
size_t Size
Definition: c.h:592
uint16 pg_checksum_page(char *page, BlockNumber blkno)
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define nitems(x)
Definition: indent.h:31
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdGetOffset(itemId)
Definition: itemid.h:65
struct ItemIdData ItemIdData
#define ItemIdSetNormal(itemId, off, len)
Definition: itemid.h:140
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
#define MaxIndexTuplesPerPage
Definition: itup.h:165
Assert(fmt[strlen(fmt) - 1] !='\n')
void * MemoryContextAllocAligned(MemoryContext context, Size size, Size alignto, int flags)
Definition: mcxt.c:1396
void pfree(void *pointer)
Definition: mcxt.c:1508
MemoryContext TopMemoryContext
Definition: mcxt.c:137
void * palloc(Size size)
Definition: mcxt.c:1304
#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size)
Definition: memdebug.h:23
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
static bool checksum_failure
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define PG_IO_ALIGN_SIZE
void pgstat_report_checksum_failure(void)
static pg_noinline void Size size
Definition: slab.c:607
unsigned lp_len
Definition: itemid.h:29
unsigned lp_off
Definition: itemid.h:27
LocationIndex pd_special
Definition: bufpage.h:164
LocationIndex pd_upper
Definition: bufpage.h:163
uint16 pd_flags
Definition: bufpage.h:161
uint16 pd_checksum
Definition: bufpage.h:160
LocationIndex pd_lower
Definition: bufpage.h:162
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]
Definition: bufpage.h:167
uint16 offsetindex
Definition: bufpage.c:439
uint16 alignedlen
Definition: bufpage.c:441
char data[BLCKSZ]
Definition: c.h:1106
bool DataChecksumsEnabled(void)
Definition: xlog.c:4478