PostgreSQL Source Code (git master)
bufpage.c
1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.c
4  * POSTGRES standard buffer page code.
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/storage/page/bufpage.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/htup_details.h"
18 #include "access/itup.h"
19 #include "access/xlog.h"
20 #include "pgstat.h"
21 #include "storage/checksum.h"
22 #include "utils/memdebug.h"
23 #include "utils/memutils.h"
24 
25 
26 /* GUC variable */
27 bool ignore_checksum_failure = false;
28 
29 
30 /* ----------------------------------------------------------------
31  * Page support functions
32  * ----------------------------------------------------------------
33  */
34 
35 /*
36  * PageInit
37  * Initializes the contents of a page.
38  * Note that we don't calculate an initial checksum here; that's not done
39  * until it's time to write.
40  */
41 void
42 PageInit(Page page, Size pageSize, Size specialSize)
43 {
44  PageHeader p = (PageHeader) page;
45 
46  specialSize = MAXALIGN(specialSize);
47 
48  Assert(pageSize == BLCKSZ);
49  Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 
51  /* Make sure all fields of page are zero, as well as unused space */
52  MemSet(p, 0, pageSize);
53 
54  p->pd_flags = 0;
55  p->pd_lower = SizeOfPageHeaderData;
56  p->pd_upper = pageSize - specialSize;
57  p->pd_special = pageSize - specialSize;
58  PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
59  /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
60 }
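
As a point of reference, a minimal usage sketch (not part of bufpage.c; it assumes the usual backend headers such as postgres.h and storage/bufpage.h, a palloc'd local buffer, and zero special space purely for illustration):

   /* Illustrative only: set up an empty heap-style page in local memory. */
   Page page = (Page) palloc(BLCKSZ);

   PageInit(page, BLCKSZ, 0);      /* no special space for a heap-style page */

   Assert(!PageIsNew(page));       /* pd_upper is now nonzero */
   Assert(PageIsEmpty(page));      /* but there are no line pointers yet */
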
61 
62 
63 /*
64  * PageIsVerified
65  * Check that the page header and checksum (if any) appear valid.
66  *
67  * This is called when a page has just been read in from disk. The idea is
68  * to cheaply detect trashed pages before we go nuts following bogus line
69  * pointers, testing invalid transaction identifiers, etc.
70  *
71  * It turns out to be necessary to allow zeroed pages here too. Even though
72  * this routine is *not* called when deliberately adding a page to a relation,
73  * there are scenarios in which a zeroed page might be found in a table.
74  * (Example: a backend extends a relation, then crashes before it can write
75  * any WAL entry about the new page. The kernel will already have the
76  * zeroed page in the file, and it will stay that way after restart.) So we
77  * allow zeroed pages here, and are careful that the page access macros
78  * treat such a page as empty and without free space. Eventually, VACUUM
79  * will clean up such a page and make it usable.
80  */
81 bool
82 PageIsVerified(Page page, BlockNumber blkno)
83 {
84  PageHeader p = (PageHeader) page;
85  size_t *pagebytes;
86  int i;
87  bool checksum_failure = false;
88  bool header_sane = false;
89  bool all_zeroes = false;
90  uint16 checksum = 0;
91 
92  /*
93  * Don't verify page data unless the page passes basic non-zero test
94  */
95  if (!PageIsNew(page))
96  {
97  if (DataChecksumsEnabled())
98  {
99  checksum = pg_checksum_page((char *) page, blkno);
100 
101  if (checksum != p->pd_checksum)
102  checksum_failure = true;
103  }
104 
105  /*
106  * The following checks don't prove the header is correct, only that
107  * it looks sane enough to allow into the buffer pool. Later usage of
108  * the block can still reveal problems, which is why we offer the
109  * checksum option.
110  */
111  if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
112  p->pd_lower <= p->pd_upper &&
113  p->pd_upper <= p->pd_special &&
114  p->pd_special <= BLCKSZ &&
115  p->pd_special == MAXALIGN(p->pd_special))
116  header_sane = true;
117 
118  if (header_sane && !checksum_failure)
119  return true;
120  }
121 
122  /* Check all-zeroes case */
123  all_zeroes = true;
124  pagebytes = (size_t *) page;
125  for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
126  {
127  if (pagebytes[i] != 0)
128  {
129  all_zeroes = false;
130  break;
131  }
132  }
133 
134  if (all_zeroes)
135  return true;
136 
137  /*
138  * Throw a WARNING if the checksum fails, but only after we've checked for
139  * the all-zeroes case.
140  */
141  if (checksum_failure)
142  {
145  errmsg("page verification failed, calculated checksum %u but expected %u",
146  checksum, p->pd_checksum)));
147 
149 
150  if (header_sane && ignore_checksum_failure)
151  return true;
152  }
153 
154  return false;
155 }
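
A rough sketch of the intended call pattern on the read path (the surrounding buffer-manager logic is simplified away; bufBlock and blocknum are hypothetical locals, not names taken from this file):

   /* Illustrative only: refuse to use a block that fails verification. */
   if (!PageIsVerified((Page) bufBlock, blocknum))
       ereport(ERROR,
               (errcode(ERRCODE_DATA_CORRUPTED),
                errmsg("invalid page in block %u", blocknum)));
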
156 
157 
158 /*
159  * PageAddItemExtended
160  *
161  * Add an item to a page. Return value is the offset at which it was
162  * inserted, or InvalidOffsetNumber if the item is not inserted for any
163  * reason. A WARNING is issued indicating the reason for the refusal.
164  *
165  * offsetNumber must be either InvalidOffsetNumber to specify finding a
166  * free line pointer, or a value between FirstOffsetNumber and one past
167  * the last existing item, to specify using that particular line pointer.
168  *
169  * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
170  * the item at the specified offsetNumber, which must be either a
171  * currently-unused line pointer, or one past the last existing item.
172  *
173  * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
174  * the item at the specified offsetNumber, moving existing items later
175  * in the array to make room.
176  *
177  * If offsetNumber is not valid, then assign a slot by finding the first
178  * one that is both unused and deallocated.
179  *
180  * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
181  * MaxHeapTuplesPerPage line pointers on the page.
182  *
183  * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
184  */
185 OffsetNumber
186 PageAddItemExtended(Page page,
187  Item item,
188  Size size,
189  OffsetNumber offsetNumber,
190  int flags)
191 {
192  PageHeader phdr = (PageHeader) page;
193  Size alignedSize;
194  int lower;
195  int upper;
196  ItemId itemId;
197  OffsetNumber limit;
198  bool needshuffle = false;
199 
200  /*
201  * Be wary about corrupted page pointers
202  */
203  if (phdr->pd_lower < SizeOfPageHeaderData ||
204  phdr->pd_lower > phdr->pd_upper ||
205  phdr->pd_upper > phdr->pd_special ||
206  phdr->pd_special > BLCKSZ)
207  ereport(PANIC,
208  (errcode(ERRCODE_DATA_CORRUPTED),
209  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
210  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
211 
212  /*
213  * Select offsetNumber to place the new item at
214  */
215  limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
216 
217  /* was offsetNumber passed in? */
218  if (OffsetNumberIsValid(offsetNumber))
219  {
220  /* yes, check it */
221  if ((flags & PAI_OVERWRITE) != 0)
222  {
223  if (offsetNumber < limit)
224  {
225  itemId = PageGetItemId(phdr, offsetNumber);
226  if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
227  {
228  elog(WARNING, "will not overwrite a used ItemId");
229  return InvalidOffsetNumber;
230  }
231  }
232  }
233  else
234  {
235  if (offsetNumber < limit)
236  needshuffle = true; /* need to move existing linp's */
237  }
238  }
239  else
240  {
241  /* offsetNumber was not passed in, so find a free slot */
242  /* if no free slot, we'll put it at limit (1st open slot) */
243  if (PageHasFreeLinePointers(phdr))
244  {
245  /*
246  * Look for "recyclable" (unused) ItemId. We check for no storage
247  * as well, just to be paranoid --- unused items should never have
248  * storage.
249  */
250  for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
251  {
252  itemId = PageGetItemId(phdr, offsetNumber);
253  if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
254  break;
255  }
256  if (offsetNumber >= limit)
257  {
258  /* the hint is wrong, so reset it */
259  PageClearHasFreeLinePointers(phdr);
260  }
261  }
262  else
263  {
264  /* don't bother searching if hint says there's no free slot */
265  offsetNumber = limit;
266  }
267  }
268 
269  /* Reject placing items beyond the first unused line pointer */
270  if (offsetNumber > limit)
271  {
272  elog(WARNING, "specified item offset is too large");
273  return InvalidOffsetNumber;
274  }
275 
276  /* Reject placing items beyond heap boundary, if heap */
277  if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
278  {
279  elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
280  return InvalidOffsetNumber;
281  }
282 
283  /*
284  * Compute new lower and upper pointers for page, see if it'll fit.
285  *
286  * Note: do arithmetic as signed ints, to avoid mistakes if, say,
287  * alignedSize > pd_upper.
288  */
289  if (offsetNumber == limit || needshuffle)
290  lower = phdr->pd_lower + sizeof(ItemIdData);
291  else
292  lower = phdr->pd_lower;
293 
294  alignedSize = MAXALIGN(size);
295 
296  upper = (int) phdr->pd_upper - (int) alignedSize;
297 
298  if (lower > upper)
299  return InvalidOffsetNumber;
300 
301  /*
302  * OK to insert the item. First, shuffle the existing pointers if needed.
303  */
304  itemId = PageGetItemId(phdr, offsetNumber);
305 
306  if (needshuffle)
307  memmove(itemId + 1, itemId,
308  (limit - offsetNumber) * sizeof(ItemIdData));
309 
310  /* set the line pointer */
311  ItemIdSetNormal(itemId, upper, size);
312 
313  /*
314  * Items normally contain no uninitialized bytes. Core bufpage consumers
315  * conform, but this is not a necessary coding rule; a new index AM could
316  * opt to depart from it. However, data type input functions and other
317  * C-language functions that synthesize datums should initialize all
318  * bytes; datumIsEqual() relies on this. Testing here, along with the
319  * similar check in printtup(), helps to catch such mistakes.
320  *
321  * Values of the "name" type retrieved via index-only scans may contain
322  * uninitialized bytes; see comment in btrescan(). Valgrind will report
323  * this as an error, but it is safe to ignore.
324  */
325  VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
326 
327  /* copy the item's data onto the page */
328  memcpy((char *) page + upper, item, size);
329 
330  /* adjust page header */
331  phdr->pd_lower = (LocationIndex) lower;
332  phdr->pd_upper = (LocationIndex) upper;
333 
334  return offsetNumber;
335 }
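
A hedged usage sketch: most callers reach this routine through the PageAddItem() wrapper in bufpage.h, and tuple/tupleSize below stand in for caller-provided data (they are not defined in this file):

   /* Illustrative only: place an item wherever a free line pointer is found. */
   OffsetNumber off = PageAddItemExtended(page, (Item) tuple, tupleSize,
                                          InvalidOffsetNumber, 0);

   if (off == InvalidOffsetNumber)
       elog(ERROR, "failed to add item to page");   /* not enough free space */
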
336 
337 
338 /*
339  * PageGetTempPage
340  * Get a temporary page in local memory for special processing.
341  * The returned page is not initialized at all; caller must do that.
342  */
343 Page
344 PageGetTempPage(Page page)
345 {
346  Size pageSize;
347  Page temp;
348 
349  pageSize = PageGetPageSize(page);
350  temp = (Page) palloc(pageSize);
351 
352  return temp;
353 }
354 
355 /*
356  * PageGetTempPageCopy
357  * Get a temporary page in local memory for special processing.
358  * The page is initialized by copying the contents of the given page.
359  */
360 Page
361 PageGetTempPageCopy(Page page)
362 {
363  Size pageSize;
364  Page temp;
365 
366  pageSize = PageGetPageSize(page);
367  temp = (Page) palloc(pageSize);
368 
369  memcpy(temp, page, pageSize);
370 
371  return temp;
372 }
373 
374 /*
375  * PageGetTempPageCopySpecial
376  * Get a temporary page in local memory for special processing.
377  * The page is PageInit'd with the same special-space size as the
378  * given page, and the special space is copied from the given page.
379  */
380 Page
381 PageGetTempPageCopySpecial(Page page)
382 {
383  Size pageSize;
384  Page temp;
385 
386  pageSize = PageGetPageSize(page);
387  temp = (Page) palloc(pageSize);
388 
389  PageInit(temp, pageSize, PageGetSpecialSize(page));
390  memcpy(PageGetSpecialPointer(temp),
391  PageGetSpecialPointer(page),
392  PageGetSpecialSize(page));
393 
394  return temp;
395 }
396 
397 /*
398  * PageRestoreTempPage
399  * Copy temporary page back to permanent page after special processing
400  * and release the temporary page.
401  */
402 void
403 PageRestoreTempPage(Page tempPage, Page oldPage)
404 {
405  Size pageSize;
406 
407  pageSize = PageGetPageSize(tempPage);
408  memcpy((char *) oldPage, (char *) tempPage, pageSize);
409 
410  pfree(tempPage);
411 }
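
Taken together, the PageGetTemp* functions and PageRestoreTempPage support a rebuild-in-scratch-space pattern. A minimal sketch, assuming the caller already holds an exclusive lock on the target buffer and that origpage points at its contents:

   /* Illustrative only: rebuild an index page in a scratch copy. */
   Page tmp = PageGetTempPageCopySpecial(origpage);

   /* ... add the surviving tuples to tmp with PageAddItemExtended() ... */

   PageRestoreTempPage(tmp, origpage);   /* copies tmp back and pfrees it */
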
412 
413 /*
414  * sorting support for PageRepairFragmentation and PageIndexMultiDelete
415  */
416 typedef struct itemIdSortData
417 {
418  uint16 offsetindex; /* linp array index */
419  int16 itemoff; /* page offset of item data */
420  uint16 alignedlen; /* MAXALIGN(item data len) */
421 } itemIdSortData;
422 typedef itemIdSortData *itemIdSort;
423 
424 static int
425 itemoffcompare(const void *itemidp1, const void *itemidp2)
426 {
427  /* Sort in decreasing itemoff order */
428  return ((itemIdSort) itemidp2)->itemoff -
429  ((itemIdSort) itemidp1)->itemoff;
430 }
431 
432 /*
433  * After removing or marking some line pointers unused, move the tuples to
434  * remove the gaps caused by the removed items.
435  */
436 static void
437 compactify_tuples(itemIdSort itemidbase, int nitems, Page page)
438 {
439  PageHeader phdr = (PageHeader) page;
440  Offset upper;
441  int i;
442 
443  /* sort itemIdSortData array into decreasing itemoff order */
444  qsort((char *) itemidbase, nitems, sizeof(itemIdSortData),
445  itemoffcompare);
446 
447  upper = phdr->pd_special;
448  for (i = 0; i < nitems; i++)
449  {
450  itemIdSort itemidptr = &itemidbase[i];
451  ItemId lp;
452 
453  lp = PageGetItemId(page, itemidptr->offsetindex + 1);
454  upper -= itemidptr->alignedlen;
455  memmove((char *) page + upper,
456  (char *) page + itemidptr->itemoff,
457  itemidptr->alignedlen);
458  lp->lp_off = upper;
459  }
460 
461  phdr->pd_upper = upper;
462 }
463 
464 /*
465  * PageRepairFragmentation
466  *
467  * Frees fragmented space on a page.
468  * It doesn't remove unused line pointers! Please don't change this.
469  *
470  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
471  *
472  * As a side effect, the page's PD_HAS_FREE_LINES hint bit is updated.
473  */
474 void
475 PageRepairFragmentation(Page page)
476 {
477  Offset pd_lower = ((PageHeader) page)->pd_lower;
478  Offset pd_upper = ((PageHeader) page)->pd_upper;
479  Offset pd_special = ((PageHeader) page)->pd_special;
480  itemIdSortData itemidbase[MaxHeapTuplesPerPage];
481  itemIdSort itemidptr;
482  ItemId lp;
483  int nline,
484  nstorage,
485  nunused;
486  int i;
487  Size totallen;
488 
489  /*
490  * It's worth the trouble to be more paranoid here than in most places,
491  * because we are about to reshuffle data in (what is usually) a shared
492  * disk buffer. If we aren't careful then corrupted pointers, lengths,
493  * etc could cause us to clobber adjacent disk buffers, spreading the data
494  * loss further. So, check everything.
495  */
496  if (pd_lower < SizeOfPageHeaderData ||
497  pd_lower > pd_upper ||
498  pd_upper > pd_special ||
499  pd_special > BLCKSZ ||
500  pd_special != MAXALIGN(pd_special))
501  ereport(ERROR,
502  (errcode(ERRCODE_DATA_CORRUPTED),
503  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
504  pd_lower, pd_upper, pd_special)));
505 
506  /*
507  * Run through the line pointer array and collect data about live items.
508  */
509  nline = PageGetMaxOffsetNumber(page);
510  itemidptr = itemidbase;
511  nunused = totallen = 0;
512  for (i = FirstOffsetNumber; i <= nline; i++)
513  {
514  lp = PageGetItemId(page, i);
515  if (ItemIdIsUsed(lp))
516  {
517  if (ItemIdHasStorage(lp))
518  {
519  itemidptr->offsetindex = i - 1;
520  itemidptr->itemoff = ItemIdGetOffset(lp);
521  if (unlikely(itemidptr->itemoff < (int) pd_upper ||
522  itemidptr->itemoff >= (int) pd_special))
523  ereport(ERROR,
524  (errcode(ERRCODE_DATA_CORRUPTED),
525  errmsg("corrupted line pointer: %u",
526  itemidptr->itemoff)));
527  itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
528  totallen += itemidptr->alignedlen;
529  itemidptr++;
530  }
531  }
532  else
533  {
534  /* Unused entries should have lp_len = 0, but make sure */
535  ItemIdSetUnused(lp);
536  nunused++;
537  }
538  }
539 
540  nstorage = itemidptr - itemidbase;
541  if (nstorage == 0)
542  {
543  /* Page is completely empty, so just reset it quickly */
544  ((PageHeader) page)->pd_upper = pd_special;
545  }
546  else
547  {
548  /* Need to compact the page the hard way */
549  if (totallen > (Size) (pd_special - pd_lower))
550  ereport(ERROR,
551  (errcode(ERRCODE_DATA_CORRUPTED),
552  errmsg("corrupted item lengths: total %u, available space %u",
553  (unsigned int) totallen, pd_special - pd_lower)));
554 
555  compactify_tuples(itemidbase, nstorage, page);
556  }
557 
558  /* Set hint bit for PageAddItem */
559  if (nunused > 0)
560  PageSetHasFreeLinePointers(page);
561  else
562  PageClearHasFreeLinePointers(page);
563 }
564 
565 /*
566  * PageGetFreeSpace
567  * Returns the size of the free (allocatable) space on a page,
568  * reduced by the space needed for a new line pointer.
569  *
570  * Note: this should usually only be used on index pages. Use
571  * PageGetHeapFreeSpace on heap pages.
572  */
573 Size
574 PageGetFreeSpace(Page page)
575 {
576  int space;
577 
578  /*
579  * Use signed arithmetic here so that we behave sensibly if pd_lower >
580  * pd_upper.
581  */
582  space = (int) ((PageHeader) page)->pd_upper -
583  (int) ((PageHeader) page)->pd_lower;
584 
585  if (space < (int) sizeof(ItemIdData))
586  return 0;
587  space -= sizeof(ItemIdData);
588 
589  return (Size) space;
590 }
591 
592 /*
593  * PageGetFreeSpaceForMultipleTuples
594  * Returns the size of the free (allocatable) space on a page,
595  * reduced by the space needed for multiple new line pointers.
596  *
597  * Note: this should usually only be used on index pages. Use
598  * PageGetHeapFreeSpace on heap pages.
599  */
600 Size
601 PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
602 {
603  int space;
604 
605  /*
606  * Use signed arithmetic here so that we behave sensibly if pd_lower >
607  * pd_upper.
608  */
609  space = (int) ((PageHeader) page)->pd_upper -
610  (int) ((PageHeader) page)->pd_lower;
611 
612  if (space < (int) (ntups * sizeof(ItemIdData)))
613  return 0;
614  space -= ntups * sizeof(ItemIdData);
615 
616  return (Size) space;
617 }
618 
619 /*
620  * PageGetExactFreeSpace
621  * Returns the size of the free (allocatable) space on a page,
622  * without any consideration for adding/removing line pointers.
623  */
624 Size
625 PageGetExactFreeSpace(Page page)
626 {
627  int space;
628 
629  /*
630  * Use signed arithmetic here so that we behave sensibly if pd_lower >
631  * pd_upper.
632  */
633  space = (int) ((PageHeader) page)->pd_upper -
634  (int) ((PageHeader) page)->pd_lower;
635 
636  if (space < 0)
637  return 0;
638 
639  return (Size) space;
640 }
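
To make the three free-space variants concrete, a small worked example (figures assume the default 8192-byte block size and a 4-byte ItemIdData):

   /*
    * With pd_lower = 40 and pd_upper = 8000 on an 8192-byte page:
    *   PageGetExactFreeSpace(page)                = 8000 - 40    = 7960
    *   PageGetFreeSpace(page)                     = 7960 - 4     = 7956
    *   PageGetFreeSpaceForMultipleTuples(page, 3) = 7960 - 3 * 4 = 7948
    */
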
641 
642 
643 /*
644  * PageGetHeapFreeSpace
645  * Returns the size of the free (allocatable) space on a page,
646  * reduced by the space needed for a new line pointer.
647  *
648  * The difference between this and PageGetFreeSpace is that this will return
649  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
650  * and none are free. We use this to enforce that no more than
651  * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
652  * no more tuples than that could fit anyway, in the presence of redirected
653  * or dead line pointers it'd be possible to have too many line pointers.
654  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
655  * on the number of line pointers, we make this extra check.)
656  */
657 Size
658 PageGetHeapFreeSpace(Page page)
659 {
660  Size space;
661 
662  space = PageGetFreeSpace(page);
663  if (space > 0)
664  {
665  OffsetNumber offnum,
666  nline;
667 
668  /*
669  * Are there already MaxHeapTuplesPerPage line pointers in the page?
670  */
671  nline = PageGetMaxOffsetNumber(page);
672  if (nline >= MaxHeapTuplesPerPage)
673  {
674  if (PageHasFreeLinePointers(page))
675  {
676  /*
677  * Since this is just a hint, we must confirm that there is
678  * indeed a free line pointer
679  */
680  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
681  {
682  ItemId lp = PageGetItemId(page, offnum);
683 
684  if (!ItemIdIsUsed(lp))
685  break;
686  }
687 
688  if (offnum > nline)
689  {
690  /*
691  * The hint is wrong, but we can't clear it here since we
692  * don't have the ability to mark the page dirty.
693  */
694  space = 0;
695  }
696  }
697  else
698  {
699  /*
700  * Although the hint might be wrong, PageAddItem will believe
701  * it anyway, so we must believe it too.
702  */
703  space = 0;
704  }
705  }
706  }
707  return space;
708 }
709 
710 
711 /*
712  * PageIndexTupleDelete
713  *
714  * This routine does the work of removing a tuple from an index page.
715  *
716  * Unlike heap pages, we compact out the line pointer for the removed tuple.
717  */
718 void
719 PageIndexTupleDelete(Page page, OffsetNumber offnum)
720 {
721  PageHeader phdr = (PageHeader) page;
722  char *addr;
723  ItemId tup;
724  Size size;
725  unsigned offset;
726  int nbytes;
727  int offidx;
728  int nline;
729 
730  /*
731  * As with PageRepairFragmentation, paranoia seems justified.
732  */
733  if (phdr->pd_lower < SizeOfPageHeaderData ||
734  phdr->pd_lower > phdr->pd_upper ||
735  phdr->pd_upper > phdr->pd_special ||
736  phdr->pd_special > BLCKSZ ||
737  phdr->pd_special != MAXALIGN(phdr->pd_special))
738  ereport(ERROR,
739  (errcode(ERRCODE_DATA_CORRUPTED),
740  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
741  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
742 
743  nline = PageGetMaxOffsetNumber(page);
744  if ((int) offnum <= 0 || (int) offnum > nline)
745  elog(ERROR, "invalid index offnum: %u", offnum);
746 
747  /* change offset number to offset index */
748  offidx = offnum - 1;
749 
750  tup = PageGetItemId(page, offnum);
751  Assert(ItemIdHasStorage(tup));
752  size = ItemIdGetLength(tup);
753  offset = ItemIdGetOffset(tup);
754 
755  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
756  offset != MAXALIGN(offset))
757  ereport(ERROR,
758  (errcode(ERRCODE_DATA_CORRUPTED),
759  errmsg("corrupted line pointer: offset = %u, size = %u",
760  offset, (unsigned int) size)));
761 
762  /* Amount of space to actually be deleted */
763  size = MAXALIGN(size);
764 
765  /*
766  * First, we want to get rid of the pd_linp entry for the index tuple. We
767  * copy all subsequent linp's back one slot in the array. We don't use
768  * PageGetItemId, because we are manipulating the _array_, not individual
769  * linp's.
770  */
771  nbytes = phdr->pd_lower -
772  ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
773 
774  if (nbytes > 0)
775  memmove((char *) &(phdr->pd_linp[offidx]),
776  (char *) &(phdr->pd_linp[offidx + 1]),
777  nbytes);
778 
779  /*
780  * Now move everything between the old upper bound (beginning of tuple
781  * space) and the beginning of the deleted tuple forward, so that space in
782  * the middle of the page is left free. If we've just deleted the tuple
783  * at the beginning of tuple space, then there's no need to do the copy.
784  */
785 
786  /* beginning of tuple space */
787  addr = (char *) page + phdr->pd_upper;
788 
789  if (offset > phdr->pd_upper)
790  memmove(addr + size, addr, offset - phdr->pd_upper);
791 
792  /* adjust free space boundary pointers */
793  phdr->pd_upper += size;
794  phdr->pd_lower -= sizeof(ItemIdData);
795 
796  /*
797  * Finally, we need to adjust the linp entries that remain.
798  *
799  * Anything that used to be before the deleted tuple's data was moved
800  * forward by the size of the deleted tuple.
801  */
802  if (!PageIsEmpty(page))
803  {
804  int i;
805 
806  nline--; /* there's one less than when we started */
807  for (i = 1; i <= nline; i++)
808  {
809  ItemId ii = PageGetItemId(phdr, i);
810 
811  Assert(ItemIdHasStorage(ii));
812  if (ItemIdGetOffset(ii) <= offset)
813  ii->lp_off += size;
814  }
815  }
816 }
817 
818 
819 /*
820  * PageIndexMultiDelete
821  *
822  * This routine handles the case of deleting multiple tuples from an
823  * index page at once. It is considerably faster than a loop around
824  * PageIndexTupleDelete ... however, the caller *must* supply the array
825  * of item numbers to be deleted in item number order!
826  */
827 void
828 PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
829 {
830  PageHeader phdr = (PageHeader) page;
831  Offset pd_lower = phdr->pd_lower;
832  Offset pd_upper = phdr->pd_upper;
833  Offset pd_special = phdr->pd_special;
834  itemIdSortData itemidbase[MaxIndexTuplesPerPage];
835  ItemIdData newitemids[MaxIndexTuplesPerPage];
836  itemIdSort itemidptr;
837  ItemId lp;
838  int nline,
839  nused;
840  Size totallen;
841  Size size;
842  unsigned offset;
843  int nextitm;
844  OffsetNumber offnum;
845 
846  Assert(nitems <= MaxIndexTuplesPerPage);
847 
848  /*
849  * If there aren't very many items to delete, then retail
850  * PageIndexTupleDelete is the best way. Delete the items in reverse
851  * order so we don't have to think about adjusting item numbers for
852  * previous deletions.
853  *
854  * TODO: tune the magic number here
855  */
856  if (nitems <= 2)
857  {
858  while (--nitems >= 0)
859  PageIndexTupleDelete(page, itemnos[nitems]);
860  return;
861  }
862 
863  /*
864  * As with PageRepairFragmentation, paranoia seems justified.
865  */
866  if (pd_lower < SizeOfPageHeaderData ||
867  pd_lower > pd_upper ||
868  pd_upper > pd_special ||
869  pd_special > BLCKSZ ||
870  pd_special != MAXALIGN(pd_special))
871  ereport(ERROR,
872  (errcode(ERRCODE_DATA_CORRUPTED),
873  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
874  pd_lower, pd_upper, pd_special)));
875 
876  /*
877  * Scan the line pointer array and build a list of just the ones we are
878  * going to keep. Notice we do not modify the page yet, since we are
879  * still validity-checking.
880  */
881  nline = PageGetMaxOffsetNumber(page);
882  itemidptr = itemidbase;
883  totallen = 0;
884  nused = 0;
885  nextitm = 0;
886  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
887  {
888  lp = PageGetItemId(page, offnum);
889  Assert(ItemIdHasStorage(lp));
890  size = ItemIdGetLength(lp);
891  offset = ItemIdGetOffset(lp);
892  if (offset < pd_upper ||
893  (offset + size) > pd_special ||
894  offset != MAXALIGN(offset))
895  ereport(ERROR,
896  (errcode(ERRCODE_DATA_CORRUPTED),
897  errmsg("corrupted line pointer: offset = %u, size = %u",
898  offset, (unsigned int) size)));
899 
900  if (nextitm < nitems && offnum == itemnos[nextitm])
901  {
902  /* skip item to be deleted */
903  nextitm++;
904  }
905  else
906  {
907  itemidptr->offsetindex = nused; /* where it will go */
908  itemidptr->itemoff = offset;
909  itemidptr->alignedlen = MAXALIGN(size);
910  totallen += itemidptr->alignedlen;
911  newitemids[nused] = *lp;
912  itemidptr++;
913  nused++;
914  }
915  }
916 
917  /* this will catch invalid or out-of-order itemnos[] */
918  if (nextitm != nitems)
919  elog(ERROR, "incorrect index offsets supplied");
920 
921  if (totallen > (Size) (pd_special - pd_lower))
922  ereport(ERROR,
923  (errcode(ERRCODE_DATA_CORRUPTED),
924  errmsg("corrupted item lengths: total %u, available space %u",
925  (unsigned int) totallen, pd_special - pd_lower)));
926 
927  /*
928  * Looks good. Overwrite the line pointers with the copy, from which we've
929  * removed all the unused items.
930  */
931  memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
932  phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
933 
934  /* and compactify the tuple data */
935  compactify_tuples(itemidbase, nused, page);
936 }
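
A usage sketch; the one hard requirement is that the offsets passed in are in ascending order (deadoffsets and ndead are hypothetical caller state, not names from this file):

   /* Illustrative only: itemnos must already be sorted in ascending order. */
   OffsetNumber deadoffsets[MaxIndexTuplesPerPage];
   int          ndead = 0;

   /* ... fill deadoffsets[0..ndead-1] in increasing offset order ... */

   if (ndead > 0)
       PageIndexMultiDelete(page, deadoffsets, ndead);
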
937 
938 
939 /*
940  * PageIndexTupleDeleteNoCompact
941  *
942  * Remove the specified tuple from an index page, but set its line pointer
943  * to "unused" instead of compacting it out, except that it can be removed
944  * if it's the last line pointer on the page.
945  *
946  * This is used for index AMs that require that existing TIDs of live tuples
947  * remain unchanged, and are willing to allow unused line pointers instead.
948  */
949 void
950 PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
951 {
952  PageHeader phdr = (PageHeader) page;
953  char *addr;
954  ItemId tup;
955  Size size;
956  unsigned offset;
957  int nline;
958 
959  /*
960  * As with PageRepairFragmentation, paranoia seems justified.
961  */
962  if (phdr->pd_lower < SizeOfPageHeaderData ||
963  phdr->pd_lower > phdr->pd_upper ||
964  phdr->pd_upper > phdr->pd_special ||
965  phdr->pd_special > BLCKSZ ||
966  phdr->pd_special != MAXALIGN(phdr->pd_special))
967  ereport(ERROR,
968  (errcode(ERRCODE_DATA_CORRUPTED),
969  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
970  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
971 
972  nline = PageGetMaxOffsetNumber(page);
973  if ((int) offnum <= 0 || (int) offnum > nline)
974  elog(ERROR, "invalid index offnum: %u", offnum);
975 
976  tup = PageGetItemId(page, offnum);
977  Assert(ItemIdHasStorage(tup));
978  size = ItemIdGetLength(tup);
979  offset = ItemIdGetOffset(tup);
980 
981  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
982  offset != MAXALIGN(offset))
983  ereport(ERROR,
984  (errcode(ERRCODE_DATA_CORRUPTED),
985  errmsg("corrupted line pointer: offset = %u, size = %u",
986  offset, (unsigned int) size)));
987 
988  /* Amount of space to actually be deleted */
989  size = MAXALIGN(size);
990 
991  /*
992  * Either set the line pointer to "unused", or zap it if it's the last
993  * one. (Note: it's possible that the next-to-last one(s) are already
994  * unused, but we do not trouble to try to compact them out if so.)
995  */
996  if ((int) offnum < nline)
997  ItemIdSetUnused(tup);
998  else
999  {
1000  phdr->pd_lower -= sizeof(ItemIdData);
1001  nline--; /* there's one less than when we started */
1002  }
1003 
1004  /*
1005  * Now move everything between the old upper bound (beginning of tuple
1006  * space) and the beginning of the deleted tuple forward, so that space in
1007  * the middle of the page is left free. If we've just deleted the tuple
1008  * at the beginning of tuple space, then there's no need to do the copy.
1009  */
1010 
1011  /* beginning of tuple space */
1012  addr = (char *) page + phdr->pd_upper;
1013 
1014  if (offset > phdr->pd_upper)
1015  memmove(addr + size, addr, offset - phdr->pd_upper);
1016 
1017  /* adjust free space boundary pointer */
1018  phdr->pd_upper += size;
1019 
1020  /*
1021  * Finally, we need to adjust the linp entries that remain.
1022  *
1023  * Anything that used to be before the deleted tuple's data was moved
1024  * forward by the size of the deleted tuple.
1025  */
1026  if (!PageIsEmpty(page))
1027  {
1028  int i;
1029 
1030  for (i = 1; i <= nline; i++)
1031  {
1032  ItemId ii = PageGetItemId(phdr, i);
1033 
1034  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1035  ii->lp_off += size;
1036  }
1037  }
1038 }
1039 
1040 
1041 /*
1042  * PageIndexTupleOverwrite
1043  *
1044  * Replace a specified tuple on an index page.
1045  *
1046  * The new tuple is placed exactly where the old one had been, shifting
1047  * other tuples' data up or down as needed to keep the page compacted.
1048  * This is better than deleting and reinserting the tuple, because it
1049  * avoids any data shifting when the tuple size doesn't change; and
1050  * even when it does, we avoid moving the line pointers around.
1051  * This could be used by an index AM that doesn't want to unset the
1052  * LP_DEAD bit when it happens to be set. It could conceivably also be
1053  * used by an index AM that cares about the physical order of tuples as
1054  * well as their logical/ItemId order.
1055  *
1056  * If there's insufficient space for the new tuple, return false. Other
1057  * errors represent data-corruption problems, so we just elog.
1058  */
1059 bool
1060 PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
1061  Item newtup, Size newsize)
1062 {
1063  PageHeader phdr = (PageHeader) page;
1064  ItemId tupid;
1065  int oldsize;
1066  unsigned offset;
1067  Size alignednewsize;
1068  int size_diff;
1069  int itemcount;
1070 
1071  /*
1072  * As with PageRepairFragmentation, paranoia seems justified.
1073  */
1074  if (phdr->pd_lower < SizeOfPageHeaderData ||
1075  phdr->pd_lower > phdr->pd_upper ||
1076  phdr->pd_upper > phdr->pd_special ||
1077  phdr->pd_special > BLCKSZ ||
1078  phdr->pd_special != MAXALIGN(phdr->pd_special))
1079  ereport(ERROR,
1080  (errcode(ERRCODE_DATA_CORRUPTED),
1081  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1082  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1083 
1084  itemcount = PageGetMaxOffsetNumber(page);
1085  if ((int) offnum <= 0 || (int) offnum > itemcount)
1086  elog(ERROR, "invalid index offnum: %u", offnum);
1087 
1088  tupid = PageGetItemId(page, offnum);
1089  Assert(ItemIdHasStorage(tupid));
1090  oldsize = ItemIdGetLength(tupid);
1091  offset = ItemIdGetOffset(tupid);
1092 
1093  if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
1094  offset != MAXALIGN(offset))
1095  ereport(ERROR,
1096  (errcode(ERRCODE_DATA_CORRUPTED),
1097  errmsg("corrupted line pointer: offset = %u, size = %u",
1098  offset, (unsigned int) oldsize)));
1099 
1100  /*
1101  * Determine actual change in space requirement, check for page overflow.
1102  */
1103  oldsize = MAXALIGN(oldsize);
1104  alignednewsize = MAXALIGN(newsize);
1105  if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
1106  return false;
1107 
1108  /*
1109  * Relocate existing data and update line pointers, unless the new tuple
1110  * is the same size as the old (after alignment), in which case there's
1111  * nothing to do. Notice that what we have to relocate is data before the
1112  * target tuple, not data after, so it's convenient to express size_diff
1113  * as the amount by which the tuple's size is decreasing, making it the
1114  * delta to add to pd_upper and affected line pointers.
1115  */
1116  size_diff = oldsize - (int) alignednewsize;
1117  if (size_diff != 0)
1118  {
1119  char *addr = (char *) page + phdr->pd_upper;
1120  int i;
1121 
1122  /* relocate all tuple data before the target tuple */
1123  memmove(addr + size_diff, addr, offset - phdr->pd_upper);
1124 
1125  /* adjust free space boundary pointer */
1126  phdr->pd_upper += size_diff;
1127 
1128  /* adjust affected line pointers too */
1129  for (i = FirstOffsetNumber; i <= itemcount; i++)
1130  {
1131  ItemId ii = PageGetItemId(phdr, i);
1132 
1133  /* Allow items without storage; currently only BRIN needs that */
1134  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1135  ii->lp_off += size_diff;
1136  }
1137  }
1138 
1139  /* Update the item's tuple length without changing its lp_flags field */
1140  tupid->lp_off = offset + size_diff;
1141  tupid->lp_len = newsize;
1142 
1143  /* Copy new tuple data onto page */
1144  memcpy(PageGetItem(page, tupid), newtup, newsize);
1145 
1146  return true;
1147 }
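
A short sketch of in-place replacement as an index AM might do it (newtup and newsize are assumed to be prepared by the caller; the error message is illustrative, not taken from any real caller):

   /* Illustrative only: swap in a replacement tuple without moving its TID. */
   if (!PageIndexTupleOverwrite(page, offnum, (Item) newtup, newsize))
       elog(ERROR, "failed to overwrite index tuple: not enough free space");
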
1148 
1149 
1150 /*
1151  * Set checksum for a page in shared buffers.
1152  *
1153  * If checksums are disabled, or if the page is not initialized, just return
1154  * the input. Otherwise, we must make a copy of the page before calculating
1155  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1156  * from making the final checksum invalid. It doesn't matter if we include or
1157  * exclude hints during the copy, as long as we write a valid page and
1158  * associated checksum.
1159  *
1160  * Returns a pointer to the block-sized data that needs to be written. Uses
1161  * statically-allocated memory, so the caller must immediately write the
1162  * returned page and not refer to it again.
1163  */
1164 char *
1165 PageSetChecksumCopy(Page page, BlockNumber blkno)
1166 {
1167  static char *pageCopy = NULL;
1168 
1169  /* If we don't need a checksum, just return the passed-in data */
1170  if (PageIsNew(page) || !DataChecksumsEnabled())
1171  return (char *) page;
1172 
1173  /*
1174  * We allocate the copy space once and use it over on each subsequent
1175  * call. The point of palloc'ing here, rather than having a static char
1176  * array, is first to ensure adequate alignment for the checksumming code
1177  * and second to avoid wasting space in processes that never call this.
1178  */
1179  if (pageCopy == NULL)
1180  pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ);
1181 
1182  memcpy(pageCopy, (char *) page, BLCKSZ);
1183  ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1184  return pageCopy;
1185 }
1186 
1187 /*
1188  * Set checksum for a page in private memory.
1189  *
1190  * This must only be used when we know that no other process can be modifying
1191  * the page buffer.
1192  */
1193 void
1194 PageSetChecksumInplace(Page page, BlockNumber blkno)
1195 {
1196  /* If we don't need a checksum, just return */
1197  if (PageIsNew(page) || !DataChecksumsEnabled())
1198  return;
1199 
1200  ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
1201 }
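
A sketch of the two checksum paths described in the comments above; the surrounding write logic is omitted and the variable names (page, localpage, blocknum, bufToWrite) are placeholders:

   /* Shared buffer about to be flushed: checksum a private copy, write that. */
   char   *bufToWrite = PageSetChecksumCopy(page, blocknum);
   /* ... pass bufToWrite to the storage layer immediately ... */

   /* Backend-local page (e.g. built during a bulk load): checksum it in place. */
   PageSetChecksumInplace(localpage, blocknum);
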