PostgreSQL Source Code (git master)
bufpage.c
1 /*-------------------------------------------------------------------------
2  *
3  * bufpage.c
4  * POSTGRES standard buffer page code.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/storage/page/bufpage.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/htup_details.h"
18 #include "access/itup.h"
19 #include "access/xlog.h"
20 #include "storage/checksum.h"
21 #include "utils/memdebug.h"
22 #include "utils/memutils.h"
23 
24 
25 /* GUC variable */
26 bool ignore_checksum_failure = false;
27 
28 
29 /* ----------------------------------------------------------------
30  * Page support functions
31  * ----------------------------------------------------------------
32  */
33 
34 /*
35  * PageInit
36  * Initializes the contents of a page.
37  * Note that we don't calculate an initial checksum here; that's not done
38  * until it's time to write.
39  */
40 void
41 PageInit(Page page, Size pageSize, Size specialSize)
42 {
43  PageHeader p = (PageHeader) page;
44 
45  specialSize = MAXALIGN(specialSize);
46 
47  Assert(pageSize == BLCKSZ);
48  Assert(pageSize > specialSize + SizeOfPageHeaderData);
49 
50  /* Make sure all fields of page are zero, as well as unused space */
51  MemSet(p, 0, pageSize);
52 
53  p->pd_flags = 0;
54  p->pd_lower = SizeOfPageHeaderData;
55  p->pd_upper = pageSize - specialSize;
56  p->pd_special = pageSize - specialSize;
57  PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
58  /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
59 }
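A minimal caller-side sketch (not part of bufpage.c): allocate a block in local memory and initialize it with room for a 16-byte special area. The helper name is made up, and a normal backend build environment (postgres.h, storage/bufpage.h) is assumed.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical helper: build an empty page with a small special area */
static Page
make_empty_page_with_special(void)
{
    Page    page = (Page) palloc(BLCKSZ);

    /* zeroes the block and sets pd_lower/pd_upper/pd_special; no checksum yet */
    PageInit(page, BLCKSZ, 16);
    return page;
}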
60 
61 
62 /*
63  * PageIsVerified
64  * Check that the page header and checksum (if any) appear valid.
65  *
66  * This is called when a page has just been read in from disk. The idea is
67  * to cheaply detect trashed pages before we go nuts following bogus item
68  * pointers, testing invalid transaction identifiers, etc.
69  *
70  * It turns out to be necessary to allow zeroed pages here too. Even though
71  * this routine is *not* called when deliberately adding a page to a relation,
72  * there are scenarios in which a zeroed page might be found in a table.
73  * (Example: a backend extends a relation, then crashes before it can write
74  * any WAL entry about the new page. The kernel will already have the
75  * zeroed page in the file, and it will stay that way after restart.) So we
76  * allow zeroed pages here, and are careful that the page access macros
77  * treat such a page as empty and without free space. Eventually, VACUUM
78  * will clean up such a page and make it usable.
79  */
80 bool
81 PageIsVerified(Page page, BlockNumber blkno)
82 {
83  PageHeader p = (PageHeader) page;
84  size_t *pagebytes;
85  int i;
86  bool checksum_failure = false;
87  bool header_sane = false;
88  bool all_zeroes = false;
89  uint16 checksum = 0;
90 
91  /*
92  * Don't verify page data unless the page passes basic non-zero test
93  */
94  if (!PageIsNew(page))
95  {
96  if (DataChecksumsEnabled())
97  {
98  checksum = pg_checksum_page((char *) page, blkno);
99 
100  if (checksum != p->pd_checksum)
101  checksum_failure = true;
102  }
103 
104  /*
105  * The following checks don't prove the header is correct, only that
106  * it looks sane enough to allow into the buffer pool. Later usage of
107  * the block can still reveal problems, which is why we offer the
108  * checksum option.
109  */
110  if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
111  p->pd_lower <= p->pd_upper &&
112  p->pd_upper <= p->pd_special &&
113  p->pd_special <= BLCKSZ &&
114  p->pd_special == MAXALIGN(p->pd_special))
115  header_sane = true;
116 
117  if (header_sane && !checksum_failure)
118  return true;
119  }
120 
121  /*
122  * Check all-zeroes case. Luckily BLCKSZ is guaranteed to always be a
123  * multiple of size_t - and it's much faster to compare memory using the
124  * native word size.
125  */
126  StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
127  "BLCKSZ has to be a multiple of sizeof(size_t)");
128 
129  all_zeroes = true;
130  pagebytes = (size_t *) page;
131  for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
132  {
133  if (pagebytes[i] != 0)
134  {
135  all_zeroes = false;
136  break;
137  }
138  }
139 
140  if (all_zeroes)
141  return true;
142 
143  /*
144  * Throw a WARNING if the checksum fails, but only after we've checked for
145  * the all-zeroes case.
146  */
147  if (checksum_failure)
148  {
149  ereport(WARNING,
150  (ERRCODE_DATA_CORRUPTED,
151  errmsg("page verification failed, calculated checksum %u but expected %u",
152  checksum, p->pd_checksum)));
153 
154  if (header_sane && ignore_checksum_failure)
155  return true;
156  }
157 
158  return false;
159 }
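A hedged sketch of the intended call pattern: verify a block right after it has been read from disk and treat a false result as corruption, unless the caller deliberately accepts damaged pages. The helper name and error text are illustrative, not taken from the buffer manager.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical helper: reject a freshly read block that fails verification */
static void
check_block_just_read(char *buffer, BlockNumber blkno)
{
    if (!PageIsVerified((Page) buffer, blkno))
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("invalid page in block %u", blkno)));
}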
160 
161 
162 /*
163  * PageAddItemExtended
164  *
165  * Add an item to a page. Return value is the offset at which it was
166  * inserted, or InvalidOffsetNumber if the item is not inserted for any
167  * reason. A WARNING is issued indicating the reason for the refusal.
168  *
169  * offsetNumber must be either InvalidOffsetNumber to specify finding a
170  * free item pointer, or a value between FirstOffsetNumber and one past
171  * the last existing item, to specify using that particular item pointer.
172  *
173  * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
174  * the item at the specified offsetNumber, which must be either a
175  * currently-unused item pointer, or one past the last existing item.
176  *
177  * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
178  * the item at the specified offsetNumber, moving existing items later
179  * in the array to make room.
180  *
181  * If offsetNumber is not valid, then assign a slot by finding the first
182  * one that is both unused and deallocated.
183  *
184  * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
185  * MaxHeapTuplesPerPage line pointers on the page.
186  *
187  * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
188  */
189 OffsetNumber
190 PageAddItemExtended(Page page,
191  Item item,
192  Size size,
193  OffsetNumber offsetNumber,
194  int flags)
195 {
196  PageHeader phdr = (PageHeader) page;
197  Size alignedSize;
198  int lower;
199  int upper;
200  ItemId itemId;
201  OffsetNumber limit;
202  bool needshuffle = false;
203 
204  /*
205  * Be wary about corrupted page pointers
206  */
207  if (phdr->pd_lower < SizeOfPageHeaderData ||
208  phdr->pd_lower > phdr->pd_upper ||
209  phdr->pd_upper > phdr->pd_special ||
210  phdr->pd_special > BLCKSZ)
211  ereport(PANIC,
212  (errcode(ERRCODE_DATA_CORRUPTED),
213  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
214  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
215 
216  /*
217  * Select offsetNumber to place the new item at
218  */
219  limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
220 
221  /* was offsetNumber passed in? */
222  if (OffsetNumberIsValid(offsetNumber))
223  {
224  /* yes, check it */
225  if ((flags & PAI_OVERWRITE) != 0)
226  {
227  if (offsetNumber < limit)
228  {
229  itemId = PageGetItemId(phdr, offsetNumber);
230  if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
231  {
232  elog(WARNING, "will not overwrite a used ItemId");
233  return InvalidOffsetNumber;
234  }
235  }
236  }
237  else
238  {
239  if (offsetNumber < limit)
240  needshuffle = true; /* need to move existing linp's */
241  }
242  }
243  else
244  {
245  /* offsetNumber was not passed in, so find a free slot */
246  /* if no free slot, we'll put it at limit (1st open slot) */
247  if (PageHasFreeLinePointers(phdr))
248  {
249  /*
250  * Look for "recyclable" (unused) ItemId. We check for no storage
251  * as well, just to be paranoid --- unused items should never have
252  * storage.
253  */
254  for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
255  {
256  itemId = PageGetItemId(phdr, offsetNumber);
257  if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
258  break;
259  }
260  if (offsetNumber >= limit)
261  {
262  /* the hint is wrong, so reset it */
263  PageClearHasFreeLinePointers(phdr);
264  }
265  }
266  else
267  {
268  /* don't bother searching if hint says there's no free slot */
269  offsetNumber = limit;
270  }
271  }
272 
273  /* Reject placing items beyond the first unused line pointer */
274  if (offsetNumber > limit)
275  {
276  elog(WARNING, "specified item offset is too large");
277  return InvalidOffsetNumber;
278  }
279 
280  /* Reject placing items beyond heap boundary, if heap */
281  if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
282  {
283  elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
284  return InvalidOffsetNumber;
285  }
286 
287  /*
288  * Compute new lower and upper pointers for page, see if it'll fit.
289  *
290  * Note: do arithmetic as signed ints, to avoid mistakes if, say,
291  * alignedSize > pd_upper.
292  */
293  if (offsetNumber == limit || needshuffle)
294  lower = phdr->pd_lower + sizeof(ItemIdData);
295  else
296  lower = phdr->pd_lower;
297 
298  alignedSize = MAXALIGN(size);
299 
300  upper = (int) phdr->pd_upper - (int) alignedSize;
301 
302  if (lower > upper)
303  return InvalidOffsetNumber;
304 
305  /*
306  * OK to insert the item. First, shuffle the existing pointers if needed.
307  */
308  itemId = PageGetItemId(phdr, offsetNumber);
309 
310  if (needshuffle)
311  memmove(itemId + 1, itemId,
312  (limit - offsetNumber) * sizeof(ItemIdData));
313 
314  /* set the item pointer */
315  ItemIdSetNormal(itemId, upper, size);
316 
317  /*
318  * Items normally contain no uninitialized bytes. Core bufpage consumers
319  * conform, but this is not a necessary coding rule; a new index AM could
320  * opt to depart from it. However, data type input functions and other
321  * C-language functions that synthesize datums should initialize all
322  * bytes; datumIsEqual() relies on this. Testing here, along with the
323  * similar check in printtup(), helps to catch such mistakes.
324  *
325  * Values of the "name" type retrieved via index-only scans may contain
326  * uninitialized bytes; see comment in btrescan(). Valgrind will report
327  * this as an error, but it is safe to ignore.
328  */
329  VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
330 
331  /* copy the item's data onto the page */
332  memcpy((char *) page + upper, item, size);
333 
334  /* adjust page header */
335  phdr->pd_lower = (LocationIndex) lower;
336  phdr->pd_upper = (LocationIndex) upper;
337 
338  return offsetNumber;
339 }
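A small sketch of the common calling convention, assuming the backend headers: pass InvalidOffsetNumber and no PAI_* flags to let the routine pick the first free slot, and treat InvalidOffsetNumber in the return value as "page full". The wrapper name is hypothetical.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical wrapper: append an item wherever it fits */
static bool
append_item(Page page, Item tuple, Size tuplen)
{
    OffsetNumber off;

    off = PageAddItemExtended(page, tuple, tuplen,
                              InvalidOffsetNumber,  /* let it choose a slot */
                              0);                   /* no PAI_* flags */

    return OffsetNumberIsValid(off);
}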
340 
341 
342 /*
343  * PageGetTempPage
344  * Get a temporary page in local memory for special processing.
345  * The returned page is not initialized at all; caller must do that.
346  */
347 Page
348 PageGetTempPage(Page page)
349 {
350  Size pageSize;
351  Page temp;
352 
353  pageSize = PageGetPageSize(page);
354  temp = (Page) palloc(pageSize);
355 
356  return temp;
357 }
358 
359 /*
360  * PageGetTempPageCopy
361  * Get a temporary page in local memory for special processing.
362  * The page is initialized by copying the contents of the given page.
363  */
364 Page
365 PageGetTempPageCopy(Page page)
366 {
367  Size pageSize;
368  Page temp;
369 
370  pageSize = PageGetPageSize(page);
371  temp = (Page) palloc(pageSize);
372 
373  memcpy(temp, page, pageSize);
374 
375  return temp;
376 }
377 
378 /*
379  * PageGetTempPageCopySpecial
380  * Get a temporary page in local memory for special processing.
381  * The page is PageInit'd with the same special-space size as the
382  * given page, and the special space is copied from the given page.
383  */
384 Page
385 PageGetTempPageCopySpecial(Page page)
386 {
387  Size pageSize;
388  Page temp;
389 
390  pageSize = PageGetPageSize(page);
391  temp = (Page) palloc(pageSize);
392 
393  PageInit(temp, pageSize, PageGetSpecialSize(page));
394  memcpy(PageGetSpecialPointer(temp),
395  PageGetSpecialPointer(page),
396  PageGetSpecialSize(page));
397 
398  return temp;
399 }
400 
401 /*
402  * PageRestoreTempPage
403  * Copy temporary page back to permanent page after special processing
404  * and release the temporary page.
405  */
406 void
407 PageRestoreTempPage(Page tempPage, Page oldPage)
408 {
409  Size pageSize;
410 
411  pageSize = PageGetPageSize(tempPage);
412  memcpy((char *) oldPage, (char *) tempPage, pageSize);
413 
414  pfree(tempPage);
415 }
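The three temp-page routines above are normally used together. Below is a sketch of that pattern for an AM that rebuilds a page and does not need to preserve item offsets; the function name is hypothetical, the use of ItemIdIsDead as the "drop this one" test is illustrative, and the backend headers are assumed.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical example: rebuild a page, dropping items already marked dead */
static void
rewrite_page(Page page)
{
    Page            tmp = PageGetTempPageCopySpecial(page);
    OffsetNumber    off,
                    maxoff = PageGetMaxOffsetNumber(page);

    for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        ItemId      iid = PageGetItemId(page, off);

        if (ItemIdIsDead(iid))
            continue;           /* simply don't copy dead items */

        if (PageAddItemExtended(tmp, PageGetItem(page, iid),
                                ItemIdGetLength(iid),
                                InvalidOffsetNumber, 0) == InvalidOffsetNumber)
            elog(ERROR, "could not add item to temporary page");
    }

    /* copy the rebuilt image back over the original and pfree the copy */
    PageRestoreTempPage(tmp, page);
}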
416 
417 /*
418  * sorting support for PageRepairFragmentation and PageIndexMultiDelete
419  */
420 typedef struct itemIdSortData
421 {
422  uint16 offsetindex; /* linp array index */
423  int16 itemoff; /* page offset of item data */
424  uint16 alignedlen; /* MAXALIGN(item data len) */
425 } itemIdSortData;
426 typedef itemIdSortData *itemIdSort;
427 
428 static int
429 itemoffcompare(const void *itemidp1, const void *itemidp2)
430 {
431  /* Sort in decreasing itemoff order */
432  return ((itemIdSort) itemidp2)->itemoff -
433  ((itemIdSort) itemidp1)->itemoff;
434 }
435 
436 /*
437  * After removing or marking some line pointers unused, move the tuples to
438  * remove the gaps caused by the removed items.
439  */
440 static void
441 compactify_tuples(itemIdSort itemidbase, int nitems, Page page)
442 {
443  PageHeader phdr = (PageHeader) page;
444  Offset upper;
445  int i;
446 
447  /* sort itemIdSortData array into decreasing itemoff order */
448  qsort((char *) itemidbase, nitems, sizeof(itemIdSortData),
449  itemoffcompare);
450 
451  upper = phdr->pd_special;
452  for (i = 0; i < nitems; i++)
453  {
454  itemIdSort itemidptr = &itemidbase[i];
455  ItemId lp;
456 
457  lp = PageGetItemId(page, itemidptr->offsetindex + 1);
458  upper -= itemidptr->alignedlen;
459  memmove((char *) page + upper,
460  (char *) page + itemidptr->itemoff,
461  itemidptr->alignedlen);
462  lp->lp_off = upper;
463  }
464 
465  phdr->pd_upper = upper;
466 }
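Why sort by decreasing itemoff? Packing tuples downward from pd_special in that order means each tuple's new location is at or above its old one on a well-formed page, so every memmove only overwrites bytes that have already been copied out (or the tuple's own old bytes, which memmove handles). A standalone toy illustration with made-up offsets, no PostgreSQL headers required:

#include <stdio.h>
#include <stdlib.h>

struct frag { int itemoff; int alignedlen; };

static int
by_desc_itemoff(const void *a, const void *b)
{
    /* same idea as itemoffcompare: larger offsets sort first */
    return ((const struct frag *) b)->itemoff -
           ((const struct frag *) a)->itemoff;
}

int
main(void)
{
    struct frag f[] = {{4096, 64}, {7936, 128}, {6144, 32}};
    int         upper = 8192;       /* stand-in for pd_special */
    int         i;

    qsort(f, 3, sizeof(struct frag), by_desc_itemoff);
    for (i = 0; i < 3; i++)
    {
        upper -= f[i].alignedlen;
        printf("tuple at %d moves up to %d\n", f[i].itemoff, upper);
    }
    return 0;
}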
467 
468 /*
469  * PageRepairFragmentation
470  *
471  * Frees fragmented space on a page.
472  * It doesn't remove unused line pointers! Please don't change this.
473  *
474  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
475  *
476  * As a side effect, the page's PD_HAS_FREE_LINES hint bit is updated.
477  */
478 void
479 PageRepairFragmentation(Page page)
480 {
481  Offset pd_lower = ((PageHeader) page)->pd_lower;
482  Offset pd_upper = ((PageHeader) page)->pd_upper;
483  Offset pd_special = ((PageHeader) page)->pd_special;
484  ItemId lp;
485  int nline,
486  nstorage,
487  nunused;
488  int i;
489  Size totallen;
490 
491  /*
492  * It's worth the trouble to be more paranoid here than in most places,
493  * because we are about to reshuffle data in (what is usually) a shared
494  * disk buffer. If we aren't careful then corrupted pointers, lengths,
495  * etc could cause us to clobber adjacent disk buffers, spreading the data
496  * loss further. So, check everything.
497  */
498  if (pd_lower < SizeOfPageHeaderData ||
499  pd_lower > pd_upper ||
500  pd_upper > pd_special ||
501  pd_special > BLCKSZ ||
502  pd_special != MAXALIGN(pd_special))
503  ereport(ERROR,
504  (errcode(ERRCODE_DATA_CORRUPTED),
505  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
506  pd_lower, pd_upper, pd_special)));
507 
508  nline = PageGetMaxOffsetNumber(page);
509  nunused = nstorage = 0;
510  for (i = FirstOffsetNumber; i <= nline; i++)
511  {
512  lp = PageGetItemId(page, i);
513  if (ItemIdIsUsed(lp))
514  {
515  if (ItemIdHasStorage(lp))
516  nstorage++;
517  }
518  else
519  {
520  /* Unused entries should have lp_len = 0, but make sure */
521  ItemIdSetUnused(lp);
522  nunused++;
523  }
524  }
525 
526  if (nstorage == 0)
527  {
528  /* Page is completely empty, so just reset it quickly */
529  ((PageHeader) page)->pd_upper = pd_special;
530  }
531  else
532  {
533  /* Need to compact the page the hard way */
534  itemIdSortData itemidbase[MaxHeapTuplesPerPage];
535  itemIdSort itemidptr = itemidbase;
536 
537  totallen = 0;
538  for (i = 0; i < nline; i++)
539  {
540  lp = PageGetItemId(page, i + 1);
541  if (ItemIdHasStorage(lp))
542  {
543  itemidptr->offsetindex = i;
544  itemidptr->itemoff = ItemIdGetOffset(lp);
545  if (itemidptr->itemoff < (int) pd_upper ||
546  itemidptr->itemoff >= (int) pd_special)
547  ereport(ERROR,
548  (errcode(ERRCODE_DATA_CORRUPTED),
549  errmsg("corrupted item pointer: %u",
550  itemidptr->itemoff)));
551  itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
552  totallen += itemidptr->alignedlen;
553  itemidptr++;
554  }
555  }
556 
557  if (totallen > (Size) (pd_special - pd_lower))
558  ereport(ERROR,
559  (errcode(ERRCODE_DATA_CORRUPTED),
560  errmsg("corrupted item lengths: total %u, available space %u",
561  (unsigned int) totallen, pd_special - pd_lower)));
562 
563  compactify_tuples(itemidbase, nstorage, page);
564  }
565 
566  /* Set hint bit for PageAddItem */
567  if (nunused > 0)
568  PageSetHasFreeLinePointers(page);
569  else
570  PageClearHasFreeLinePointers(page);
571 }
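A sketch of the heap-pruning style of use, assuming backend headers: some earlier step decides which line pointers are dead, those are set unused, and then tuple storage is defragmented while the line pointer array is left alone, as the comment above requires. The helper name and the use of ItemIdIsDead as the "is it dead?" test are illustrative.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical helper: reclaim space from items previously marked dead */
static void
prune_page_sketch(Page page)
{
    OffsetNumber    off,
                    maxoff = PageGetMaxOffsetNumber(page);

    for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        ItemId      iid = PageGetItemId(page, off);

        if (ItemIdIsDead(iid))      /* stand-in for the real prune decision */
            ItemIdSetUnused(iid);
    }

    /* squeeze out the holes; unused line pointers are kept, not removed */
    PageRepairFragmentation(page);
}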
572 
573 /*
574  * PageGetFreeSpace
575  * Returns the size of the free (allocatable) space on a page,
576  * reduced by the space needed for a new line pointer.
577  *
578  * Note: this should usually only be used on index pages. Use
579  * PageGetHeapFreeSpace on heap pages.
580  */
581 Size
582 PageGetFreeSpace(Page page)
583 {
584  int space;
585 
586  /*
587  * Use signed arithmetic here so that we behave sensibly if pd_lower >
588  * pd_upper.
589  */
590  space = (int) ((PageHeader) page)->pd_upper -
591  (int) ((PageHeader) page)->pd_lower;
592 
593  if (space < (int) sizeof(ItemIdData))
594  return 0;
595  space -= sizeof(ItemIdData);
596 
597  return (Size) space;
598 }
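The arithmetic is just the gap between pd_upper and pd_lower, less room for one more line pointer. A standalone illustration with made-up header values (sizeof(ItemIdData) is 4 bytes, SizeOfPageHeaderData is 24):

#include <stdio.h>

int
main(void)
{
    const int   item_id_size = 4;   /* sizeof(ItemIdData) */
    int         pd_lower = 40;      /* 24-byte header + 4 line pointers */
    int         pd_upper = 8000;
    int         space = pd_upper - pd_lower;

    /* same rule as above: reserve room for one new line pointer */
    space = (space < item_id_size) ? 0 : space - item_id_size;
    printf("allocatable space: %d bytes\n", space);     /* prints 7956 */
    return 0;
}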
599 
600 /*
601  * PageGetFreeSpaceForMultipleTuples
602  * Returns the size of the free (allocatable) space on a page,
603  * reduced by the space needed for multiple new line pointers.
604  *
605  * Note: this should usually only be used on index pages. Use
606  * PageGetHeapFreeSpace on heap pages.
607  */
608 Size
609 PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
610 {
611  int space;
612 
613  /*
614  * Use signed arithmetic here so that we behave sensibly if pd_lower >
615  * pd_upper.
616  */
617  space = (int) ((PageHeader) page)->pd_upper -
618  (int) ((PageHeader) page)->pd_lower;
619 
620  if (space < (int) (ntups * sizeof(ItemIdData)))
621  return 0;
622  space -= ntups * sizeof(ItemIdData);
623 
624  return (Size) space;
625 }
626 
627 /*
628  * PageGetExactFreeSpace
629  * Returns the size of the free (allocatable) space on a page,
630  * without any consideration for adding/removing line pointers.
631  */
632 Size
633 PageGetExactFreeSpace(Page page)
634 {
635  int space;
636 
637  /*
638  * Use signed arithmetic here so that we behave sensibly if pd_lower >
639  * pd_upper.
640  */
641  space = (int) ((PageHeader) page)->pd_upper -
642  (int) ((PageHeader) page)->pd_lower;
643 
644  if (space < 0)
645  return 0;
646 
647  return (Size) space;
648 }
649 
650 
651 /*
652  * PageGetHeapFreeSpace
653  * Returns the size of the free (allocatable) space on a page,
654  * reduced by the space needed for a new line pointer.
655  *
656  * The difference between this and PageGetFreeSpace is that this will return
657  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
658  * and none are free. We use this to enforce that no more than
659  * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
660  * no more tuples than that could fit anyway, in the presence of redirected
661  * or dead line pointers it'd be possible to have too many line pointers.
662  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
663  * on the number of line pointers, we make this extra check.)
664  */
665 Size
666 PageGetHeapFreeSpace(Page page)
667 {
668  Size space;
669 
670  space = PageGetFreeSpace(page);
671  if (space > 0)
672  {
673  OffsetNumber offnum,
674  nline;
675 
676  /*
677  * Are there already MaxHeapTuplesPerPage line pointers in the page?
678  */
679  nline = PageGetMaxOffsetNumber(page);
680  if (nline >= MaxHeapTuplesPerPage)
681  {
682  if (PageHasFreeLinePointers((PageHeader) page))
683  {
684  /*
685  * Since this is just a hint, we must confirm that there is
686  * indeed a free line pointer
687  */
688  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
689  {
690  ItemId lp = PageGetItemId(page, offnum);
691 
692  if (!ItemIdIsUsed(lp))
693  break;
694  }
695 
696  if (offnum > nline)
697  {
698  /*
699  * The hint is wrong, but we can't clear it here since we
700  * don't have the ability to mark the page dirty.
701  */
702  space = 0;
703  }
704  }
705  else
706  {
707  /*
708  * Although the hint might be wrong, PageAddItem will believe
709  * it anyway, so we must believe it too.
710  */
711  space = 0;
712  }
713  }
714  }
715  return space;
716 }
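A sketch of the kind of check a heap caller might make before trying to place a tuple of a given length on a candidate page; using the heap-specific routine keeps the MaxHeapTuplesPerPage rule described above in force. The helper name and the MAXALIGN comparison policy are illustrative, and the caller is assumed to hold the appropriate buffer lock.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical helper: can this heap page take a tuple of "len" bytes? */
static bool
heap_page_has_room(Page page, Size len)
{
    return PageGetHeapFreeSpace(page) >= MAXALIGN(len);
}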
717 
718 
719 /*
720  * PageIndexTupleDelete
721  *
722  * This routine does the work of removing a tuple from an index page.
723  *
724  * Unlike heap pages, we compact out the line pointer for the removed tuple.
725  */
726 void
727 PageIndexTupleDelete(Page page, OffsetNumber offnum)
728 {
729  PageHeader phdr = (PageHeader) page;
730  char *addr;
731  ItemId tup;
732  Size size;
733  unsigned offset;
734  int nbytes;
735  int offidx;
736  int nline;
737 
738  /*
739  * As with PageRepairFragmentation, paranoia seems justified.
740  */
741  if (phdr->pd_lower < SizeOfPageHeaderData ||
742  phdr->pd_lower > phdr->pd_upper ||
743  phdr->pd_upper > phdr->pd_special ||
744  phdr->pd_special > BLCKSZ ||
745  phdr->pd_special != MAXALIGN(phdr->pd_special))
746  ereport(ERROR,
747  (errcode(ERRCODE_DATA_CORRUPTED),
748  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
749  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
750 
751  nline = PageGetMaxOffsetNumber(page);
752  if ((int) offnum <= 0 || (int) offnum > nline)
753  elog(ERROR, "invalid index offnum: %u", offnum);
754 
755  /* change offset number to offset index */
756  offidx = offnum - 1;
757 
758  tup = PageGetItemId(page, offnum);
759  Assert(ItemIdHasStorage(tup));
760  size = ItemIdGetLength(tup);
761  offset = ItemIdGetOffset(tup);
762 
763  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
764  offset != MAXALIGN(offset))
765  ereport(ERROR,
766  (errcode(ERRCODE_DATA_CORRUPTED),
767  errmsg("corrupted item pointer: offset = %u, size = %u",
768  offset, (unsigned int) size)));
769 
770  /* Amount of space to actually be deleted */
771  size = MAXALIGN(size);
772 
773  /*
774  * First, we want to get rid of the pd_linp entry for the index tuple. We
775  * copy all subsequent linp's back one slot in the array. We don't use
776  * PageGetItemId, because we are manipulating the _array_, not individual
777  * linp's.
778  */
779  nbytes = phdr->pd_lower -
780  ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
781 
782  if (nbytes > 0)
783  memmove((char *) &(phdr->pd_linp[offidx]),
784  (char *) &(phdr->pd_linp[offidx + 1]),
785  nbytes);
786 
787  /*
788  * Now move everything between the old upper bound (beginning of tuple
789  * space) and the beginning of the deleted tuple forward, so that space in
790  * the middle of the page is left free. If we've just deleted the tuple
791  * at the beginning of tuple space, then there's no need to do the copy.
792  */
793 
794  /* beginning of tuple space */
795  addr = (char *) page + phdr->pd_upper;
796 
797  if (offset > phdr->pd_upper)
798  memmove(addr + size, addr, offset - phdr->pd_upper);
799 
800  /* adjust free space boundary pointers */
801  phdr->pd_upper += size;
802  phdr->pd_lower -= sizeof(ItemIdData);
803 
804  /*
805  * Finally, we need to adjust the linp entries that remain.
806  *
807  * Anything that used to be before the deleted tuple's data was moved
808  * forward by the size of the deleted tuple.
809  */
810  if (!PageIsEmpty(page))
811  {
812  int i;
813 
814  nline--; /* there's one less than when we started */
815  for (i = 1; i <= nline; i++)
816  {
817  ItemId ii = PageGetItemId(phdr, i);
818 
819  Assert(ItemIdHasStorage(ii));
820  if (ItemIdGetOffset(ii) <= offset)
821  ii->lp_off += size;
822  }
823  }
824 }
825 
826 
827 /*
828  * PageIndexMultiDelete
829  *
830  * This routine handles the case of deleting multiple tuples from an
831  * index page at once. It is considerably faster than a loop around
832  * PageIndexTupleDelete ... however, the caller *must* supply the array
833  * of item numbers to be deleted in item number order!
834  */
835 void
836 PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
837 {
838  PageHeader phdr = (PageHeader) page;
839  Offset pd_lower = phdr->pd_lower;
840  Offset pd_upper = phdr->pd_upper;
841  Offset pd_special = phdr->pd_special;
842  itemIdSortData itemidbase[MaxIndexTuplesPerPage];
843  ItemIdData newitemids[MaxIndexTuplesPerPage];
844  itemIdSort itemidptr;
845  ItemId lp;
846  int nline,
847  nused;
848  Size totallen;
849  Size size;
850  unsigned offset;
851  int nextitm;
852  OffsetNumber offnum;
853 
854  Assert(nitems <= MaxIndexTuplesPerPage);
855 
856  /*
857  * If there aren't very many items to delete, then retail
858  * PageIndexTupleDelete is the best way. Delete the items in reverse
859  * order so we don't have to think about adjusting item numbers for
860  * previous deletions.
861  *
862  * TODO: tune the magic number here
863  */
864  if (nitems <= 2)
865  {
866  while (--nitems >= 0)
867  PageIndexTupleDelete(page, itemnos[nitems]);
868  return;
869  }
870 
871  /*
872  * As with PageRepairFragmentation, paranoia seems justified.
873  */
874  if (pd_lower < SizeOfPageHeaderData ||
875  pd_lower > pd_upper ||
876  pd_upper > pd_special ||
877  pd_special > BLCKSZ ||
878  pd_special != MAXALIGN(pd_special))
879  ereport(ERROR,
880  (errcode(ERRCODE_DATA_CORRUPTED),
881  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
882  pd_lower, pd_upper, pd_special)));
883 
884  /*
885  * Scan the item pointer array and build a list of just the ones we are
886  * going to keep. Notice we do not modify the page yet, since we are
887  * still validity-checking.
888  */
889  nline = PageGetMaxOffsetNumber(page);
890  itemidptr = itemidbase;
891  totallen = 0;
892  nused = 0;
893  nextitm = 0;
894  for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
895  {
896  lp = PageGetItemId(page, offnum);
897  Assert(ItemIdHasStorage(lp));
898  size = ItemIdGetLength(lp);
899  offset = ItemIdGetOffset(lp);
900  if (offset < pd_upper ||
901  (offset + size) > pd_special ||
902  offset != MAXALIGN(offset))
903  ereport(ERROR,
904  (errcode(ERRCODE_DATA_CORRUPTED),
905  errmsg("corrupted item pointer: offset = %u, size = %u",
906  offset, (unsigned int) size)));
907 
908  if (nextitm < nitems && offnum == itemnos[nextitm])
909  {
910  /* skip item to be deleted */
911  nextitm++;
912  }
913  else
914  {
915  itemidptr->offsetindex = nused; /* where it will go */
916  itemidptr->itemoff = offset;
917  itemidptr->alignedlen = MAXALIGN(size);
918  totallen += itemidptr->alignedlen;
919  newitemids[nused] = *lp;
920  itemidptr++;
921  nused++;
922  }
923  }
924 
925  /* this will catch invalid or out-of-order itemnos[] */
926  if (nextitm != nitems)
927  elog(ERROR, "incorrect index offsets supplied");
928 
929  if (totallen > (Size) (pd_special - pd_lower))
930  ereport(ERROR,
931  (errcode(ERRCODE_DATA_CORRUPTED),
932  errmsg("corrupted item lengths: total %u, available space %u",
933  (unsigned int) totallen, pd_special - pd_lower)));
934 
935  /*
936  * Looks good. Overwrite the line pointers with the copy, from which we've
937  * removed all the unused items.
938  */
939  memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
940  phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
941 
942  /* and compactify the tuple data */
943  compactify_tuples(itemidbase, nused, page);
944 }
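A sketch of the bulk-delete pattern, assuming backend headers: collect the doomed offsets with a forward scan, which naturally produces the ascending order this function requires, then delete them in one call. Using ItemIdIsDead as the test and the helper name are illustrative.

#include "postgres.h"
#include "access/itup.h"        /* MaxIndexTuplesPerPage */
#include "storage/bufpage.h"

/* hypothetical helper: remove all items previously marked dead */
static void
delete_dead_index_items(Page page)
{
    OffsetNumber    deletable[MaxIndexTuplesPerPage];
    int             ndeletable = 0;
    OffsetNumber    off,
                    maxoff = PageGetMaxOffsetNumber(page);

    for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        if (ItemIdIsDead(PageGetItemId(page, off)))
            deletable[ndeletable++] = off;      /* ascending by construction */
    }

    if (ndeletable > 0)
        PageIndexMultiDelete(page, deletable, ndeletable);
}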
945 
946 
947 /*
948  * PageIndexTupleDeleteNoCompact
949  *
950  * Remove the specified tuple from an index page, but set its line pointer
951  * to "unused" instead of compacting it out, except that it can be removed
952  * if it's the last line pointer on the page.
953  *
954  * This is used for index AMs that require that existing TIDs of live tuples
955  * remain unchanged, and are willing to allow unused line pointers instead.
956  */
957 void
958 PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
959 {
960  PageHeader phdr = (PageHeader) page;
961  char *addr;
962  ItemId tup;
963  Size size;
964  unsigned offset;
965  int nline;
966 
967  /*
968  * As with PageRepairFragmentation, paranoia seems justified.
969  */
970  if (phdr->pd_lower < SizeOfPageHeaderData ||
971  phdr->pd_lower > phdr->pd_upper ||
972  phdr->pd_upper > phdr->pd_special ||
973  phdr->pd_special > BLCKSZ ||
974  phdr->pd_special != MAXALIGN(phdr->pd_special))
975  ereport(ERROR,
976  (errcode(ERRCODE_DATA_CORRUPTED),
977  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
978  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
979 
980  nline = PageGetMaxOffsetNumber(page);
981  if ((int) offnum <= 0 || (int) offnum > nline)
982  elog(ERROR, "invalid index offnum: %u", offnum);
983 
984  tup = PageGetItemId(page, offnum);
985  Assert(ItemIdHasStorage(tup));
986  size = ItemIdGetLength(tup);
987  offset = ItemIdGetOffset(tup);
988 
989  if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
990  offset != MAXALIGN(offset))
991  ereport(ERROR,
992  (errcode(ERRCODE_DATA_CORRUPTED),
993  errmsg("corrupted item pointer: offset = %u, size = %u",
994  offset, (unsigned int) size)));
995 
996  /* Amount of space to actually be deleted */
997  size = MAXALIGN(size);
998 
999  /*
1000  * Either set the item pointer to "unused", or zap it if it's the last
1001  * one. (Note: it's possible that the next-to-last one(s) are already
1002  * unused, but we do not trouble to try to compact them out if so.)
1003  */
1004  if ((int) offnum < nline)
1005  ItemIdSetUnused(tup);
1006  else
1007  {
1008  phdr->pd_lower -= sizeof(ItemIdData);
1009  nline--; /* there's one less than when we started */
1010  }
1011 
1012  /*
1013  * Now move everything between the old upper bound (beginning of tuple
1014  * space) and the beginning of the deleted tuple forward, so that space in
1015  * the middle of the page is left free. If we've just deleted the tuple
1016  * at the beginning of tuple space, then there's no need to do the copy.
1017  */
1018 
1019  /* beginning of tuple space */
1020  addr = (char *) page + phdr->pd_upper;
1021 
1022  if (offset > phdr->pd_upper)
1023  memmove(addr + size, addr, offset - phdr->pd_upper);
1024 
1025  /* adjust free space boundary pointer */
1026  phdr->pd_upper += size;
1027 
1028  /*
1029  * Finally, we need to adjust the linp entries that remain.
1030  *
1031  * Anything that used to be before the deleted tuple's data was moved
1032  * forward by the size of the deleted tuple.
1033  */
1034  if (!PageIsEmpty(page))
1035  {
1036  int i;
1037 
1038  for (i = 1; i <= nline; i++)
1039  {
1040  ItemId ii = PageGetItemId(phdr, i);
1041 
1042  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1043  ii->lp_off += size;
1044  }
1045  }
1046 }
1047 
1048 
1049 /*
1050  * PageIndexTupleOverwrite
1051  *
1052  * Replace a specified tuple on an index page.
1053  *
1054  * The new tuple is placed exactly where the old one had been, shifting
1055  * other tuples' data up or down as needed to keep the page compacted.
1056  * This is better than deleting and reinserting the tuple, because it
1057  * avoids any data shifting when the tuple size doesn't change; and
1058  * even when it does, we avoid moving the item pointers around.
1059  * Conceivably this could also be of use to an index AM that cares about
1060  * the physical order of tuples as well as their ItemId order.
1061  *
1062  * If there's insufficient space for the new tuple, return false. Other
1063  * errors represent data-corruption problems, so we just elog.
1064  */
1065 bool
1066 PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
1067  Item newtup, Size newsize)
1068 {
1069  PageHeader phdr = (PageHeader) page;
1070  ItemId tupid;
1071  int oldsize;
1072  unsigned offset;
1073  Size alignednewsize;
1074  int size_diff;
1075  int itemcount;
1076 
1077  /*
1078  * As with PageRepairFragmentation, paranoia seems justified.
1079  */
1080  if (phdr->pd_lower < SizeOfPageHeaderData ||
1081  phdr->pd_lower > phdr->pd_upper ||
1082  phdr->pd_upper > phdr->pd_special ||
1083  phdr->pd_special > BLCKSZ ||
1084  phdr->pd_special != MAXALIGN(phdr->pd_special))
1085  ereport(ERROR,
1086  (errcode(ERRCODE_DATA_CORRUPTED),
1087  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
1088  phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
1089 
1090  itemcount = PageGetMaxOffsetNumber(page);
1091  if ((int) offnum <= 0 || (int) offnum > itemcount)
1092  elog(ERROR, "invalid index offnum: %u", offnum);
1093 
1094  tupid = PageGetItemId(page, offnum);
1095  Assert(ItemIdHasStorage(tupid));
1096  oldsize = ItemIdGetLength(tupid);
1097  offset = ItemIdGetOffset(tupid);
1098 
1099  if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
1100  offset != MAXALIGN(offset))
1101  ereport(ERROR,
1102  (errcode(ERRCODE_DATA_CORRUPTED),
1103  errmsg("corrupted item pointer: offset = %u, size = %u",
1104  offset, (unsigned int) oldsize)));
1105 
1106  /*
1107  * Determine actual change in space requirement, check for page overflow.
1108  */
1109  oldsize = MAXALIGN(oldsize);
1110  alignednewsize = MAXALIGN(newsize);
1111  if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
1112  return false;
1113 
1114  /*
1115  * Relocate existing data and update line pointers, unless the new tuple
1116  * is the same size as the old (after alignment), in which case there's
1117  * nothing to do. Notice that what we have to relocate is data before the
1118  * target tuple, not data after, so it's convenient to express size_diff
1119  * as the amount by which the tuple's size is decreasing, making it the
1120  * delta to add to pd_upper and affected line pointers.
1121  */
1122  size_diff = oldsize - (int) alignednewsize;
1123  if (size_diff != 0)
1124  {
1125  char *addr = (char *) page + phdr->pd_upper;
1126  int i;
1127 
1128  /* relocate all tuple data before the target tuple */
1129  memmove(addr + size_diff, addr, offset - phdr->pd_upper);
1130 
1131  /* adjust free space boundary pointer */
1132  phdr->pd_upper += size_diff;
1133 
1134  /* adjust affected line pointers too */
1135  for (i = FirstOffsetNumber; i <= itemcount; i++)
1136  {
1137  ItemId ii = PageGetItemId(phdr, i);
1138 
1139  /* Allow items without storage; currently only BRIN needs that */
1140  if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
1141  ii->lp_off += size_diff;
1142  }
1143  }
1144 
1145  /* Update the item's tuple length (other fields shouldn't change) */
1146  ItemIdSetNormal(tupid, offset + size_diff, newsize);
1147 
1148  /* Copy new tuple data onto page */
1149  memcpy(PageGetItem(page, tupid), newtup, newsize);
1150 
1151  return true;
1152 }
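A sketch of the calling convention, assuming backend headers: a false return means only "the new tuple doesn't fit", so the caller picks its own fallback; anything worse is reported by the function itself. The wrapper name and the error policy are illustrative.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical wrapper: replace a tuple in place, erroring out if it won't fit */
static void
replace_index_tuple(Page page, OffsetNumber offnum, Item newtup, Size newsize)
{
    if (!PageIndexTupleOverwrite(page, offnum, newtup, newsize))
        elog(ERROR, "updated tuple does not fit on page");
}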
1153 
1154 
1155 /*
1156  * Set checksum for a page in shared buffers.
1157  *
1158  * If checksums are disabled, or if the page is not initialized, just return
1159  * the input. Otherwise, we must make a copy of the page before calculating
1160  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1161  * from making the final checksum invalid. It doesn't matter if we include or
1162  * exclude hints during the copy, as long as we write a valid page and
1163  * associated checksum.
1164  *
1165  * Returns a pointer to the block-sized data that needs to be written. Uses
1166  * statically-allocated memory, so the caller must immediately write the
1167  * returned page and not refer to it again.
1168  */
1169 char *
1170 PageSetChecksumCopy(Page page, BlockNumber blkno)
1171 {
1172  static char *pageCopy = NULL;
1173 
1174  /* If we don't need a checksum, just return the passed-in data */
1175  if (PageIsNew(page) || !DataChecksumsEnabled())
1176  return (char *) page;
1177 
1178  /*
1179  * We allocate the copy space once and use it over on each subsequent
1180  * call. The point of palloc'ing here, rather than having a static char
1181  * array, is first to ensure adequate alignment for the checksumming code
1182  * and second to avoid wasting space in processes that never call this.
1183  */
1184  if (pageCopy == NULL)
1185  pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ);
1186 
1187  memcpy(pageCopy, (char *) page, BLCKSZ);
1188  ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1189  return pageCopy;
1190 }
1191 
1192 /*
1193  * Set checksum for a page in private memory.
1194  *
1195  * This must only be used when we know that no other process can be modifying
1196  * the page buffer.
1197  */
1198 void
1199 PageSetChecksumInplace(Page page, BlockNumber blkno)
1200 {
1201  /* If we don't need a checksum, just return */
1202  if (PageIsNew(page) || !DataChecksumsEnabled())
1203  return;
1204 
1205  ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
1206 }
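A sketch of how a caller might choose between the two checksum routines, following the comments above: use the copying variant when other backends could still be setting hint bits on the page (a shared buffer), and the in-place variant for a private buffer about to be written. The function name and the page_is_shared flag are hypothetical.

#include "postgres.h"
#include "storage/bufpage.h"

/* hypothetical helper: return the image that should actually be written */
static char *
prepare_block_for_write(Page page, BlockNumber blkno, bool page_is_shared)
{
    if (page_is_shared)
    {
        /* concurrent hint-bit setters are possible: checksum a private copy */
        return PageSetChecksumCopy(page, blkno);
    }

    /* private buffer: safe to stamp the checksum directly into the page */
    PageSetChecksumInplace(page, blkno);
    return (char *) page;
}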