/* Extracted from the PostgreSQL source tree (git master): brin_pageops.c */
/*
 * brin_pageops.c
 *		Page-handling routines for BRIN indexes
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_pageops.c
 */
11 #include "postgres.h"
12 
13 #include "access/brin_page.h"
14 #include "access/brin_pageops.h"
15 #include "access/brin_revmap.h"
16 #include "access/brin_xlog.h"
17 #include "access/xloginsert.h"
18 #include "miscadmin.h"
19 #include "storage/bufmgr.h"
20 #include "storage/freespace.h"
21 #include "storage/lmgr.h"
22 #include "storage/smgr.h"
23 #include "utils/rel.h"
24 
/*
 * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
 * a single item per page, unlike other index AMs.
 */
#define BrinMaxItemSize \
	MAXALIGN_DOWN(BLCKSZ - \
				  (MAXALIGN(SizeOfPageHeaderData + \
							sizeof(ItemIdData)) + \
				   MAXALIGN(sizeof(BrinSpecialSpace))))
34 
35 static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
36  bool *extended);
37 static Size br_page_get_freespace(Page page);
38 static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
39 
40 
41 /*
42  * Update tuple origtup (size origsz), located in offset oldoff of buffer
43  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
44  * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
45  *
46  * If samepage is true, attempt to put the new tuple in the same page, but if
47  * there's no room, use some other one.
48  *
49  * If the update is successful, return true; the revmap is updated to point to
50  * the new tuple. If the update is not done for whatever reason, return false.
51  * Caller may retry the update if this happens.
52  */
53 bool
54 brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
55  BrinRevmap *revmap, BlockNumber heapBlk,
56  Buffer oldbuf, OffsetNumber oldoff,
57  const BrinTuple *origtup, Size origsz,
58  const BrinTuple *newtup, Size newsz,
59  bool samepage)
60 {
61  Page oldpage;
62  ItemId oldlp;
63  BrinTuple *oldtup;
64  Size oldsz;
65  Buffer newbuf;
67  bool extended;
68 
69  Assert(newsz == MAXALIGN(newsz));
70 
71  /* If the item is oversized, don't bother. */
72  if (newsz > BrinMaxItemSize)
73  {
74  ereport(ERROR,
75  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
76  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
77  newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
78  return false; /* keep compiler quiet */
79  }
80 
81  /* make sure the revmap is long enough to contain the entry we need */
82  brinRevmapExtend(revmap, heapBlk);
83 
84  if (!samepage)
85  {
86  /* need a page on which to put the item */
87  newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
88  if (!BufferIsValid(newbuf))
89  {
90  Assert(!extended);
91  return false;
92  }
93 
94  /*
95  * Note: it's possible (though unlikely) that the returned newbuf is
96  * the same as oldbuf, if brin_getinsertbuffer determined that the old
97  * buffer does in fact have enough space.
98  */
99  if (newbuf == oldbuf)
100  {
101  Assert(!extended);
102  newbuf = InvalidBuffer;
103  }
104  else
105  newblk = BufferGetBlockNumber(newbuf);
106  }
107  else
108  {
110  newbuf = InvalidBuffer;
111  extended = false;
112  }
113  oldpage = BufferGetPage(oldbuf);
114  oldlp = PageGetItemId(oldpage, oldoff);
115 
116  /*
117  * Check that the old tuple wasn't updated concurrently: it might have
118  * moved someplace else entirely, and for that matter the whole page
119  * might've become a revmap page. Note that in the first two cases
120  * checked here, the "oldlp" we just calculated is garbage; but
121  * PageGetItemId() is simple enough that it was safe to do that
122  * calculation anyway.
123  */
124  if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
125  oldoff > PageGetMaxOffsetNumber(oldpage) ||
126  !ItemIdIsNormal(oldlp))
127  {
129 
130  /*
131  * If this happens, and the new buffer was obtained by extending the
132  * relation, then we need to ensure we don't leave it uninitialized or
133  * forget about it.
134  */
135  if (BufferIsValid(newbuf))
136  {
137  if (extended)
138  brin_initialize_empty_new_buffer(idxrel, newbuf);
139  UnlockReleaseBuffer(newbuf);
140  if (extended)
141  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
142  }
143  return false;
144  }
145 
146  oldsz = ItemIdGetLength(oldlp);
147  oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
148 
149  /*
150  * ... or it might have been updated in place to different contents.
151  */
152  if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
153  {
155  if (BufferIsValid(newbuf))
156  {
157  /* As above, initialize and record new page if we got one */
158  if (extended)
159  brin_initialize_empty_new_buffer(idxrel, newbuf);
160  UnlockReleaseBuffer(newbuf);
161  if (extended)
162  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
163  }
164  return false;
165  }
166 
167  /*
168  * Great, the old tuple is intact. We can proceed with the update.
169  *
170  * If there's enough room in the old page for the new tuple, replace it.
171  *
172  * Note that there might now be enough space on the page even though the
173  * caller told us there isn't, if a concurrent update moved another tuple
174  * elsewhere or replaced a tuple with a smaller one.
175  */
176  if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
177  brin_can_do_samepage_update(oldbuf, origsz, newsz))
178  {
180  if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
181  elog(ERROR, "failed to replace BRIN tuple");
182  MarkBufferDirty(oldbuf);
183 
184  /* XLOG stuff */
185  if (RelationNeedsWAL(idxrel))
186  {
188  XLogRecPtr recptr;
190 
191  xlrec.offnum = oldoff;
192 
193  XLogBeginInsert();
194  XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
195 
197  XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
198 
199  recptr = XLogInsert(RM_BRIN_ID, info);
200 
201  PageSetLSN(oldpage, recptr);
202  }
203 
205 
207 
208  if (BufferIsValid(newbuf))
209  {
210  /* As above, initialize and record new page if we got one */
211  if (extended)
212  brin_initialize_empty_new_buffer(idxrel, newbuf);
213  UnlockReleaseBuffer(newbuf);
214  if (extended)
215  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
216  }
217 
218  return true;
219  }
220  else if (newbuf == InvalidBuffer)
221  {
222  /*
223  * Not enough space, but caller said that there was. Tell them to
224  * start over.
225  */
227  return false;
228  }
229  else
230  {
231  /*
232  * Not enough free space on the oldpage. Put the new tuple on the new
233  * page, and update the revmap.
234  */
235  Page newpage = BufferGetPage(newbuf);
236  Buffer revmapbuf;
237  ItemPointerData newtid;
238  OffsetNumber newoff;
239  Size freespace = 0;
240 
241  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
242 
244 
245  /*
246  * We need to initialize the page if it's newly obtained. Note we
247  * will WAL-log the initialization as part of the update, so we don't
248  * need to do that here.
249  */
250  if (extended)
252 
253  PageIndexTupleDeleteNoCompact(oldpage, oldoff);
254  newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
255  InvalidOffsetNumber, false, false);
256  if (newoff == InvalidOffsetNumber)
257  elog(ERROR, "failed to add BRIN tuple to new page");
258  MarkBufferDirty(oldbuf);
259  MarkBufferDirty(newbuf);
260 
261  /* needed to update FSM below */
262  if (extended)
263  freespace = br_page_get_freespace(newpage);
264 
265  ItemPointerSet(&newtid, newblk, newoff);
266  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
267  MarkBufferDirty(revmapbuf);
268 
269  /* XLOG stuff */
270  if (RelationNeedsWAL(idxrel))
271  {
272  xl_brin_update xlrec;
273  XLogRecPtr recptr;
274  uint8 info;
275 
276  info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
277 
278  xlrec.insert.offnum = newoff;
279  xlrec.insert.heapBlk = heapBlk;
280  xlrec.insert.pagesPerRange = pagesPerRange;
281  xlrec.oldOffnum = oldoff;
282 
283  XLogBeginInsert();
284 
285  /* new page */
286  XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
287 
288  XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
289  XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
290 
291  /* revmap page */
292  XLogRegisterBuffer(1, revmapbuf, 0);
293 
294  /* old page */
296 
297  recptr = XLogInsert(RM_BRIN_ID, info);
298 
299  PageSetLSN(oldpage, recptr);
300  PageSetLSN(newpage, recptr);
301  PageSetLSN(BufferGetPage(revmapbuf), recptr);
302  }
303 
305 
306  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
308  UnlockReleaseBuffer(newbuf);
309 
310  if (extended)
311  {
312  RecordPageWithFreeSpace(idxrel, newblk, freespace);
313  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
314  }
315 
316  return true;
317  }
318 }
319 
320 /*
321  * Return whether brin_doupdate can do a samepage update.
322  */
323 bool
325 {
326  return
327  ((newsz <= origsz) ||
328  PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
329 }
330 
331 /*
332  * Insert an index tuple into the index relation. The revmap is updated to
333  * mark the range containing the given page as pointing to the inserted entry.
334  * A WAL record is written.
335  *
336  * The buffer, if valid, is first checked for free space to insert the new
337  * entry; if there isn't enough, a new buffer is obtained and pinned. No
338  * buffer lock must be held on entry, no buffer lock is held on exit.
339  *
340  * Return value is the offset number where the tuple was inserted.
341  */
343 brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
344  BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
345  BrinTuple *tup, Size itemsz)
346 {
347  Page page;
348  BlockNumber blk;
349  OffsetNumber off;
350  Size freespace = 0;
351  Buffer revmapbuf;
352  ItemPointerData tid;
353  bool extended;
354 
355  Assert(itemsz == MAXALIGN(itemsz));
356 
357  /* If the item is oversized, don't even bother. */
358  if (itemsz > BrinMaxItemSize)
359  {
360  ereport(ERROR,
361  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
362  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
363  itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
364  return InvalidOffsetNumber; /* keep compiler quiet */
365  }
366 
367  /* Make sure the revmap is long enough to contain the entry we need */
368  brinRevmapExtend(revmap, heapBlk);
369 
370  /*
371  * Acquire lock on buffer supplied by caller, if any. If it doesn't have
372  * enough space, unpin it to obtain a new one below.
373  */
374  if (BufferIsValid(*buffer))
375  {
376  /*
377  * It's possible that another backend (or ourselves!) extended the
378  * revmap over the page we held a pin on, so we cannot assume that
379  * it's still a regular page.
380  */
382  if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
383  {
384  UnlockReleaseBuffer(*buffer);
385  *buffer = InvalidBuffer;
386  }
387  }
388 
389  /*
390  * If we still don't have a usable buffer, have brin_getinsertbuffer
391  * obtain one for us.
392  */
393  if (!BufferIsValid(*buffer))
394  {
395  do
396  *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
397  while (!BufferIsValid(*buffer));
398  }
399  else
400  extended = false;
401 
402  /* Now obtain lock on revmap buffer */
403  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
404 
405  page = BufferGetPage(*buffer);
406  blk = BufferGetBlockNumber(*buffer);
407 
408  /* Execute the actual insertion */
410  if (extended)
412  off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
413  false, false);
414  if (off == InvalidOffsetNumber)
415  elog(ERROR, "failed to add BRIN tuple to new page");
416  MarkBufferDirty(*buffer);
417 
418  /* needed to update FSM below */
419  if (extended)
420  freespace = br_page_get_freespace(page);
421 
422  ItemPointerSet(&tid, blk, off);
423  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
424  MarkBufferDirty(revmapbuf);
425 
426  /* XLOG stuff */
427  if (RelationNeedsWAL(idxrel))
428  {
429  xl_brin_insert xlrec;
430  XLogRecPtr recptr;
431  uint8 info;
432 
433  info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
434  xlrec.heapBlk = heapBlk;
435  xlrec.pagesPerRange = pagesPerRange;
436  xlrec.offnum = off;
437 
438  XLogBeginInsert();
439  XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
440 
441  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
442  XLogRegisterBufData(0, (char *) tup, itemsz);
443 
444  XLogRegisterBuffer(1, revmapbuf, 0);
445 
446  recptr = XLogInsert(RM_BRIN_ID, info);
447 
448  PageSetLSN(page, recptr);
449  PageSetLSN(BufferGetPage(revmapbuf), recptr);
450  }
451 
453 
454  /* Tuple is firmly on buffer; we can release our locks */
455  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
456  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
457 
458  BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
459  blk, off, heapBlk));
460 
461  if (extended)
462  {
463  RecordPageWithFreeSpace(idxrel, blk, freespace);
464  FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
465  }
466 
467  return off;
468 }
469 
470 /*
471  * Initialize a page with the given type.
472  *
473  * Caller is responsible for marking it dirty, as appropriate.
474  */
475 void
477 {
478  PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
479 
480  BrinPageType(page) = type;
481 }
482 
483 /*
484  * Initialize a new BRIN index's metapage.
485  */
486 void
487 brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
488 {
489  BrinMetaPageData *metadata;
490 
492 
493  metadata = (BrinMetaPageData *) PageGetContents(page);
494 
495  metadata->brinMagic = BRIN_META_MAGIC;
496  metadata->brinVersion = version;
497  metadata->pagesPerRange = pagesPerRange;
498 
499  /*
500  * Note we cheat here a little. 0 is not a valid revmap block number
501  * (because it's the metapage buffer), but doing this enables the first
502  * revmap page to be created when the index is.
503  */
504  metadata->lastRevmapPage = 0;
505 
506  /*
507  * Set pd_lower just past the end of the metadata. This is essential,
508  * because without doing so, metadata will be lost if xlog.c compresses
509  * the page.
510  */
511  ((PageHeader) page)->pd_lower =
512  ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
513 }
514 
515 /*
516  * Initiate page evacuation protocol.
517  *
518  * The page must be locked in exclusive mode by the caller.
519  *
520  * If the page is not yet initialized or empty, return false without doing
521  * anything; it can be used for revmap without any further changes. If it
522  * contains tuples, mark it for evacuation and return true.
523  */
524 bool
526 {
527  OffsetNumber off;
528  OffsetNumber maxoff;
529  Page page;
530 
531  page = BufferGetPage(buf);
532 
533  if (PageIsNew(page))
534  return false;
535 
536  maxoff = PageGetMaxOffsetNumber(page);
537  for (off = FirstOffsetNumber; off <= maxoff; off++)
538  {
539  ItemId lp;
540 
541  lp = PageGetItemId(page, off);
542  if (ItemIdIsUsed(lp))
543  {
544  /*
545  * Prevent other backends from adding more stuff to this page:
546  * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
547  * can no longer be used to add new tuples. Note that this flag
548  * is not WAL-logged, except accidentally.
549  */
551  MarkBufferDirtyHint(buf, true);
552 
553  return true;
554  }
555  }
556  return false;
557 }
558 
559 /*
560  * Move all tuples out of a page.
561  *
562  * The caller must hold lock on the page. The lock and pin are released.
563  */
564 void
566  BrinRevmap *revmap, Buffer buf)
567 {
568  OffsetNumber off;
569  OffsetNumber maxoff;
570  Page page;
571  BrinTuple *btup = NULL;
572  Size btupsz = 0;
573 
574  page = BufferGetPage(buf);
575 
577 
578  maxoff = PageGetMaxOffsetNumber(page);
579  for (off = FirstOffsetNumber; off <= maxoff; off++)
580  {
581  BrinTuple *tup;
582  Size sz;
583  ItemId lp;
584 
586 
587  lp = PageGetItemId(page, off);
588  if (ItemIdIsUsed(lp))
589  {
590  sz = ItemIdGetLength(lp);
591  tup = (BrinTuple *) PageGetItem(page, lp);
592  tup = brin_copy_tuple(tup, sz, btup, &btupsz);
593 
595 
596  if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
597  buf, off, tup, sz, tup, sz, false))
598  off--; /* retry */
599 
601 
602  /* It's possible that someone extended the revmap over this page */
603  if (!BRIN_IS_REGULAR_PAGE(page))
604  break;
605  }
606  }
607 
609 }
610 
611 /*
612  * Given a BRIN index page, initialize it if necessary, and record its
613  * current free space in the FSM.
614  *
615  * The main use for this is when, during vacuuming, an uninitialized page is
616  * found, which could be the result of relation extension followed by a crash
617  * before the page can be used.
618  *
619  * Here, we don't bother to update upper FSM pages, instead expecting that our
620  * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
621  * in this file, it's generally a good idea to propagate additions of free
622  * space into the upper FSM pages immediately.
623  */
624 void
626 {
627  Page page = BufferGetPage(buf);
628 
629  /*
630  * If a page was left uninitialized, initialize it now; also record it in
631  * FSM.
632  *
633  * Somebody else might be extending the relation concurrently. To avoid
634  * re-initializing the page before they can grab the buffer lock, we
635  * acquire the extension lock momentarily. Since they hold the extension
636  * lock from before getting the page and after its been initialized, we're
637  * sure to see their initialization.
638  */
639  if (PageIsNew(page))
640  {
643 
645  if (PageIsNew(page))
646  {
649  return;
650  }
652  }
653 
654  /* Nothing to be done for non-regular index pages */
657  return;
658 
659  /* Measure free space and record it */
661  br_page_get_freespace(page));
662 }
663 
664 /*
665  * Return a pinned and exclusively locked buffer which can be used to insert an
666  * index item of size itemsz (caller must ensure not to request sizes
667  * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
668  * an order determined to avoid deadlocks).
669  *
670  * If we find that the old page is no longer a regular index page (because
671  * of a revmap extension), the old buffer is unlocked and we return
672  * InvalidBuffer.
673  *
674  * If there's no existing page with enough free space to accommodate the new
675  * item, the relation is extended. If this happens, *extended is set to true,
676  * and it is the caller's responsibility to initialize the page (and WAL-log
677  * that fact) prior to use. The caller should also update the FSM with the
678  * page's remaining free space after the insertion.
679  *
680  * Note that the caller is not expected to update FSM unless *extended is set
681  * true. This policy means that we'll update FSM when a page is created, and
682  * when it's found to have too little space for a desired tuple insertion,
683  * but not every single time we add a tuple to the page.
684  *
685  * Note that in some corner cases it is possible for this routine to extend
686  * the relation and then not return the new page. It is this routine's
687  * responsibility to WAL-log the page initialization and to record the page in
688  * FSM if that happens, since the caller certainly can't do it.
689  */
690 static Buffer
692  bool *extended)
693 {
694  BlockNumber oldblk;
695  BlockNumber newblk;
696  Page page;
697  Size freespace;
698 
699  /* callers must have checked */
700  Assert(itemsz <= BrinMaxItemSize);
701 
702  if (BufferIsValid(oldbuf))
703  oldblk = BufferGetBlockNumber(oldbuf);
704  else
705  oldblk = InvalidBlockNumber;
706 
707  /* Choose initial target page, re-using existing target if known */
708  newblk = RelationGetTargetBlock(irel);
709  if (newblk == InvalidBlockNumber)
710  newblk = GetPageWithFreeSpace(irel, itemsz);
711 
712  /*
713  * Loop until we find a page with sufficient free space. By the time we
714  * return to caller out of this loop, both buffers are valid and locked;
715  * if we have to restart here, neither page is locked and newblk isn't
716  * pinned (if it's even valid).
717  */
718  for (;;)
719  {
720  Buffer buf;
721  bool extensionLockHeld = false;
722 
724 
725  *extended = false;
726 
727  if (newblk == InvalidBlockNumber)
728  {
729  /*
730  * There's not enough free space in any existing index page,
731  * according to the FSM: extend the relation to obtain a shiny new
732  * page.
733  *
734  * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
735  * which'd avoid the need to hold the extension lock during buffer
736  * reclaim.
737  */
738  if (!RELATION_IS_LOCAL(irel))
739  {
741  extensionLockHeld = true;
742  }
743  buf = ReadBuffer(irel, P_NEW);
744  newblk = BufferGetBlockNumber(buf);
745  *extended = true;
746 
747  BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
749  }
750  else if (newblk == oldblk)
751  {
752  /*
753  * There's an odd corner-case here where the FSM is out-of-date,
754  * and gave us the old page.
755  */
756  buf = oldbuf;
757  }
758  else
759  {
760  buf = ReadBuffer(irel, newblk);
761  }
762 
763  /*
764  * We lock the old buffer first, if it's earlier than the new one; but
765  * then we need to check that it hasn't been turned into a revmap page
766  * concurrently. If we detect that that happened, give up and tell
767  * caller to start over.
768  */
769  if (BufferIsValid(oldbuf) && oldblk < newblk)
770  {
772  if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
773  {
775 
776  /*
777  * It is possible that the new page was obtained from
778  * extending the relation. In that case, we must be sure to
779  * record it in the FSM before leaving, because otherwise the
780  * space would be lost forever. However, we cannot let an
781  * uninitialized page get in the FSM, so we need to initialize
782  * it first.
783  */
784  if (*extended)
786 
787  if (extensionLockHeld)
789 
791 
792  if (*extended)
793  {
794  FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
795  /* shouldn't matter, but don't confuse caller */
796  *extended = false;
797  }
798 
799  return InvalidBuffer;
800  }
801  }
802 
804 
805  if (extensionLockHeld)
807 
808  page = BufferGetPage(buf);
809 
810  /*
811  * We have a new buffer to insert into. Check that the new page has
812  * enough free space, and return it if it does; otherwise start over.
813  * (br_page_get_freespace also checks that the FSM didn't hand us a
814  * page that has since been repurposed for the revmap.)
815  */
816  freespace = *extended ?
818  if (freespace >= itemsz)
819  {
820  RelationSetTargetBlock(irel, newblk);
821 
822  /*
823  * Lock the old buffer if not locked already. Note that in this
824  * case we know for sure it's a regular page: it's later than the
825  * new page we just got, which is not a revmap page, and revmap
826  * pages are always consecutive.
827  */
828  if (BufferIsValid(oldbuf) && oldblk > newblk)
829  {
832  }
833 
834  return buf;
835  }
836 
837  /* This page is no good. */
838 
839  /*
840  * If an entirely new page does not contain enough free space for the
841  * new item, then surely that item is oversized. Complain loudly; but
842  * first make sure we initialize the page and record it as free, for
843  * next time.
844  */
845  if (*extended)
846  {
848  /* since this should not happen, skip FreeSpaceMapVacuum */
849 
850  ereport(ERROR,
851  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
852  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
853  itemsz, freespace, RelationGetRelationName(irel))));
854  return InvalidBuffer; /* keep compiler quiet */
855  }
856 
857  if (newblk != oldblk)
859  if (BufferIsValid(oldbuf) && oldblk <= newblk)
861 
862  /*
863  * Update the FSM with the new, presumably smaller, freespace value
864  * for this page, then search for a new target page.
865  */
866  newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
867  }
868 }
869 
870 /*
871  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
872  * the page in FSM.
873  *
874  * There are several corner situations in which we extend the relation to
875  * obtain a new page and later find that we cannot use it immediately. When
876  * that happens, we don't want to leave the page go unrecorded in FSM, because
877  * there is no mechanism to get the space back and the index would bloat.
878  * Also, because we would not WAL-log the action that would initialize the
879  * page, the page would go uninitialized in a standby (or after recovery).
880  *
881  * While we record the page in FSM here, caller is responsible for doing FSM
882  * upper-page update if that seems appropriate.
883  */
884 static void
886 {
887  Page page;
888 
889  BRIN_elog((DEBUG2,
890  "brin_initialize_empty_new_buffer: initializing blank page %u",
891  BufferGetBlockNumber(buffer)));
892 
894  page = BufferGetPage(buffer);
896  MarkBufferDirty(buffer);
897  log_newpage_buffer(buffer, true);
899 
900  /*
901  * We update the FSM for this page, but this is not WAL-logged. This is
902  * acceptable because VACUUM will scan the index and update the FSM with
903  * pages whose FSM records were forgotten in a crash.
904  */
906  br_page_get_freespace(page));
907 }
908 
909 
910 /*
911  * Return the amount of free space on a regular BRIN index page.
912  *
913  * If the page is not a regular page, or has been marked with the
914  * BRIN_EVACUATE_PAGE flag, returns 0.
915  */
916 static Size
918 {
919  if (!BRIN_IS_REGULAR_PAGE(page) ||
920  (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
921  return 0;
922  else
923  return PageGetFreeSpace(page);
924 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
#define BRIN_elog(args)
Definition: brin_internal.h:85
#define BRIN_IS_META_PAGE(page)
Definition: brin_page.h:55
#define BrinPageFlags(page)
Definition: brin_page.h:46
#define BRIN_META_MAGIC
Definition: brin_page.h:73
#define BRIN_EVACUATE_PAGE
Definition: brin_page.h:60
#define BRIN_PAGETYPE_REGULAR
Definition: brin_page.h:53
#define BRIN_PAGETYPE_META
Definition: brin_page.h:51
#define BRIN_IS_REVMAP_PAGE(page)
Definition: brin_page.h:56
#define BrinPageType(page)
Definition: brin_page.h:42
#define BRIN_IS_REGULAR_PAGE(page)
Definition: brin_page.h:57
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
Definition: brin_pageops.c:565
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
Definition: brin_pageops.c:525
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:54
static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
Definition: brin_pageops.c:885
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:625
#define BrinMaxItemSize
Definition: brin_pageops.c:29
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:343
void brin_page_init(Page page, uint16 type)
Definition: brin_pageops.c:476
static Size br_page_get_freespace(Page page)
Definition: brin_pageops.c:917
static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, bool *extended)
Definition: brin_pageops.c:691
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:487
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:324
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:113
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
Definition: brin_revmap.c:156
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:135
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:446
bool brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
Definition: brin_tuple.c:465
#define SizeOfBrinInsert
Definition: brin_xlog.h:74
#define SizeOfBrinUpdate
Definition: brin_xlog.h:95
#define XLOG_BRIN_SAMEPAGE_UPDATE
Definition: brin_xlog.h:34
#define SizeOfBrinSamepageUpdate
Definition: brin_xlog.h:107
#define XLOG_BRIN_INIT_PAGE
Definition: brin_xlog.h:43
#define XLOG_BRIN_UPDATE
Definition: brin_xlog.h:33
#define XLOG_BRIN_INSERT
Definition: brin_xlog.h:32
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3386
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4573
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4590
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2198
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4808
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4637
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:735
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
#define P_NEW
Definition: bufmgr.h:152
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1295
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
PageHeaderData * PageHeader
Definition: bufpage.h:170
static char * PageGetContents(Page page)
Definition: bufpage.h:254
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
unsigned short uint16
Definition: c.h:494
#define unconstify(underlying_type, expr)
Definition: c.h:1255
#define MAXALIGN(LEN)
Definition: c.h:800
unsigned char uint8
Definition: c.h:493
size_t Size
Definition: c.h:594
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:150
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:133
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
Assert(fmt[strlen(fmt) - 1] !='\n')
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:431
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:481
#define ExclusiveLock
Definition: lockdefs.h:42
#define ShareLock
Definition: lockdefs.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static char * buf
Definition: pg_test_fsync.c:73
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:649
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define RelationGetTargetBlock(relation)
Definition: rel.h:602
#define RelationNeedsWAL(relation)
Definition: rel.h:629
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:609
uint32 brinVersion
Definition: brin_page.h:67
uint32 brinMagic
Definition: brin_page.h:66
BlockNumber lastRevmapPage
Definition: brin_page.h:69
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bt_blkno
Definition: brin_tuple.h:66
OffsetNumber offnum
Definition: brin_xlog.h:71
BlockNumber pagesPerRange
Definition: brin_xlog.h:68
BlockNumber heapBlk
Definition: brin_xlog.h:65
OffsetNumber offnum
Definition: brin_xlog.h:104
OffsetNumber oldOffnum
Definition: brin_xlog.h:90
xl_brin_insert insert
Definition: brin_xlog.h:92
const char * type
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:365
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:475
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:406
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1238
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33