/*
 * PostgreSQL Source Code (git master) — brin_pageops.c
 * (doxygen page-header navigation text converted to a comment)
 */
1 /*
2  * brin_pageops.c
3  * Page-handling routines for BRIN indexes
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/access/brin/brin_pageops.c
10  */
11 #include "postgres.h"
12 
13 #include "access/brin_page.h"
14 #include "access/brin_pageops.h"
15 #include "access/brin_revmap.h"
16 #include "access/brin_xlog.h"
17 #include "access/xloginsert.h"
18 #include "miscadmin.h"
19 #include "storage/bufmgr.h"
20 #include "storage/freespace.h"
21 #include "storage/lmgr.h"
22 #include "utils/rel.h"
23 
24 /*
25  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
26  * a single item per page, unlike other index AMs.
27  */
28 #define BrinMaxItemSize \
29  MAXALIGN_DOWN(BLCKSZ - \
30  (MAXALIGN(SizeOfPageHeaderData + \
31  sizeof(ItemIdData)) + \
32  MAXALIGN(sizeof(BrinSpecialSpace))))
33 
34 static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
35  bool *extended);
36 static Size br_page_get_freespace(Page page);
37 static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
38 
39 
40 /*
41  * Update tuple origtup (size origsz), located in offset oldoff of buffer
42  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
43  * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
44  *
45  * If samepage is true, attempt to put the new tuple in the same page, but if
46  * there's no room, use some other one.
47  *
48  * If the update is successful, return true; the revmap is updated to point to
49  * the new tuple. If the update is not done for whatever reason, return false.
50  * Caller may retry the update if this happens.
51  */
52 bool
53 brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
54  BrinRevmap *revmap, BlockNumber heapBlk,
55  Buffer oldbuf, OffsetNumber oldoff,
56  const BrinTuple *origtup, Size origsz,
57  const BrinTuple *newtup, Size newsz,
58  bool samepage)
59 {
60  Page oldpage;
61  ItemId oldlp;
62  BrinTuple *oldtup;
63  Size oldsz;
64  Buffer newbuf;
66  bool extended;
67 
68  Assert(newsz == MAXALIGN(newsz));
69 
70  /* If the item is oversized, don't bother. */
71  if (newsz > BrinMaxItemSize)
72  {
73  ereport(ERROR,
74  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
75  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
76  newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
77  return false; /* keep compiler quiet */
78  }
79 
80  /* make sure the revmap is long enough to contain the entry we need */
81  brinRevmapExtend(revmap, heapBlk);
82 
83  if (!samepage)
84  {
85  /* need a page on which to put the item */
86  newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
87  if (!BufferIsValid(newbuf))
88  {
89  Assert(!extended);
90  return false;
91  }
92 
93  /*
94  * Note: it's possible (though unlikely) that the returned newbuf is
95  * the same as oldbuf, if brin_getinsertbuffer determined that the old
96  * buffer does in fact have enough space.
97  */
98  if (newbuf == oldbuf)
99  {
100  Assert(!extended);
101  newbuf = InvalidBuffer;
102  }
103  else
104  newblk = BufferGetBlockNumber(newbuf);
105  }
106  else
107  {
109  newbuf = InvalidBuffer;
110  extended = false;
111  }
112  oldpage = BufferGetPage(oldbuf);
113  oldlp = PageGetItemId(oldpage, oldoff);
114 
115  /*
116  * Check that the old tuple wasn't updated concurrently: it might have
117  * moved someplace else entirely, and for that matter the whole page
118  * might've become a revmap page. Note that in the first two cases
119  * checked here, the "oldlp" we just calculated is garbage; but
120  * PageGetItemId() is simple enough that it was safe to do that
121  * calculation anyway.
122  */
123  if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
124  oldoff > PageGetMaxOffsetNumber(oldpage) ||
125  !ItemIdIsNormal(oldlp))
126  {
128 
129  /*
130  * If this happens, and the new buffer was obtained by extending the
131  * relation, then we need to ensure we don't leave it uninitialized or
132  * forget about it.
133  */
134  if (BufferIsValid(newbuf))
135  {
136  if (extended)
137  brin_initialize_empty_new_buffer(idxrel, newbuf);
138  UnlockReleaseBuffer(newbuf);
139  if (extended)
140  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
141  }
142  return false;
143  }
144 
145  oldsz = ItemIdGetLength(oldlp);
146  oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
147 
148  /*
149  * ... or it might have been updated in place to different contents.
150  */
151  if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
152  {
154  if (BufferIsValid(newbuf))
155  {
156  /* As above, initialize and record new page if we got one */
157  if (extended)
158  brin_initialize_empty_new_buffer(idxrel, newbuf);
159  UnlockReleaseBuffer(newbuf);
160  if (extended)
161  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
162  }
163  return false;
164  }
165 
166  /*
167  * Great, the old tuple is intact. We can proceed with the update.
168  *
169  * If there's enough room in the old page for the new tuple, replace it.
170  *
171  * Note that there might now be enough space on the page even though the
172  * caller told us there isn't, if a concurrent update moved another tuple
173  * elsewhere or replaced a tuple with a smaller one.
174  */
175  if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
176  brin_can_do_samepage_update(oldbuf, origsz, newsz))
177  {
179  if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
180  elog(ERROR, "failed to replace BRIN tuple");
181  MarkBufferDirty(oldbuf);
182 
183  /* XLOG stuff */
184  if (RelationNeedsWAL(idxrel))
185  {
187  XLogRecPtr recptr;
189 
190  xlrec.offnum = oldoff;
191 
192  XLogBeginInsert();
193  XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
194 
196  XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
197 
198  recptr = XLogInsert(RM_BRIN_ID, info);
199 
200  PageSetLSN(oldpage, recptr);
201  }
202 
204 
206 
207  if (BufferIsValid(newbuf))
208  {
209  /* As above, initialize and record new page if we got one */
210  if (extended)
211  brin_initialize_empty_new_buffer(idxrel, newbuf);
212  UnlockReleaseBuffer(newbuf);
213  if (extended)
214  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
215  }
216 
217  return true;
218  }
219  else if (newbuf == InvalidBuffer)
220  {
221  /*
222  * Not enough space, but caller said that there was. Tell them to
223  * start over.
224  */
226  return false;
227  }
228  else
229  {
230  /*
231  * Not enough free space on the oldpage. Put the new tuple on the new
232  * page, and update the revmap.
233  */
234  Page newpage = BufferGetPage(newbuf);
235  Buffer revmapbuf;
236  ItemPointerData newtid;
237  OffsetNumber newoff;
238  Size freespace = 0;
239 
240  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
241 
243 
244  /*
245  * We need to initialize the page if it's newly obtained. Note we
246  * will WAL-log the initialization as part of the update, so we don't
247  * need to do that here.
248  */
249  if (extended)
251 
252  PageIndexTupleDeleteNoCompact(oldpage, oldoff);
253  newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
254  InvalidOffsetNumber, false, false);
255  if (newoff == InvalidOffsetNumber)
256  elog(ERROR, "failed to add BRIN tuple to new page");
257  MarkBufferDirty(oldbuf);
258  MarkBufferDirty(newbuf);
259 
260  /* needed to update FSM below */
261  if (extended)
262  freespace = br_page_get_freespace(newpage);
263 
264  ItemPointerSet(&newtid, newblk, newoff);
265  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
266  MarkBufferDirty(revmapbuf);
267 
268  /* XLOG stuff */
269  if (RelationNeedsWAL(idxrel))
270  {
271  xl_brin_update xlrec;
272  XLogRecPtr recptr;
273  uint8 info;
274 
275  info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
276 
277  xlrec.insert.offnum = newoff;
278  xlrec.insert.heapBlk = heapBlk;
279  xlrec.insert.pagesPerRange = pagesPerRange;
280  xlrec.oldOffnum = oldoff;
281 
282  XLogBeginInsert();
283 
284  /* new page */
285  XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
286 
287  XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
288  XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
289 
290  /* revmap page */
291  XLogRegisterBuffer(1, revmapbuf, 0);
292 
293  /* old page */
295 
296  recptr = XLogInsert(RM_BRIN_ID, info);
297 
298  PageSetLSN(oldpage, recptr);
299  PageSetLSN(newpage, recptr);
300  PageSetLSN(BufferGetPage(revmapbuf), recptr);
301  }
302 
304 
305  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
307  UnlockReleaseBuffer(newbuf);
308 
309  if (extended)
310  {
311  RecordPageWithFreeSpace(idxrel, newblk, freespace);
312  FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
313  }
314 
315  return true;
316  }
317 }
318 
319 /*
320  * Return whether brin_doupdate can do a samepage update.
321  */
322 bool
324 {
325  return
326  ((newsz <= origsz) ||
327  PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
328 }
329 
330 /*
331  * Insert an index tuple into the index relation. The revmap is updated to
332  * mark the range containing the given page as pointing to the inserted entry.
333  * A WAL record is written.
334  *
335  * The buffer, if valid, is first checked for free space to insert the new
336  * entry; if there isn't enough, a new buffer is obtained and pinned. No
337  * buffer lock must be held on entry, no buffer lock is held on exit.
338  *
339  * Return value is the offset number where the tuple was inserted.
340  */
342 brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
343  BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
344  BrinTuple *tup, Size itemsz)
345 {
346  Page page;
347  BlockNumber blk;
348  OffsetNumber off;
349  Size freespace = 0;
350  Buffer revmapbuf;
351  ItemPointerData tid;
352  bool extended;
353 
354  Assert(itemsz == MAXALIGN(itemsz));
355 
356  /* If the item is oversized, don't even bother. */
357  if (itemsz > BrinMaxItemSize)
358  {
359  ereport(ERROR,
360  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
361  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
362  itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
363  return InvalidOffsetNumber; /* keep compiler quiet */
364  }
365 
366  /* Make sure the revmap is long enough to contain the entry we need */
367  brinRevmapExtend(revmap, heapBlk);
368 
369  /*
370  * Acquire lock on buffer supplied by caller, if any. If it doesn't have
371  * enough space, unpin it to obtain a new one below.
372  */
373  if (BufferIsValid(*buffer))
374  {
375  /*
376  * It's possible that another backend (or ourselves!) extended the
377  * revmap over the page we held a pin on, so we cannot assume that
378  * it's still a regular page.
379  */
381  if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
382  {
383  UnlockReleaseBuffer(*buffer);
384  *buffer = InvalidBuffer;
385  }
386  }
387 
388  /*
389  * If we still don't have a usable buffer, have brin_getinsertbuffer
390  * obtain one for us.
391  */
392  if (!BufferIsValid(*buffer))
393  {
394  do
395  *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
396  while (!BufferIsValid(*buffer));
397  }
398  else
399  extended = false;
400 
401  /* Now obtain lock on revmap buffer */
402  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
403 
404  page = BufferGetPage(*buffer);
405  blk = BufferGetBlockNumber(*buffer);
406 
407  /* Execute the actual insertion */
409  if (extended)
411  off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
412  false, false);
413  if (off == InvalidOffsetNumber)
414  elog(ERROR, "failed to add BRIN tuple to new page");
415  MarkBufferDirty(*buffer);
416 
417  /* needed to update FSM below */
418  if (extended)
419  freespace = br_page_get_freespace(page);
420 
421  ItemPointerSet(&tid, blk, off);
422  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
423  MarkBufferDirty(revmapbuf);
424 
425  /* XLOG stuff */
426  if (RelationNeedsWAL(idxrel))
427  {
428  xl_brin_insert xlrec;
429  XLogRecPtr recptr;
430  uint8 info;
431 
432  info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
433  xlrec.heapBlk = heapBlk;
434  xlrec.pagesPerRange = pagesPerRange;
435  xlrec.offnum = off;
436 
437  XLogBeginInsert();
438  XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
439 
440  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
441  XLogRegisterBufData(0, (char *) tup, itemsz);
442 
443  XLogRegisterBuffer(1, revmapbuf, 0);
444 
445  recptr = XLogInsert(RM_BRIN_ID, info);
446 
447  PageSetLSN(page, recptr);
448  PageSetLSN(BufferGetPage(revmapbuf), recptr);
449  }
450 
452 
453  /* Tuple is firmly on buffer; we can release our locks */
454  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
455  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
456 
457  BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
458  blk, off, heapBlk));
459 
460  if (extended)
461  {
462  RecordPageWithFreeSpace(idxrel, blk, freespace);
463  FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
464  }
465 
466  return off;
467 }
468 
469 /*
470  * Initialize a page with the given type.
471  *
472  * Caller is responsible for marking it dirty, as appropriate.
473  */
474 void
476 {
477  PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
478 
479  BrinPageType(page) = type;
480 }
481 
482 /*
483  * Initialize a new BRIN index's metapage.
484  */
485 void
486 brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
487 {
488  BrinMetaPageData *metadata;
489 
491 
492  metadata = (BrinMetaPageData *) PageGetContents(page);
493 
494  metadata->brinMagic = BRIN_META_MAGIC;
495  metadata->brinVersion = version;
496  metadata->pagesPerRange = pagesPerRange;
497 
498  /*
499  * Note we cheat here a little. 0 is not a valid revmap block number
500  * (because it's the metapage buffer), but doing this enables the first
501  * revmap page to be created when the index is.
502  */
503  metadata->lastRevmapPage = 0;
504 
505  /*
506  * Set pd_lower just past the end of the metadata. This is essential,
507  * because without doing so, metadata will be lost if xlog.c compresses
508  * the page.
509  */
510  ((PageHeader) page)->pd_lower =
511  ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
512 }
513 
514 /*
515  * Initiate page evacuation protocol.
516  *
517  * The page must be locked in exclusive mode by the caller.
518  *
519  * If the page is not yet initialized or empty, return false without doing
520  * anything; it can be used for revmap without any further changes. If it
521  * contains tuples, mark it for evacuation and return true.
522  */
523 bool
525 {
526  OffsetNumber off;
527  OffsetNumber maxoff;
528  Page page;
529 
530  page = BufferGetPage(buf);
531 
532  if (PageIsNew(page))
533  return false;
534 
535  maxoff = PageGetMaxOffsetNumber(page);
536  for (off = FirstOffsetNumber; off <= maxoff; off++)
537  {
538  ItemId lp;
539 
540  lp = PageGetItemId(page, off);
541  if (ItemIdIsUsed(lp))
542  {
543  /*
544  * Prevent other backends from adding more stuff to this page:
545  * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
546  * can no longer be used to add new tuples. Note that this flag
547  * is not WAL-logged, except accidentally.
548  */
550  MarkBufferDirtyHint(buf, true);
551 
552  return true;
553  }
554  }
555  return false;
556 }
557 
558 /*
559  * Move all tuples out of a page.
560  *
561  * The caller must hold lock on the page. The lock and pin are released.
562  */
563 void
565  BrinRevmap *revmap, Buffer buf)
566 {
567  OffsetNumber off;
568  OffsetNumber maxoff;
569  Page page;
570  BrinTuple *btup = NULL;
571  Size btupsz = 0;
572 
573  page = BufferGetPage(buf);
574 
576 
577  maxoff = PageGetMaxOffsetNumber(page);
578  for (off = FirstOffsetNumber; off <= maxoff; off++)
579  {
580  BrinTuple *tup;
581  Size sz;
582  ItemId lp;
583 
585 
586  lp = PageGetItemId(page, off);
587  if (ItemIdIsUsed(lp))
588  {
589  sz = ItemIdGetLength(lp);
590  tup = (BrinTuple *) PageGetItem(page, lp);
591  tup = brin_copy_tuple(tup, sz, btup, &btupsz);
592 
594 
595  if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
596  buf, off, tup, sz, tup, sz, false))
597  off--; /* retry */
598 
600 
601  /* It's possible that someone extended the revmap over this page */
602  if (!BRIN_IS_REGULAR_PAGE(page))
603  break;
604  }
605  }
606 
608 }
609 
610 /*
611  * Given a BRIN index page, initialize it if necessary, and record its
612  * current free space in the FSM.
613  *
614  * The main use for this is when, during vacuuming, an uninitialized page is
615  * found, which could be the result of relation extension followed by a crash
616  * before the page can be used.
617  *
618  * Here, we don't bother to update upper FSM pages, instead expecting that our
619  * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
620  * in this file, it's generally a good idea to propagate additions of free
621  * space into the upper FSM pages immediately.
622  */
623 void
625 {
626  Page page = BufferGetPage(buf);
627 
628  /*
629  * If a page was left uninitialized, initialize it now; also record it in
630  * FSM.
631  *
632  * Somebody else might be extending the relation concurrently. To avoid
633  * re-initializing the page before they can grab the buffer lock, we
634  * acquire the extension lock momentarily. Since they hold the extension
635  * lock from before getting the page and after its been initialized, we're
636  * sure to see their initialization.
637  */
638  if (PageIsNew(page))
639  {
642 
644  if (PageIsNew(page))
645  {
648  return;
649  }
651  }
652 
653  /* Nothing to be done for non-regular index pages */
656  return;
657 
658  /* Measure free space and record it */
660  br_page_get_freespace(page));
661 }
662 
663 /*
664  * Return a pinned and exclusively locked buffer which can be used to insert an
665  * index item of size itemsz (caller must ensure not to request sizes
666  * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
667  * an order determined to avoid deadlocks).
668  *
669  * If we find that the old page is no longer a regular index page (because
670  * of a revmap extension), the old buffer is unlocked and we return
671  * InvalidBuffer.
672  *
673  * If there's no existing page with enough free space to accommodate the new
674  * item, the relation is extended. If this happens, *extended is set to true,
675  * and it is the caller's responsibility to initialize the page (and WAL-log
676  * that fact) prior to use. The caller should also update the FSM with the
677  * page's remaining free space after the insertion.
678  *
679  * Note that the caller is not expected to update FSM unless *extended is set
680  * true. This policy means that we'll update FSM when a page is created, and
681  * when it's found to have too little space for a desired tuple insertion,
682  * but not every single time we add a tuple to the page.
683  *
684  * Note that in some corner cases it is possible for this routine to extend
685  * the relation and then not return the new page. It is this routine's
686  * responsibility to WAL-log the page initialization and to record the page in
687  * FSM if that happens, since the caller certainly can't do it.
688  */
689 static Buffer
691  bool *extended)
692 {
693  BlockNumber oldblk;
694  BlockNumber newblk;
695  Page page;
696  Size freespace;
697 
698  /* callers must have checked */
699  Assert(itemsz <= BrinMaxItemSize);
700 
701  if (BufferIsValid(oldbuf))
702  oldblk = BufferGetBlockNumber(oldbuf);
703  else
704  oldblk = InvalidBlockNumber;
705 
706  /* Choose initial target page, re-using existing target if known */
707  newblk = RelationGetTargetBlock(irel);
708  if (newblk == InvalidBlockNumber)
709  newblk = GetPageWithFreeSpace(irel, itemsz);
710 
711  /*
712  * Loop until we find a page with sufficient free space. By the time we
713  * return to caller out of this loop, both buffers are valid and locked;
714  * if we have to restart here, neither page is locked and newblk isn't
715  * pinned (if it's even valid).
716  */
717  for (;;)
718  {
719  Buffer buf;
720  bool extensionLockHeld = false;
721 
723 
724  *extended = false;
725 
726  if (newblk == InvalidBlockNumber)
727  {
728  /*
729  * There's not enough free space in any existing index page,
730  * according to the FSM: extend the relation to obtain a shiny new
731  * page.
732  *
733  * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
734  * which'd avoid the need to hold the extension lock during buffer
735  * reclaim.
736  */
737  if (!RELATION_IS_LOCAL(irel))
738  {
740  extensionLockHeld = true;
741  }
742  buf = ReadBuffer(irel, P_NEW);
743  newblk = BufferGetBlockNumber(buf);
744  *extended = true;
745 
746  BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
748  }
749  else if (newblk == oldblk)
750  {
751  /*
752  * There's an odd corner-case here where the FSM is out-of-date,
753  * and gave us the old page.
754  */
755  buf = oldbuf;
756  }
757  else
758  {
759  buf = ReadBuffer(irel, newblk);
760  }
761 
762  /*
763  * We lock the old buffer first, if it's earlier than the new one; but
764  * then we need to check that it hasn't been turned into a revmap page
765  * concurrently. If we detect that that happened, give up and tell
766  * caller to start over.
767  */
768  if (BufferIsValid(oldbuf) && oldblk < newblk)
769  {
771  if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
772  {
774 
775  /*
776  * It is possible that the new page was obtained from
777  * extending the relation. In that case, we must be sure to
778  * record it in the FSM before leaving, because otherwise the
779  * space would be lost forever. However, we cannot let an
780  * uninitialized page get in the FSM, so we need to initialize
781  * it first.
782  */
783  if (*extended)
785 
786  if (extensionLockHeld)
788 
790 
791  if (*extended)
792  {
793  FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
794  /* shouldn't matter, but don't confuse caller */
795  *extended = false;
796  }
797 
798  return InvalidBuffer;
799  }
800  }
801 
803 
804  if (extensionLockHeld)
806 
807  page = BufferGetPage(buf);
808 
809  /*
810  * We have a new buffer to insert into. Check that the new page has
811  * enough free space, and return it if it does; otherwise start over.
812  * (br_page_get_freespace also checks that the FSM didn't hand us a
813  * page that has since been repurposed for the revmap.)
814  */
815  freespace = *extended ?
817  if (freespace >= itemsz)
818  {
819  RelationSetTargetBlock(irel, newblk);
820 
821  /*
822  * Lock the old buffer if not locked already. Note that in this
823  * case we know for sure it's a regular page: it's later than the
824  * new page we just got, which is not a revmap page, and revmap
825  * pages are always consecutive.
826  */
827  if (BufferIsValid(oldbuf) && oldblk > newblk)
828  {
831  }
832 
833  return buf;
834  }
835 
836  /* This page is no good. */
837 
838  /*
839  * If an entirely new page does not contain enough free space for the
840  * new item, then surely that item is oversized. Complain loudly; but
841  * first make sure we initialize the page and record it as free, for
842  * next time.
843  */
844  if (*extended)
845  {
847  /* since this should not happen, skip FreeSpaceMapVacuum */
848 
849  ereport(ERROR,
850  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
851  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
852  itemsz, freespace, RelationGetRelationName(irel))));
853  return InvalidBuffer; /* keep compiler quiet */
854  }
855 
856  if (newblk != oldblk)
858  if (BufferIsValid(oldbuf) && oldblk <= newblk)
860 
861  /*
862  * Update the FSM with the new, presumably smaller, freespace value
863  * for this page, then search for a new target page.
864  */
865  newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
866  }
867 }
868 
869 /*
870  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
871  * the page in FSM.
872  *
873  * There are several corner situations in which we extend the relation to
874  * obtain a new page and later find that we cannot use it immediately. When
875  * that happens, we don't want to leave the page go unrecorded in FSM, because
876  * there is no mechanism to get the space back and the index would bloat.
877  * Also, because we would not WAL-log the action that would initialize the
878  * page, the page would go uninitialized in a standby (or after recovery).
879  *
880  * While we record the page in FSM here, caller is responsible for doing FSM
881  * upper-page update if that seems appropriate.
882  */
883 static void
885 {
886  Page page;
887 
888  BRIN_elog((DEBUG2,
889  "brin_initialize_empty_new_buffer: initializing blank page %u",
890  BufferGetBlockNumber(buffer)));
891 
893  page = BufferGetPage(buffer);
895  MarkBufferDirty(buffer);
896  log_newpage_buffer(buffer, true);
898 
899  /*
900  * We update the FSM for this page, but this is not WAL-logged. This is
901  * acceptable because VACUUM will scan the index and update the FSM with
902  * pages whose FSM records were forgotten in a crash.
903  */
905  br_page_get_freespace(page));
906 }
907 
908 
909 /*
910  * Return the amount of free space on a regular BRIN index page.
911  *
912  * If the page is not a regular page, or has been marked with the
913  * BRIN_EVACUATE_PAGE flag, returns 0.
914  */
915 static Size
917 {
918  if (!BRIN_IS_REGULAR_PAGE(page) ||
919  (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
920  return 0;
921  else
922  return PageGetFreeSpace(page);
923 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
#define BRIN_elog(args)
Definition: brin_internal.h:85
#define BRIN_IS_META_PAGE(page)
Definition: brin_page.h:55
#define BrinPageFlags(page)
Definition: brin_page.h:46
#define BRIN_META_MAGIC
Definition: brin_page.h:73
#define BRIN_EVACUATE_PAGE
Definition: brin_page.h:60
#define BRIN_PAGETYPE_REGULAR
Definition: brin_page.h:53
#define BRIN_PAGETYPE_META
Definition: brin_page.h:51
#define BRIN_IS_REVMAP_PAGE(page)
Definition: brin_page.h:56
#define BrinPageType(page)
Definition: brin_page.h:42
#define BRIN_IS_REGULAR_PAGE(page)
Definition: brin_page.h:57
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
Definition: brin_pageops.c:564
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
Definition: brin_pageops.c:524
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:53
static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
Definition: brin_pageops.c:884
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:624
#define BrinMaxItemSize
Definition: brin_pageops.c:28
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:342
void brin_page_init(Page page, uint16 type)
Definition: brin_pageops.c:475
static Size br_page_get_freespace(Page page)
Definition: brin_pageops.c:916
static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, bool *extended)
Definition: brin_pageops.c:690
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:486
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:323
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:112
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
Definition: brin_revmap.c:155
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:134
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:446
bool brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
Definition: brin_tuple.c:465
#define SizeOfBrinInsert
Definition: brin_xlog.h:74
#define SizeOfBrinUpdate
Definition: brin_xlog.h:95
#define XLOG_BRIN_SAMEPAGE_UPDATE
Definition: brin_xlog.h:34
#define SizeOfBrinSamepageUpdate
Definition: brin_xlog.h:107
#define XLOG_BRIN_INIT_PAGE
Definition: brin_xlog.h:43
#define XLOG_BRIN_UPDATE
Definition: brin_xlog.h:33
#define XLOG_BRIN_INSERT
Definition: brin_xlog.h:32
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3667
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4850
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4867
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2474
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5085
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4914
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:193
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:194
#define P_NEW
Definition: bufmgr.h:188
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:404
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:195
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:355
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1295
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:958
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
PageHeaderData * PageHeader
Definition: bufpage.h:170
static char * PageGetContents(Page page)
Definition: bufpage.h:254
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
unsigned short uint16
Definition: c.h:505
#define unconstify(underlying_type, expr)
Definition: c.h:1245
#define MAXALIGN(LEN)
Definition: c.h:811
#define Assert(condition)
Definition: c.h:858
unsigned char uint8
Definition: c.h:504
size_t Size
Definition: c.h:605
int errcode(int sqlerrcode)
Definition: elog.c:857
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:154
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:137
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:430
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:480
#define ExclusiveLock
Definition: lockdefs.h:42
#define ShareLock
Definition: lockdefs.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static char * buf
Definition: pg_test_fsync.c:73
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:608
uint32 brinVersion
Definition: brin_page.h:67
uint32 brinMagic
Definition: brin_page.h:66
BlockNumber lastRevmapPage
Definition: brin_page.h:69
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bt_blkno
Definition: brin_tuple.h:66
OffsetNumber offnum
Definition: brin_xlog.h:71
BlockNumber pagesPerRange
Definition: brin_xlog.h:68
BlockNumber heapBlk
Definition: brin_xlog.h:65
OffsetNumber offnum
Definition: brin_xlog.h:104
OffsetNumber oldOffnum
Definition: brin_xlog.h:90
xl_brin_insert insert
Definition: brin_xlog.h:92
const char * type
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33