PostgreSQL Source Code git master
brin_pageops.c
Go to the documentation of this file.
/*
 * brin_pageops.c
 *		Page-handling routines for BRIN indexes
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/brin/brin_pageops.c
 */
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/rel.h"

24/*
25 * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
26 * a single item per page, unlike other index AMs.
27 */
28#define BrinMaxItemSize \
29 MAXALIGN_DOWN(BLCKSZ - \
30 (MAXALIGN(SizeOfPageHeaderData + \
31 sizeof(ItemIdData)) + \
32 MAXALIGN(sizeof(BrinSpecialSpace))))
33
34static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
35 bool *extended);
37static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
38
39
40/*
41 * Update tuple origtup (size origsz), located in offset oldoff of buffer
42 * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
43 * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
44 *
45 * If samepage is true, attempt to put the new tuple in the same page, but if
46 * there's no room, use some other one.
47 *
48 * If the update is successful, return true; the revmap is updated to point to
49 * the new tuple. If the update is not done for whatever reason, return false.
50 * Caller may retry the update if this happens.
51 */
52bool
53brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
54 BrinRevmap *revmap, BlockNumber heapBlk,
55 Buffer oldbuf, OffsetNumber oldoff,
56 const BrinTuple *origtup, Size origsz,
57 const BrinTuple *newtup, Size newsz,
58 bool samepage)
59{
60 Page oldpage;
61 ItemId oldlp;
62 BrinTuple *oldtup;
63 Size oldsz;
64 Buffer newbuf;
66 bool extended;
67
68 Assert(newsz == MAXALIGN(newsz));
69
70 /* If the item is oversized, don't bother. */
71 if (newsz > BrinMaxItemSize)
72 {
74 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
75 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
77 return false; /* keep compiler quiet */
78 }
79
80 /* make sure the revmap is long enough to contain the entry we need */
81 brinRevmapExtend(revmap, heapBlk);
82
83 if (!samepage)
84 {
85 /* need a page on which to put the item */
86 newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
87 if (!BufferIsValid(newbuf))
88 {
89 Assert(!extended);
90 return false;
91 }
92
93 /*
94 * Note: it's possible (though unlikely) that the returned newbuf is
95 * the same as oldbuf, if brin_getinsertbuffer determined that the old
96 * buffer does in fact have enough space.
97 */
98 if (newbuf == oldbuf)
99 {
100 Assert(!extended);
101 newbuf = InvalidBuffer;
102 }
103 else
104 newblk = BufferGetBlockNumber(newbuf);
105 }
106 else
107 {
109 newbuf = InvalidBuffer;
110 extended = false;
111 }
112 oldpage = BufferGetPage(oldbuf);
113 oldlp = PageGetItemId(oldpage, oldoff);
114
115 /*
116 * Check that the old tuple wasn't updated concurrently: it might have
117 * moved someplace else entirely, and for that matter the whole page
118 * might've become a revmap page. Note that in the first two cases
119 * checked here, the "oldlp" we just calculated is garbage; but
120 * PageGetItemId() is simple enough that it was safe to do that
121 * calculation anyway.
122 */
123 if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
124 oldoff > PageGetMaxOffsetNumber(oldpage) ||
125 !ItemIdIsNormal(oldlp))
126 {
128
129 /*
130 * If this happens, and the new buffer was obtained by extending the
131 * relation, then we need to ensure we don't leave it uninitialized or
132 * forget about it.
133 */
134 if (BufferIsValid(newbuf))
135 {
136 if (extended)
137 brin_initialize_empty_new_buffer(idxrel, newbuf);
138 UnlockReleaseBuffer(newbuf);
139 if (extended)
140 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
141 }
142 return false;
143 }
144
145 oldsz = ItemIdGetLength(oldlp);
146 oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
147
148 /*
149 * ... or it might have been updated in place to different contents.
150 */
151 if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
152 {
154 if (BufferIsValid(newbuf))
155 {
156 /* As above, initialize and record new page if we got one */
157 if (extended)
158 brin_initialize_empty_new_buffer(idxrel, newbuf);
159 UnlockReleaseBuffer(newbuf);
160 if (extended)
161 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
162 }
163 return false;
164 }
165
166 /*
167 * Great, the old tuple is intact. We can proceed with the update.
168 *
169 * If there's enough room in the old page for the new tuple, replace it.
170 *
171 * Note that there might now be enough space on the page even though the
172 * caller told us there isn't, if a concurrent update moved another tuple
173 * elsewhere or replaced a tuple with a smaller one.
174 */
175 if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
176 brin_can_do_samepage_update(oldbuf, origsz, newsz))
177 {
179 if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
180 elog(ERROR, "failed to replace BRIN tuple");
181 MarkBufferDirty(oldbuf);
182
183 /* XLOG stuff */
184 if (RelationNeedsWAL(idxrel))
185 {
187 XLogRecPtr recptr;
189
190 xlrec.offnum = oldoff;
191
194
196 XLogRegisterBufData(0, (const char *) newtup, newsz);
197
198 recptr = XLogInsert(RM_BRIN_ID, info);
199
200 PageSetLSN(oldpage, recptr);
201 }
202
204
206
207 if (BufferIsValid(newbuf))
208 {
209 /* As above, initialize and record new page if we got one */
210 if (extended)
211 brin_initialize_empty_new_buffer(idxrel, newbuf);
212 UnlockReleaseBuffer(newbuf);
213 if (extended)
214 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
215 }
216
217 return true;
218 }
219 else if (newbuf == InvalidBuffer)
220 {
221 /*
222 * Not enough space, but caller said that there was. Tell them to
223 * start over.
224 */
226 return false;
227 }
228 else
229 {
230 /*
231 * Not enough free space on the oldpage. Put the new tuple on the new
232 * page, and update the revmap.
233 */
234 Page newpage = BufferGetPage(newbuf);
235 Buffer revmapbuf;
236 ItemPointerData newtid;
237 OffsetNumber newoff;
238 Size freespace = 0;
239
240 revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
241
243
244 /*
245 * We need to initialize the page if it's newly obtained. Note we
246 * will WAL-log the initialization as part of the update, so we don't
247 * need to do that here.
248 */
249 if (extended)
251
252 PageIndexTupleDeleteNoCompact(oldpage, oldoff);
253 newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
254 InvalidOffsetNumber, false, false);
255 if (newoff == InvalidOffsetNumber)
256 elog(ERROR, "failed to add BRIN tuple to new page");
257 MarkBufferDirty(oldbuf);
258 MarkBufferDirty(newbuf);
259
260 /* needed to update FSM below */
261 if (extended)
262 freespace = br_page_get_freespace(newpage);
263
264 ItemPointerSet(&newtid, newblk, newoff);
265 brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
266 MarkBufferDirty(revmapbuf);
267
268 /* XLOG stuff */
269 if (RelationNeedsWAL(idxrel))
270 {
271 xl_brin_update xlrec;
272 XLogRecPtr recptr;
273 uint8 info;
274
275 info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
276
277 xlrec.insert.offnum = newoff;
278 xlrec.insert.heapBlk = heapBlk;
279 xlrec.insert.pagesPerRange = pagesPerRange;
280 xlrec.oldOffnum = oldoff;
281
283
284 /* new page */
285 XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
286
287 XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
288 XLogRegisterBufData(0, (const char *) newtup, newsz);
289
290 /* revmap page */
291 XLogRegisterBuffer(1, revmapbuf, 0);
292
293 /* old page */
295
296 recptr = XLogInsert(RM_BRIN_ID, info);
297
298 PageSetLSN(oldpage, recptr);
299 PageSetLSN(newpage, recptr);
300 PageSetLSN(BufferGetPage(revmapbuf), recptr);
301 }
302
304
305 LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
307 UnlockReleaseBuffer(newbuf);
308
309 if (extended)
310 {
311 RecordPageWithFreeSpace(idxrel, newblk, freespace);
312 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
313 }
314
315 return true;
316 }
317}
318
319/*
320 * Return whether brin_doupdate can do a samepage update.
321 */
322bool
324{
325 return
326 ((newsz <= origsz) ||
327 PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
328}
329
330/*
331 * Insert an index tuple into the index relation. The revmap is updated to
332 * mark the range containing the given page as pointing to the inserted entry.
333 * A WAL record is written.
334 *
335 * The buffer, if valid, is first checked for free space to insert the new
336 * entry; if there isn't enough, a new buffer is obtained and pinned. No
337 * buffer lock must be held on entry, no buffer lock is held on exit.
338 *
339 * Return value is the offset number where the tuple was inserted.
340 */
342brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
343 BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
344 BrinTuple *tup, Size itemsz)
345{
346 Page page;
347 BlockNumber blk;
348 OffsetNumber off;
349 Size freespace = 0;
350 Buffer revmapbuf;
351 ItemPointerData tid;
352 bool extended;
353
354 Assert(itemsz == MAXALIGN(itemsz));
355
356 /* If the item is oversized, don't even bother. */
357 if (itemsz > BrinMaxItemSize)
358 {
360 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
361 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
362 itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
363 return InvalidOffsetNumber; /* keep compiler quiet */
364 }
365
366 /* Make sure the revmap is long enough to contain the entry we need */
367 brinRevmapExtend(revmap, heapBlk);
368
369 /*
370 * Acquire lock on buffer supplied by caller, if any. If it doesn't have
371 * enough space, unpin it to obtain a new one below.
372 */
373 if (BufferIsValid(*buffer))
374 {
375 /*
376 * It's possible that another backend (or ourselves!) extended the
377 * revmap over the page we held a pin on, so we cannot assume that
378 * it's still a regular page.
379 */
381 if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
382 {
383 UnlockReleaseBuffer(*buffer);
384 *buffer = InvalidBuffer;
385 }
386 }
387
388 /*
389 * If we still don't have a usable buffer, have brin_getinsertbuffer
390 * obtain one for us.
391 */
392 if (!BufferIsValid(*buffer))
393 {
394 do
395 *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
396 while (!BufferIsValid(*buffer));
397 }
398 else
399 extended = false;
400
401 /* Now obtain lock on revmap buffer */
402 revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
403
404 page = BufferGetPage(*buffer);
405 blk = BufferGetBlockNumber(*buffer);
406
407 /* Execute the actual insertion */
409 if (extended)
411 off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
412 false, false);
413 if (off == InvalidOffsetNumber)
414 elog(ERROR, "failed to add BRIN tuple to new page");
415 MarkBufferDirty(*buffer);
416
417 /* needed to update FSM below */
418 if (extended)
419 freespace = br_page_get_freespace(page);
420
421 ItemPointerSet(&tid, blk, off);
422 brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
423 MarkBufferDirty(revmapbuf);
424
425 /* XLOG stuff */
426 if (RelationNeedsWAL(idxrel))
427 {
428 xl_brin_insert xlrec;
429 XLogRecPtr recptr;
430 uint8 info;
431
432 info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
433 xlrec.heapBlk = heapBlk;
434 xlrec.pagesPerRange = pagesPerRange;
435 xlrec.offnum = off;
436
438 XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
439
440 XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
441 XLogRegisterBufData(0, (char *) tup, itemsz);
442
443 XLogRegisterBuffer(1, revmapbuf, 0);
444
445 recptr = XLogInsert(RM_BRIN_ID, info);
446
447 PageSetLSN(page, recptr);
448 PageSetLSN(BufferGetPage(revmapbuf), recptr);
449 }
450
452
453 /* Tuple is firmly on buffer; we can release our locks */
455 LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
456
457 BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
458 blk, off, heapBlk));
459
460 if (extended)
461 {
462 RecordPageWithFreeSpace(idxrel, blk, freespace);
463 FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
464 }
465
466 return off;
467}
468
469/*
470 * Initialize a page with the given type.
471 *
472 * Caller is responsible for marking it dirty, as appropriate.
473 */
474void
476{
477 PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
478
479 BrinPageType(page) = type;
480}
481
482/*
483 * Initialize a new BRIN index's metapage.
484 */
485void
486brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
487{
488 BrinMetaPageData *metadata;
489
491
492 metadata = (BrinMetaPageData *) PageGetContents(page);
493
494 metadata->brinMagic = BRIN_META_MAGIC;
495 metadata->brinVersion = version;
496 metadata->pagesPerRange = pagesPerRange;
497
498 /*
499 * Note we cheat here a little. 0 is not a valid revmap block number
500 * (because it's the metapage buffer), but doing this enables the first
501 * revmap page to be created when the index is.
502 */
503 metadata->lastRevmapPage = 0;
504
505 /*
506 * Set pd_lower just past the end of the metadata. This is essential,
507 * because without doing so, metadata will be lost if xlog.c compresses
508 * the page.
509 */
510 ((PageHeader) page)->pd_lower =
511 ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
512}
513
514/*
515 * Initiate page evacuation protocol.
516 *
517 * The page must be locked in exclusive mode by the caller.
518 *
519 * If the page is not yet initialized or empty, return false without doing
520 * anything; it can be used for revmap without any further changes. If it
521 * contains tuples, mark it for evacuation and return true.
522 */
523bool
525{
526 OffsetNumber off;
527 OffsetNumber maxoff;
528 Page page;
529
530 page = BufferGetPage(buf);
531
532 if (PageIsNew(page))
533 return false;
534
535 maxoff = PageGetMaxOffsetNumber(page);
536 for (off = FirstOffsetNumber; off <= maxoff; off++)
537 {
538 ItemId lp;
539
540 lp = PageGetItemId(page, off);
541 if (ItemIdIsUsed(lp))
542 {
543 /*
544 * Prevent other backends from adding more stuff to this page:
545 * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
546 * can no longer be used to add new tuples. Note that this flag
547 * is not WAL-logged, except accidentally.
548 */
551
552 return true;
553 }
554 }
555 return false;
556}
557
558/*
559 * Move all tuples out of a page.
560 *
561 * The caller must hold lock on the page. The lock and pin are released.
562 */
563void
565 BrinRevmap *revmap, Buffer buf)
566{
567 OffsetNumber off;
568 OffsetNumber maxoff;
569 Page page;
570 BrinTuple *btup = NULL;
571 Size btupsz = 0;
572
573 page = BufferGetPage(buf);
574
576
577 maxoff = PageGetMaxOffsetNumber(page);
578 for (off = FirstOffsetNumber; off <= maxoff; off++)
579 {
580 BrinTuple *tup;
581 Size sz;
582 ItemId lp;
583
585
586 lp = PageGetItemId(page, off);
587 if (ItemIdIsUsed(lp))
588 {
589 sz = ItemIdGetLength(lp);
590 tup = (BrinTuple *) PageGetItem(page, lp);
591 tup = brin_copy_tuple(tup, sz, btup, &btupsz);
592
594
595 if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
596 buf, off, tup, sz, tup, sz, false))
597 off--; /* retry */
598
600
601 /* It's possible that someone extended the revmap over this page */
602 if (!BRIN_IS_REGULAR_PAGE(page))
603 break;
604 }
605 }
606
608}
609
610/*
611 * Given a BRIN index page, initialize it if necessary, and record its
612 * current free space in the FSM.
613 *
614 * The main use for this is when, during vacuuming, an uninitialized page is
615 * found, which could be the result of relation extension followed by a crash
616 * before the page can be used.
617 *
618 * Here, we don't bother to update upper FSM pages, instead expecting that our
619 * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
620 * in this file, it's generally a good idea to propagate additions of free
621 * space into the upper FSM pages immediately.
622 */
623void
625{
626 Page page = BufferGetPage(buf);
627
628 /*
629 * If a page was left uninitialized, initialize it now; also record it in
630 * FSM.
631 *
632 * Somebody else might be extending the relation concurrently. To avoid
633 * re-initializing the page before they can grab the buffer lock, we
634 * acquire the extension lock momentarily. Since they hold the extension
635 * lock from before getting the page and after its been initialized, we're
636 * sure to see their initialization.
637 */
638 if (PageIsNew(page))
639 {
642
644 if (PageIsNew(page))
645 {
648 return;
649 }
651 }
652
653 /* Nothing to be done for non-regular index pages */
656 return;
657
658 /* Measure free space and record it */
661}
662
663/*
664 * Return a pinned and exclusively locked buffer which can be used to insert an
665 * index item of size itemsz (caller must ensure not to request sizes
666 * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
667 * an order determined to avoid deadlocks).
668 *
669 * If we find that the old page is no longer a regular index page (because
670 * of a revmap extension), the old buffer is unlocked and we return
671 * InvalidBuffer.
672 *
673 * If there's no existing page with enough free space to accommodate the new
674 * item, the relation is extended. If this happens, *extended is set to true,
675 * and it is the caller's responsibility to initialize the page (and WAL-log
676 * that fact) prior to use. The caller should also update the FSM with the
677 * page's remaining free space after the insertion.
678 *
679 * Note that the caller is not expected to update FSM unless *extended is set
680 * true. This policy means that we'll update FSM when a page is created, and
681 * when it's found to have too little space for a desired tuple insertion,
682 * but not every single time we add a tuple to the page.
683 *
684 * Note that in some corner cases it is possible for this routine to extend
685 * the relation and then not return the new page. It is this routine's
686 * responsibility to WAL-log the page initialization and to record the page in
687 * FSM if that happens, since the caller certainly can't do it.
688 */
689static Buffer
691 bool *extended)
692{
693 BlockNumber oldblk;
694 BlockNumber newblk;
695 Page page;
696 Size freespace;
697
698 /* callers must have checked */
699 Assert(itemsz <= BrinMaxItemSize);
700
701 if (BufferIsValid(oldbuf))
702 oldblk = BufferGetBlockNumber(oldbuf);
703 else
704 oldblk = InvalidBlockNumber;
705
706 /* Choose initial target page, re-using existing target if known */
707 newblk = RelationGetTargetBlock(irel);
708 if (newblk == InvalidBlockNumber)
709 newblk = GetPageWithFreeSpace(irel, itemsz);
710
711 /*
712 * Loop until we find a page with sufficient free space. By the time we
713 * return to caller out of this loop, both buffers are valid and locked;
714 * if we have to restart here, neither page is locked and newblk isn't
715 * pinned (if it's even valid).
716 */
717 for (;;)
718 {
719 Buffer buf;
720 bool extensionLockHeld = false;
721
723
724 *extended = false;
725
726 if (newblk == InvalidBlockNumber)
727 {
728 /*
729 * There's not enough free space in any existing index page,
730 * according to the FSM: extend the relation to obtain a shiny new
731 * page.
732 *
733 * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
734 * which'd avoid the need to hold the extension lock during buffer
735 * reclaim.
736 */
737 if (!RELATION_IS_LOCAL(irel))
738 {
740 extensionLockHeld = true;
741 }
742 buf = ReadBuffer(irel, P_NEW);
743 newblk = BufferGetBlockNumber(buf);
744 *extended = true;
745
746 BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
748 }
749 else if (newblk == oldblk)
750 {
751 /*
752 * There's an odd corner-case here where the FSM is out-of-date,
753 * and gave us the old page.
754 */
755 buf = oldbuf;
756 }
757 else
758 {
759 buf = ReadBuffer(irel, newblk);
760 }
761
762 /*
763 * We lock the old buffer first, if it's earlier than the new one; but
764 * then we need to check that it hasn't been turned into a revmap page
765 * concurrently. If we detect that that happened, give up and tell
766 * caller to start over.
767 */
768 if (BufferIsValid(oldbuf) && oldblk < newblk)
769 {
772 {
774
775 /*
776 * It is possible that the new page was obtained from
777 * extending the relation. In that case, we must be sure to
778 * record it in the FSM before leaving, because otherwise the
779 * space would be lost forever. However, we cannot let an
780 * uninitialized page get in the FSM, so we need to initialize
781 * it first.
782 */
783 if (*extended)
785
786 if (extensionLockHeld)
788
790
791 if (*extended)
792 {
793 FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
794 /* shouldn't matter, but don't confuse caller */
795 *extended = false;
796 }
797
798 return InvalidBuffer;
799 }
800 }
801
803
804 if (extensionLockHeld)
806
807 page = BufferGetPage(buf);
808
809 /*
810 * We have a new buffer to insert into. Check that the new page has
811 * enough free space, and return it if it does; otherwise start over.
812 * (br_page_get_freespace also checks that the FSM didn't hand us a
813 * page that has since been repurposed for the revmap.)
814 */
815 freespace = *extended ?
817 if (freespace >= itemsz)
818 {
819 RelationSetTargetBlock(irel, newblk);
820
821 /*
822 * Lock the old buffer if not locked already. Note that in this
823 * case we know for sure it's a regular page: it's later than the
824 * new page we just got, which is not a revmap page, and revmap
825 * pages are always consecutive.
826 */
827 if (BufferIsValid(oldbuf) && oldblk > newblk)
828 {
831 }
832
833 return buf;
834 }
835
836 /* This page is no good. */
837
838 /*
839 * If an entirely new page does not contain enough free space for the
840 * new item, then surely that item is oversized. Complain loudly; but
841 * first make sure we initialize the page and record it as free, for
842 * next time.
843 */
844 if (*extended)
845 {
847 /* since this should not happen, skip FreeSpaceMapVacuum */
848
850 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
851 errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
852 itemsz, freespace, RelationGetRelationName(irel))));
853 return InvalidBuffer; /* keep compiler quiet */
854 }
855
856 if (newblk != oldblk)
858 if (BufferIsValid(oldbuf) && oldblk <= newblk)
860
861 /*
862 * Update the FSM with the new, presumably smaller, freespace value
863 * for this page, then search for a new target page.
864 */
865 newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
866 }
867}
868
869/*
870 * Initialize a page as an empty regular BRIN page, WAL-log this, and record
871 * the page in FSM.
872 *
873 * There are several corner situations in which we extend the relation to
874 * obtain a new page and later find that we cannot use it immediately. When
875 * that happens, we don't want to leave the page go unrecorded in FSM, because
876 * there is no mechanism to get the space back and the index would bloat.
877 * Also, because we would not WAL-log the action that would initialize the
878 * page, the page would go uninitialized in a standby (or after recovery).
879 *
880 * While we record the page in FSM here, caller is responsible for doing FSM
881 * upper-page update if that seems appropriate.
882 */
883static void
885{
886 Page page;
887
889 "brin_initialize_empty_new_buffer: initializing blank page %u",
890 BufferGetBlockNumber(buffer)));
891
893 page = BufferGetPage(buffer);
895 MarkBufferDirty(buffer);
896 log_newpage_buffer(buffer, true);
898
899 /*
900 * We update the FSM for this page, but this is not WAL-logged. This is
901 * acceptable because VACUUM will scan the index and update the FSM with
902 * pages whose FSM records were forgotten in a crash.
903 */
906}
907
908
909/*
910 * Return the amount of free space on a regular BRIN index page.
911 *
912 * If the page is not a regular page, or has been marked with the
913 * BRIN_EVACUATE_PAGE flag, returns 0.
914 */
915static Size
917{
918 if (!BRIN_IS_REGULAR_PAGE(page) ||
919 (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
920 return 0;
921 else
922 return PageGetFreeSpace(page);
923}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
#define BRIN_elog(args)
Definition: brin_internal.h:85
#define BRIN_IS_META_PAGE(page)
Definition: brin_page.h:55
#define BrinPageFlags(page)
Definition: brin_page.h:46
#define BRIN_META_MAGIC
Definition: brin_page.h:73
#define BRIN_EVACUATE_PAGE
Definition: brin_page.h:60
#define BRIN_PAGETYPE_REGULAR
Definition: brin_page.h:53
#define BRIN_PAGETYPE_META
Definition: brin_page.h:51
#define BRIN_IS_REVMAP_PAGE(page)
Definition: brin_page.h:56
#define BrinPageType(page)
Definition: brin_page.h:42
#define BRIN_IS_REGULAR_PAGE(page)
Definition: brin_page.h:57
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
Definition: brin_pageops.c:564
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
Definition: brin_pageops.c:524
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:53
static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
Definition: brin_pageops.c:884
void brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:624
#define BrinMaxItemSize
Definition: brin_pageops.c:28
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:342
void brin_page_init(Page page, uint16 type)
Definition: brin_pageops.c:475
static Size br_page_get_freespace(Page page)
Definition: brin_pageops.c:916
static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, bool *extended)
Definition: brin_pageops.c:690
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:486
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:323
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:112
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
Definition: brin_revmap.c:155
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:134
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:446
bool brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
Definition: brin_tuple.c:465
#define SizeOfBrinInsert
Definition: brin_xlog.h:74
#define SizeOfBrinUpdate
Definition: brin_xlog.h:95
#define XLOG_BRIN_SAMEPAGE_UPDATE
Definition: brin_xlog.h:34
#define SizeOfBrinSamepageUpdate
Definition: brin_xlog.h:107
#define XLOG_BRIN_INIT_PAGE
Definition: brin_xlog.h:43
#define XLOG_BRIN_UPDATE
Definition: brin_xlog.h:33
#define XLOG_BRIN_INSERT
Definition: brin_xlog.h:32
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4866
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4883
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5100
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4930
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define P_NEW
Definition: bufmgr.h:184
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:396
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:347
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1394
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:1284
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:947
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:896
PageHeaderData * PageHeader
Definition: bufpage.h:173
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static char * PageGetContents(Page page)
Definition: bufpage.h:257
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsNew(Page page)
Definition: bufpage.h:233
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
#define unconstify(underlying_type, expr)
Definition: c.h:1202
#define MAXALIGN(LEN)
Definition: c.h:768
uint8_t uint8
Definition: c.h:486
#define Assert(condition)
Definition: c.h:815
uint16_t uint16
Definition: c.h:487
size_t Size
Definition: c.h:562
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:154
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:137
Pointer Item
Definition: item.h:17
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:419
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:469
#define ExclusiveLock
Definition: lockdefs.h:42
#define ShareLock
Definition: lockdefs.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static char * buf
Definition: pg_test_fsync.c:72
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:608
uint32 brinVersion
Definition: brin_page.h:67
uint32 brinMagic
Definition: brin_page.h:66
BlockNumber lastRevmapPage
Definition: brin_page.h:69
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bt_blkno
Definition: brin_tuple.h:66
OffsetNumber offnum
Definition: brin_xlog.h:71
BlockNumber pagesPerRange
Definition: brin_xlog.h:68
BlockNumber heapBlk
Definition: brin_xlog.h:65
OffsetNumber offnum
Definition: brin_xlog.h:104
OffsetNumber oldOffnum
Definition: brin_xlog.h:90
xl_brin_insert insert
Definition: brin_xlog.h:92
const char * type
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterBufData(uint8 block_id, const char *data, uint32 len)
Definition: xloginsert.c:405
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33