PostgreSQL Source Code git master
Loading...
Searching...
No Matches
brin_revmap.c
Go to the documentation of this file.
1/*
2 * brin_revmap.c
3 * Range map for BRIN indexes
4 *
5 * The range map (revmap) is a translation structure for BRIN indexes: for each
6 * page range there is one summary tuple, and its location is tracked by the
7 * revmap. Whenever a new tuple is inserted into a table that violates the
8 * previously recorded summary values, a new tuple is inserted into the index
9 * and the revmap is updated to point to it.
10 *
11 * The revmap is stored in the first pages of the index, immediately following
12 * the metapage. When the revmap needs to be expanded, all tuples on the
13 * regular BRIN page at that block (if any) are moved out of the way.
14 *
15 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 * Portions Copyright (c) 1994, Regents of the University of California
17 *
18 * IDENTIFICATION
19 * src/backend/access/brin/brin_revmap.c
20 */
21#include "postgres.h"
22
23#include "access/brin_page.h"
24#include "access/brin_pageops.h"
25#include "access/brin_revmap.h"
26#include "access/brin_tuple.h"
27#include "access/brin_xlog.h"
28#include "access/rmgr.h"
29#include "access/xloginsert.h"
30#include "miscadmin.h"
31#include "storage/bufmgr.h"
32#include "utils/rel.h"
33
34
/*
 * In revmap pages, each item stores an ItemPointerData.  These macros let one
 * find the logical revmap page number and the index of the revmap item within
 * that page for the given heap block number.
 *
 * The page number produced here is logical: callers add 1 to skip the
 * metapage block when translating it to a physical index block.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * "blk + n") are evaluated as a unit rather than being torn apart by operator
 * precedence.  Each argument is expanded exactly once.  pagesPerRange must be
 * nonzero, since it is used as a divisor.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	(((heapBlk) / (pagesPerRange)) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	(((heapBlk) / (pagesPerRange)) % REVMAP_PAGE_MAXITEMS)
44
45
54
55/* typedef appears in brin_revmap.h */
56
57
59 BlockNumber heapBlk);
62 BlockNumber heapBlk);
64
65/*
66 * Initialize an access object for a range map. This must be freed by
67 * brinRevmapTerminate when caller is done with it.
68 */
71{
73 Buffer meta;
74 BrinMetaPageData *metadata;
75 Page page;
76
79 page = BufferGetPage(meta);
80 metadata = (BrinMetaPageData *) PageGetContents(page);
81
83 revmap->rm_irel = idxrel;
84 revmap->rm_pagesPerRange = metadata->pagesPerRange;
85 revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
86 revmap->rm_metaBuf = meta;
87 revmap->rm_currBuf = InvalidBuffer;
88
89 *pagesPerRange = metadata->pagesPerRange;
90
92
93 return revmap;
94}
95
96/*
97 * Release resources associated with a revmap access object.
98 */
99void
101{
102 ReleaseBuffer(revmap->rm_metaBuf);
103 if (revmap->rm_currBuf != InvalidBuffer)
104 ReleaseBuffer(revmap->rm_currBuf);
105 pfree(revmap);
106}
107
108/*
109 * Extend the revmap to cover the given heap block number.
110 */
111void
113{
115
117
118 /* Ensure the buffer we got is in the expected range */
121 mapBlk <= revmap->rm_lastRevmapPage);
122}
123
124/*
125 * Prepare to insert an entry into the revmap; the revmap buffer in which the
126 * entry is to reside is locked and returned. Most callers should call
127 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
128 * it's not long enough.
129 *
130 * The returned buffer is also recorded in the revmap struct; terminating the
131 * revmap (brinRevmapTerminate) releases it, so the caller needn't do so.
132 */
133Buffer
143
144/*
145 * In the given revmap buffer (locked appropriately by caller), which is used
146 * in a BRIN index of pagesPerRange pages per range, set the element
147 * corresponding to heap block number heapBlk to the given TID.
148 *
149 * Once the operation is complete, the caller must update the LSN on the
150 * given buffer (this function itself returns nothing).
151 *
152 * This is used both in regular operation and during WAL replay.
153 */
154void
156 BlockNumber heapBlk, ItemPointerData tid)
157{
158 RevmapContents *contents;
159 ItemPointerData *iptr;
160 Page page;
161
162 /* The correct page should already be pinned and locked */
163 page = BufferGetPage(buf);
164 contents = (RevmapContents *) PageGetContents(page);
165 iptr = (ItemPointerData *) contents->rm_tids;
166 iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
167
168 if (ItemPointerIsValid(&tid))
169 ItemPointerSet(iptr,
172 else
174}
175
176/*
177 * Fetch the BrinTuple for a given heap block.
178 *
179 * The buffer containing the tuple is locked, and returned in *buf. The
180 * returned tuple points to the shared buffer and must not be freed; if caller
181 * wants to use it after releasing the buffer lock, it must create its own
182 * palloc'ed copy. As an optimization, the caller can pass a pinned buffer
183 * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is on
184 * the same page as a previous one.
185 *
186 * If no tuple is found for the given heap range, returns NULL. In that case,
187 * *buf might still be updated (and pin must be released by caller), but it's
188 * not locked.
189 *
190 * The output tuple offset within the buffer is returned in *off, and its size
191 * is returned in *size.
192 */
193BrinTuple *
195 Buffer *buf, OffsetNumber *off, Size *size, int mode)
196{
197 Relation idxRel = revmap->rm_irel;
199 RevmapContents *contents;
200 ItemPointerData *iptr;
202 Page page;
203 ItemId lp;
204 BrinTuple *tup;
206
207 /* normalize the heap block number to be the first page in the range */
208 heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
209
210 /*
211 * Compute the revmap page number we need. If Invalid is returned (i.e.,
212 * the revmap page hasn't been created yet), the requested page range is
213 * not summarized.
214 */
215 mapBlk = revmap_get_blkno(revmap, heapBlk);
217 {
218 *off = InvalidOffsetNumber;
219 return NULL;
220 }
221
223 for (;;)
224 {
226
227 if (revmap->rm_currBuf == InvalidBuffer ||
228 BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
229 {
230 if (revmap->rm_currBuf != InvalidBuffer)
231 ReleaseBuffer(revmap->rm_currBuf);
232
234 revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
235 }
236
237 LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);
238
239 contents = (RevmapContents *)
241 iptr = contents->rm_tids;
242 iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
243
244 if (!ItemPointerIsValid(iptr))
245 {
247 return NULL;
248 }
249
250 /*
251 * Check the TID we got in a previous iteration, if any, and save the
252 * current TID we got from the revmap; if we loop, we can sanity-check
253 * that the next one we get is different. Otherwise we might be stuck
254 * looping forever if the revmap is somehow badly broken.
255 */
259 errmsg_internal("corrupted BRIN index: inconsistent range map")));
260 previptr = *iptr;
261
263 *off = ItemPointerGetOffsetNumber(iptr);
264
266
267 /* Ok, got a pointer to where the BrinTuple should be. Fetch it. */
269 {
270 if (BufferIsValid(*buf))
273 }
275 page = BufferGetPage(*buf);
276
277 /* If we land on a revmap page, start over */
278 if (BRIN_IS_REGULAR_PAGE(page))
279 {
280 /*
281 * If the offset number is greater than what's in the page, it's
282 * possible that the range was desummarized concurrently. Just
283 * return NULL to handle that case.
284 */
285 if (*off > PageGetMaxOffsetNumber(page))
286 {
288 return NULL;
289 }
290
291 lp = PageGetItemId(page, *off);
292 if (ItemIdIsUsed(lp))
293 {
294 tup = (BrinTuple *) PageGetItem(page, lp);
295
296 if (tup->bt_blkno == heapBlk)
297 {
298 if (size)
299 *size = ItemIdGetLength(lp);
300 /* found it! */
301 return tup;
302 }
303 }
304 }
305
306 /*
307 * No luck. Assume that the revmap was updated concurrently.
308 */
310 }
311 /* not reached, but keep compiler quiet */
312 return NULL;
313}
314
315/*
316 * Delete an index tuple, marking a page range as unsummarized.
317 *
318 * Index must be locked in ShareUpdateExclusiveLock mode.
319 *
320 * Return false if caller should retry.
321 */
322bool
324{
326 BlockNumber pagesPerRange;
327 RevmapContents *contents;
328 ItemPointerData *iptr;
334 Page regPg;
336 OffsetNumber regOffset;
337 ItemId lp;
338
339 revmap = brinRevmapInitialize(idxrel, &pagesPerRange);
340
343 {
344 /* revmap page doesn't exist: range not summarized, we're done */
346 return true;
347 }
348
349 /* Lock the revmap page, obtain the index tuple pointer from it */
352 revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
353
355 iptr = contents->rm_tids;
356 iptr += revmapOffset;
357
358 if (!ItemPointerIsValid(iptr))
359 {
360 /* no index tuple: range not summarized, we're done */
363 return true;
364 }
365
369
370 /* if this is no longer a regular page, tell caller to start over */
372 {
376 return false;
377 }
378
379 regOffset = ItemPointerGetOffsetNumber(iptr);
380 if (regOffset > PageGetMaxOffsetNumber(regPg))
383 errmsg("corrupted BRIN index: inconsistent range map")));
384
385 lp = PageGetItemId(regPg, regOffset);
386 if (!ItemIdIsUsed(lp))
389 errmsg("corrupted BRIN index: inconsistent range map")));
390
391 /*
392 * Placeholder tuples only appear during unfinished summarization, and we
393 * hold ShareUpdateExclusiveLock, so this function cannot run concurrently
394 * with that. So any placeholder tuples that exist are leftovers from a
395 * crashed or aborted summarization; remove them silently.
396 */
397
399
401 brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
404 /* XXX record free space in FSM? */
405
408
410 {
413
414 xlrec.pagesPerRange = revmap->rm_pagesPerRange;
415 xlrec.heapBlk = heapBlk;
416 xlrec.regOffset = regOffset;
417
425 }
426
428
432
433 return true;
434}
435
436/*
437 * Given a heap block number, find the corresponding physical revmap block
438 * number and return it. If the revmap page hasn't been allocated yet, return
439 * InvalidBlockNumber.
440 */
441static BlockNumber
443{
445
446 /* obtain revmap block number, skip 1 for metapage block */
447 targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
448
449 /* Normal case: the revmap page is already allocated */
450 if (targetblk <= revmap->rm_lastRevmapPage)
451 return targetblk;
452
453 return InvalidBlockNumber;
454}
455
456/*
457 * Obtain and return a buffer containing the revmap page for the given heap
458 * page. The revmap must have been previously extended to cover that page.
459 * The returned buffer is also recorded in the revmap struct; terminating the
460 * revmap (brinRevmapTerminate) releases it, so the caller needn't do so.
461 */
462static Buffer
464{
466
467 /* Translate the heap block number to physical index location. */
468 mapBlk = revmap_get_blkno(revmap, heapBlk);
469
471 elog(ERROR, "revmap does not cover heap block %u", heapBlk);
472
473 /* Ensure the buffer we got is in the expected range */
475 mapBlk <= revmap->rm_lastRevmapPage);
476
477 /*
478 * Obtain the buffer from which we need to read. If we already have the
479 * correct buffer in our access struct, use that; otherwise, release that,
480 * (if valid) and read the one we need.
481 */
482 if (revmap->rm_currBuf == InvalidBuffer ||
483 mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
484 {
485 if (revmap->rm_currBuf != InvalidBuffer)
486 ReleaseBuffer(revmap->rm_currBuf);
487
488 revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
489 }
490
491 return revmap->rm_currBuf;
492}
493
494/*
495 * Given a heap block number, find the corresponding physical revmap block
496 * number and return it. If the revmap page hasn't been allocated yet, extend
497 * the revmap until it is.
498 */
499static BlockNumber
501{
503
504 /* obtain revmap block number, skip 1 for metapage block */
505 targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
506
507 /* Extend the revmap, if necessary */
508 while (targetblk > revmap->rm_lastRevmapPage)
509 {
512 }
513
514 return targetblk;
515}
516
517/*
518 * Try to extend the revmap by one page. This might not happen for a number of
519 * reasons; caller is expected to retry until the expected outcome is obtained.
520 */
521static void
523{
524 Buffer buf;
525 Page page;
527 BrinMetaPageData *metadata;
529 BlockNumber nblocks;
530 Relation irel = revmap->rm_irel;
531
532 /*
533 * Lock the metapage. This locks out concurrent extensions of the revmap,
534 * but note that we still need to grab the relation extension lock because
535 * another backend can extend the index with regular BRIN pages.
536 */
538 metapage = BufferGetPage(revmap->rm_metaBuf);
540
541 /*
542 * Check that our cached lastRevmapPage value was up-to-date; if it
543 * wasn't, update the cached copy and have caller start over.
544 */
545 if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
546 {
547 revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
549 return;
550 }
551 mapBlk = metadata->lastRevmapPage + 1;
552
553 nblocks = RelationGetNumberOfBlocks(irel);
554 if (mapBlk < nblocks)
555 {
556 buf = ReadBuffer(irel, mapBlk);
558 page = BufferGetPage(buf);
559 }
560 else
561 {
565 {
566 /*
567 * Very rare corner case: somebody extended the relation
568 * concurrently after we read its length. If this happens, give
569 * up and have caller start over. We will have to evacuate that
570 * page from under whoever is using it.
571 */
574 return;
575 }
576 page = BufferGetPage(buf);
577 }
578
579 /* Check that it's a regular block (or an empty page) */
580 if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
583 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
584 BrinPageType(page),
587
588 /* If the page is in use, evacuate it and restart */
590 {
592 brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);
593
594 /* have caller start over */
595 return;
596 }
597
598 /*
599 * Ok, we have now locked the metapage and the target block. Re-initialize
600 * the target block as a revmap page, and update the metapage.
601 */
603
604 /* the rm_tids array is initialized to all invalid by PageInit */
607
608 metadata->lastRevmapPage = mapBlk;
609
610 /*
611 * Set pd_lower just past the end of the metadata. This is essential,
612 * because without doing so, metadata will be lost if xlog.c compresses
613 * the page. (We must do this here because pre-v11 versions of PG did not
614 * set the metapage's pd_lower correctly, so a pg_upgraded index might
615 * contain the wrong value.)
616 */
617 ((PageHeader) metapage)->pd_lower =
618 ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;
619
620 MarkBufferDirty(revmap->rm_metaBuf);
621
622 if (RelationNeedsWAL(revmap->rm_irel))
623 {
626
628
632
634
637 PageSetLSN(page, recptr);
638 }
639
641
643
645}
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define BRIN_PAGETYPE_REVMAP
Definition brin_page.h:52
#define BRIN_METAPAGE_BLKNO
Definition brin_page.h:75
#define BrinPageType(page)
Definition brin_page.h:42
#define BRIN_IS_REGULAR_PAGE(page)
Definition brin_page.h:57
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
void brin_page_init(Page page, uint16 type)
bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
void brinRevmapTerminate(BrinRevmap *revmap)
static void revmap_physical_extend(BrinRevmap *revmap)
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
Definition brin_revmap.c:70
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode)
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk)
Definition brin_revmap.c:42
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk)
Definition brin_revmap.c:40
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
#define XLOG_BRIN_REVMAP_EXTEND
Definition brin_xlog.h:35
#define SizeOfBrinRevmapExtend
Definition brin_xlog.h:124
#define XLOG_BRIN_DESUMMARIZE
Definition brin_xlog.h:36
#define SizeOfBrinDesummarize
Definition brin_xlog.h:142
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4356
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:964
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5501
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3056
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:864
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
@ EB_LOCK_FIRST
Definition bufmgr.h:87
#define BMR_REL(p_rel)
Definition bufmgr.h:114
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition bufpage.c:1294
PageHeaderData * PageHeader
Definition bufpage.h:173
static bool PageIsNew(const PageData *page)
Definition bufpage.h:233
static char * PageGetContents(Page page)
Definition bufpage.h:257
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
PageData * Page
Definition bufpage.h:81
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
#define Assert(condition)
Definition c.h:873
size_t Size
Definition c.h:619
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition itemptr.c:35
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetInvalid(ItemPointerData *pointer)
Definition itemptr.h:184
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
void pfree(void *pointer)
Definition mcxt.c:1616
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define END_CRIT_SECTION()
Definition miscadmin.h:152
#define InvalidOffsetNumber
Definition off.h:26
uint16 OffsetNumber
Definition off.h:24
static PgChecksumMode mode
static char buf[DEFAULT_XLOG_SEG_SIZE]
static int fb(int x)
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
@ MAIN_FORKNUM
Definition relpath.h:58
BlockNumber lastRevmapPage
Definition brin_page.h:69
BlockNumber pagesPerRange
Definition brin_page.h:68
BlockNumber rm_pagesPerRange
Definition brin_revmap.c:49
BlockNumber rm_lastRevmapPage
Definition brin_revmap.c:50
Buffer rm_metaBuf
Definition brin_revmap.c:51
Buffer rm_currBuf
Definition brin_revmap.c:52
Relation rm_irel
Definition brin_revmap.c:48
ItemPointerData rm_tids[1]
Definition brin_page.h:85
BlockNumber pagesPerRange
Definition brin_xlog.h:135
BlockNumber targetBlk
Definition brin_xlog.h:121
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:245
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_WILL_INIT
Definition xloginsert.h:34