PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
brin_revmap.c
Go to the documentation of this file.
1 /*
2  * brin_revmap.c
3  * Range map for BRIN indexes
4  *
5  * The range map (revmap) is a translation structure for BRIN indexes: for each
6  * page range there is one summary tuple, and its location is tracked by the
7  * revmap. Whenever a new tuple is inserted into a table that violates the
8  * previously recorded summary values, a new tuple is inserted into the index
9  * and the revmap is updated to point to it.
10  *
11  * The revmap is stored in the first pages of the index, immediately following
12  * the metapage. When the revmap needs to be expanded, all tuples on the
13  * regular BRIN page at that block (if any) are moved out of the way.
14  *
15  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
16  * Portions Copyright (c) 1994, Regents of the University of California
17  *
18  * IDENTIFICATION
19  * src/backend/access/brin/brin_revmap.c
20  */
21 #include "postgres.h"
22 
23 #include "access/brin_page.h"
24 #include "access/brin_pageops.h"
25 #include "access/brin_revmap.h"
26 #include "access/brin_tuple.h"
27 #include "access/brin_xlog.h"
28 #include "access/rmgr.h"
29 #include "access/xloginsert.h"
30 #include "miscadmin.h"
31 #include "storage/bufmgr.h"
32 #include "storage/lmgr.h"
33 #include "utils/rel.h"
34 
35 
/*
 * In revmap pages, each item stores an ItemPointerData.  These macros map a
 * heap block number to the logical revmap page number and to the index of
 * the revmap item within that page.
 *
 * Both arguments are parenthesized in the expansion, so compound expressions
 * such as "blk + 1" can be passed without precedence surprises.  Each
 * argument is evaluated exactly once.  pagesPerRange must be nonzero.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	(((heapBlk) / (pagesPerRange)) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	(((heapBlk) / (pagesPerRange)) % REVMAP_PAGE_MAXITEMS)
45 
46 
/*
 * Access object for the range map of one BRIN index.
 *
 * NOTE(review): listing lines 49-50 and 52-53 are absent from this extract.
 * The cross-reference list at the end of the listing places
 * "Relation rm_irel" (49), "BlockNumber rm_pagesPerRange" (50),
 * "Buffer rm_metaBuf" (52) and "Buffer rm_currBuf" (53) there, and the
 * functions below use all four members -- restore them from the upstream file.
 */
47 struct BrinRevmap
48 {
    /* Compared against the metapage's value, and refreshed from it, in revmap_physical_extend. */
51  BlockNumber rm_lastRevmapPage; /* cached from the metapage */
54 };
55 
56 /* typedef appears in brin_revmap.h */
57 
58 
/*
 * Prototypes for the file-local helpers defined further down.
 *
 * NOTE(review): listing lines 59 and 62 (the first halves of two prototypes)
 * are absent from this extract.  The cross-reference list at the end of the
 * listing names revmap_get_blkno and revmap_extend_and_get_blkno as static
 * functions returning BlockNumber, so presumably those are the two truncated
 * prototypes -- confirm against the upstream file.
 */
60  BlockNumber heapBlk);
61 static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
63  BlockNumber heapBlk);
64 static void revmap_physical_extend(BrinRevmap *revmap);
65 
66 /*
67  * Initialize an access object for a range map. This must be freed by
68  * brinRevmapTerminate when caller is done with it.
 *
 * idxrel is the BRIN index whose metapage is read; the metapage buffer
 * obtained here is stored in the returned object (rm_metaBuf) and is only
 * released by brinRevmapTerminate.  *pagesPerRange is an output and must not
 * be NULL: it receives the pagesPerRange value found in the metapage.
 * snapshot is passed to TestForOldSnapshot together with the metapage.
 *
 * The returned object is palloc'd and starts with no current revmap buffer
 * (rm_currBuf is InvalidBuffer).
 *
 * NOTE(review): listing lines 80 and 94 are absent from this extract;
 * presumably they lock and unlock the metapage buffer around the reads
 * below -- confirm against the upstream file.
69  */
70 BrinRevmap *
71 brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
72  Snapshot snapshot)
73 {
74  BrinRevmap *revmap;
75  Buffer meta;
76  BrinMetaPageData *metadata;
77  Page page;
78 
79  meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
81  page = BufferGetPage(meta);
82  TestForOldSnapshot(snapshot, idxrel, page);
83  metadata = (BrinMetaPageData *) PageGetContents(page);
84 
    /* Copy what we need out of the metapage into the access object. */
85  revmap = palloc(sizeof(BrinRevmap));
86  revmap->rm_irel = idxrel;
87  revmap->rm_pagesPerRange = metadata->pagesPerRange;
88  revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
89  revmap->rm_metaBuf = meta;
90  revmap->rm_currBuf = InvalidBuffer;
91 
92  *pagesPerRange = metadata->pagesPerRange;
93 
95 
96  return revmap;
97 }
98 
99 /*
100  * Release resources associated with a revmap access object.
 *
 * Releases the metapage buffer and, if one is held, the current revmap
 * buffer, then frees the object itself.  The revmap pointer must not be used
 * after this call.
 *
 * NOTE(review): listing line 103 (the line carrying the function name and
 * parameter list) is absent from this extract; the cross-reference list at
 * the end of the listing gives it as brinRevmapTerminate(BrinRevmap *revmap).
101  */
102 void
104 {
105  ReleaseBuffer(revmap->rm_metaBuf);
106  if (revmap->rm_currBuf != InvalidBuffer)
107  ReleaseBuffer(revmap->rm_currBuf);
108  pfree(revmap);
109 }
110 
111 /*
112  * Extend the revmap to cover the given heap block number.
 *
 * All the work is done by revmap_extend_and_get_blkno; the block number it
 * returns is used here only for the sanity assertion.
 *
 * NOTE(review): listing lines 115 (function name and parameter list) and
 * 117 (presumably the declaration of mapBlk) are absent from this extract.
 * The cross-reference list at the end of the listing gives the signature as
 * brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk).
113  */
114 void
116 {
118 
119  mapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);
120 
121  /* Ensure the buffer we got is in the expected range */
122  Assert(mapBlk != InvalidBlockNumber &&
123  mapBlk != BRIN_METAPAGE_BLKNO &&
124  mapBlk <= revmap->rm_lastRevmapPage);
125 }
126 
127 /*
128  * Prepare to insert an entry into the revmap; the revmap buffer in which the
129  * entry is to reside is locked and returned. Most callers should call
130  * brinRevmapExtend beforehand, as this routine does not extend the revmap if
131  * it's not long enough.
132  *
133  * The returned buffer is also recorded in the revmap struct; finishing that
134  * releases the buffer, therefore the caller needn't do it explicitly.
 *
 * The buffer comes from revmap_get_buffer, which raises an error if the
 * revmap does not cover heapBlk.
 *
 * NOTE(review): listing lines 137 (function name and parameter list) and 142
 * are absent from this extract.  No lock call is visible in the body, so
 * line 142 presumably holds the lock acquisition promised above -- confirm
 * against the upstream file.
135  */
136 Buffer
138 {
139  Buffer rmBuf;
140 
141  rmBuf = revmap_get_buffer(revmap, heapBlk);
143 
144  return rmBuf;
145 }
146 
147 /*
148  * In the given revmap buffer (locked appropriately by caller), which is used
149  * in a BRIN index of pagesPerRange pages per range, set the element
150  * corresponding to heap block number heapBlk to the given TID.
151  *
152  * Once the operation is complete, the caller must update the LSN on the
153  * given buffer (this function returns nothing; it only modifies the page).
154  *
155  * This is used both in regular operation and during WAL replay.
 *
 * NOTE(review): listing lines 158 (function name and first parameters) and
 * 172-173 (the remaining arguments of the ItemPointerSet call) are absent
 * from this extract; restore them from the upstream file.
156  */
157 void
159  BlockNumber heapBlk, ItemPointerData tid)
160 {
161  RevmapContents *contents;
162  ItemPointerData *iptr;
163  Page page;
164 
165  /* The correct page should already be pinned and locked */
166  page = BufferGetPage(buf);
167  contents = (RevmapContents *) PageGetContents(page);
    /* Step to the array slot that belongs to heapBlk's page range. */
168  iptr = (ItemPointerData *) contents->rm_tids;
169  iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
170 
171  ItemPointerSet(iptr,
174 }
175 
176 /*
177  * Fetch the BrinTuple for a given heap block.
178  *
179  * The buffer containing the tuple is locked, and returned in *buf. As an
180  * optimization, the caller can pass a pinned buffer *buf on entry, which will
181  * avoid a pin-unpin cycle when the next tuple is on the same page as a
182  * previous one.
183  *
184  * If no tuple is found for the given heap range, returns NULL. In that case,
185  * *buf might still be updated, but it's not locked.
186  *
187  * The output tuple offset within the buffer is returned in *off, and its size
188  * is returned in *size.
 *
 * heapBlk may be any block of the range; it is rounded down to the first
 * block of its range before the lookup.  mode is the lock mode passed to
 * LockBuffer for *buf.  size may be NULL if the caller does not need the
 * tuple length.  When the revmap page for heapBlk is not allocated yet, *off
 * is set to InvalidOffsetNumber and NULL is returned.
 *
 * An ERRCODE_INDEX_CORRUPTED error is raised if two consecutive iterations
 * read the same TID from the revmap, or if the offset read from the revmap
 * is beyond the last item of a regular page.
 *
 * NOTE(review): listing lines 191 (function name and first parameters), 219,
 * 231, 234, 240, 259 and 297 are absent from this extract.  Going by the
 * contract above they presumably include the lock/unlock calls on the revmap
 * buffer and on *buf, and line 234 the expression being cast at line 233 --
 * confirm against the upstream file before relying on the locking described
 * here.
189  */
190 BrinTuple *
192  Buffer *buf, OffsetNumber *off, Size *size, int mode,
193  Snapshot snapshot)
194 {
195  Relation idxRel = revmap->rm_irel;
196  BlockNumber mapBlk;
197  RevmapContents *contents;
198  ItemPointerData *iptr;
199  BlockNumber blk;
200  Page page;
201  ItemId lp;
202  BrinTuple *tup;
203  ItemPointerData previptr;
204 
205  /* normalize the heap block number to be the first page in the range */
206  heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
207 
208  /* Compute the revmap page number we need */
209  mapBlk = revmap_get_blkno(revmap, heapBlk);
210  if (mapBlk == InvalidBlockNumber)
211  {
212  *off = InvalidOffsetNumber;
213  return NULL;
214  }
215 
    /* previptr remembers the TID seen by the previous iteration, if any. */
216  ItemPointerSetInvalid(&previptr);
217  for (;;)
218  {
220 
    /* Reuse the cached revmap buffer when it already holds block mapBlk. */
221  if (revmap->rm_currBuf == InvalidBuffer ||
222  BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
223  {
224  if (revmap->rm_currBuf != InvalidBuffer)
225  ReleaseBuffer(revmap->rm_currBuf);
226 
227  Assert(mapBlk != InvalidBlockNumber);
228  revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
229  }
230 
232 
233  contents = (RevmapContents *)
235  iptr = contents->rm_tids;
236  iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
237 
    /* An invalid TID in the revmap slot means "no tuple": return NULL. */
238  if (!ItemPointerIsValid(iptr))
239  {
241  return NULL;
242  }
243 
244  /*
245  * Check the TID we got in a previous iteration, if any, and save the
246  * current TID we got from the revmap; if we loop, we can sanity-check
247  * that the next one we get is different. Otherwise we might be stuck
248  * looping forever if the revmap is somehow badly broken.
249  */
250  if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
251  ereport(ERROR,
252  (errcode(ERRCODE_INDEX_CORRUPTED),
253  errmsg_internal("corrupted BRIN index: inconsistent range map")));
254  previptr = *iptr;
255 
256  blk = ItemPointerGetBlockNumber(iptr);
257  *off = ItemPointerGetOffsetNumber(iptr);
258 
260 
261  /* Ok, got a pointer to where the BrinTuple should be. Fetch it. */
262  if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
263  {
264  if (BufferIsValid(*buf))
265  ReleaseBuffer(*buf);
266  *buf = ReadBuffer(idxRel, blk);
267  }
268  LockBuffer(*buf, mode);
269  page = BufferGetPage(*buf);
270  TestForOldSnapshot(snapshot, idxRel, page);
271 
272  /* Only look for the tuple on a regular page; if we landed on any other page type (e.g. a revmap page), fall through and start over */
273  if (BRIN_IS_REGULAR_PAGE(page))
274  {
275  if (*off > PageGetMaxOffsetNumber(page))
276  ereport(ERROR,
277  (errcode(ERRCODE_INDEX_CORRUPTED),
278  errmsg_internal("corrupted BRIN index: inconsistent range map")));
279  lp = PageGetItemId(page, *off);
280  if (ItemIdIsUsed(lp))
281  {
282  tup = (BrinTuple *) PageGetItem(page, lp);
283 
    /* Accept the tuple only if it really summarizes our range. */
284  if (tup->bt_blkno == heapBlk)
285  {
286  if (size)
287  *size = ItemIdGetLength(lp);
288  /* found it! */
289  return tup;
290  }
291  }
292  }
293 
294  /*
295  * No luck. Assume that the revmap was updated concurrently.
296  */
298  }
299  /* not reached, but keep compiler quiet */
300  return NULL;
301 }
302 
303 /*
304  * Given a heap block number, find the corresponding physical revmap block
305  * number and return it. If the revmap page hasn't been allocated yet, return
306  * InvalidBlockNumber.
 *
 * The check uses only the rm_lastRevmapPage value cached in the access
 * object; the metapage is not consulted here.
 *
 * NOTE(review): listing line 309 (function name and parameter list) is
 * absent from this extract; the cross-reference list at the end of the
 * listing gives it as revmap_get_blkno(BrinRevmap *revmap,
 * BlockNumber heapBlk).
307  */
308 static BlockNumber
310 {
311  BlockNumber targetblk;
312 
313  /* obtain revmap block number, skip 1 for metapage block */
314  targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
315 
316  /* Normal case: the revmap page is already allocated */
317  if (targetblk <= revmap->rm_lastRevmapPage)
318  return targetblk;
319 
320  return InvalidBlockNumber;
321 }
322 
323 /*
324  * Obtain and return a buffer containing the revmap page for the given heap
325  * page. The revmap must have been previously extended to cover that page.
326  * The returned buffer is also recorded in the revmap struct; finishing that
327  * releases the buffer, therefore the caller needn't do it explicitly.
 *
 * Raises an error (elog ERROR) if the revmap does not cover heapBlk.  This
 * function does not call LockBuffer on the buffer it returns.
 *
 * NOTE(review): listing line 330 (function name and parameter list) is
 * absent from this extract; the cross-reference list at the end of the
 * listing gives it as revmap_get_buffer(BrinRevmap *revmap,
 * BlockNumber heapBlk).
328  */
329 static Buffer
331 {
332  BlockNumber mapBlk;
333 
334  /* Translate the heap block number to physical index location. */
335  mapBlk = revmap_get_blkno(revmap, heapBlk);
336 
337  if (mapBlk == InvalidBlockNumber)
338  elog(ERROR, "revmap does not cover heap block %u", heapBlk);
339 
340  /* Ensure the buffer we got is in the expected range */
341  Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
342  mapBlk <= revmap->rm_lastRevmapPage);
343 
344  /*
345  * Obtain the buffer from which we need to read. If we already have the
346  * correct buffer in our access struct, use that; otherwise, release that
347  * one (if valid) and read the one we need.
348  */
349  if (revmap->rm_currBuf == InvalidBuffer ||
350  mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
351  {
352  if (revmap->rm_currBuf != InvalidBuffer)
353  ReleaseBuffer(revmap->rm_currBuf);
354 
355  revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
356  }
357 
358  return revmap->rm_currBuf;
359 }
360 
361 /*
362  * Given a heap block number, find the corresponding physical revmap block
363  * number and return it. If the revmap page hasn't been allocated yet, extend
364  * the revmap until it is.
 *
 * revmap_physical_extend may return without having added a page, so it is
 * called in a loop until the cached rm_lastRevmapPage reaches the target.
 *
 * NOTE(review): listing lines 367 (function name and parameter list) and 377
 * (a statement at the top of the loop body) are absent from this extract.
 * The cross-reference list at the end of the listing gives the signature as
 * revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk).
365  */
366 static BlockNumber
368 {
369  BlockNumber targetblk;
370 
371  /* obtain revmap block number, skip 1 for metapage block */
372  targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;
373 
374  /* Extend the revmap, if necessary */
375  while (targetblk > revmap->rm_lastRevmapPage)
376  {
378  revmap_physical_extend(revmap);
379  }
380 
381  return targetblk;
382 }
383 
384 /*
385  * Try to extend the revmap by one page. This might not happen for a number of
386  * reasons; caller is expected to retry until the expected outcome is obtained.
 *
 * Visible early-return cases, each leaving the revmap no longer than before:
 * the cached rm_lastRevmapPage was stale (it is refreshed from the metapage);
 * the relation was extended by someone else so the new page is not the block
 * we wanted; or the target page was in use and had to be evacuated first.
 *
 * On success the target page is re-initialized, the metapage's
 * lastRevmapPage is advanced to it, and a XLOG_BRIN_REVMAP_EXTEND record is
 * written if the relation needs WAL.
 *
 * NOTE(review): this extract is missing listing lines 389 (function name and
 * parameter list), 405, 416, 425, 431, 443, 444, 448, 452, 461, 467, 478,
 * 481, 498, 505 and 507.  Judging by the surrounding comments they
 * presumably hold the metapage/buffer lock and unlock calls, the relation
 * extension lock calls, the critical-section markers, the page
 * initialization and the registration of the new page in the WAL record.
 * As printed here, each "if (needLock)" has lost the statement it guards.
 * Restore the missing lines from the upstream file before using this code.
387  */
388 static void
390 {
391  Buffer buf;
392  Page page;
393  Page metapage;
394  BrinMetaPageData *metadata;
395  BlockNumber mapBlk;
396  BlockNumber nblocks;
397  Relation irel = revmap->rm_irel;
398  bool needLock = !RELATION_IS_LOCAL(irel);
399 
400  /*
401  * Lock the metapage. This locks out concurrent extensions of the revmap,
402  * but note that we still need to grab the relation extension lock because
403  * another backend can extend the index with regular BRIN pages.
404  */
406  metapage = BufferGetPage(revmap->rm_metaBuf);
407  metadata = (BrinMetaPageData *) PageGetContents(metapage);
408 
409  /*
410  * Check that our cached lastRevmapPage value was up-to-date; if it
411  * wasn't, update the cached copy and have caller start over.
412  */
413  if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
414  {
415  revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
417  return;
418  }
    /* The new revmap page must be the block right after the current last one. */
419  mapBlk = metadata->lastRevmapPage + 1;
420 
421  nblocks = RelationGetNumberOfBlocks(irel);
422  if (mapBlk < nblocks)
423  {
    /* The target block already exists in the relation: read it. */
424  buf = ReadBuffer(irel, mapBlk);
426  page = BufferGetPage(buf);
427  }
428  else
429  {
    /* The target block is past the end of the relation: add a new page. */
430  if (needLock)
432 
433  buf = ReadBuffer(irel, P_NEW);
434  if (BufferGetBlockNumber(buf) != mapBlk)
435  {
436  /*
437  * Very rare corner case: somebody extended the relation
438  * concurrently after we read its length. If this happens, give
439  * up and have caller start over. We will have to evacuate that
440  * page from under whoever is using it.
441  */
442  if (needLock)
445  ReleaseBuffer(buf);
446  return;
447  }
449  page = BufferGetPage(buf);
450 
451  if (needLock)
453  }
454 
455  /* Check that it's a regular block (or an empty page) */
456  if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
457  ereport(ERROR,
458  (errcode(ERRCODE_INDEX_CORRUPTED),
459  errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
460  BrinPageType(page),
462  BufferGetBlockNumber(buf))));
463 
464  /* If the page is in use, evacuate it and restart */
465  if (brin_start_evacuating_page(irel, buf))
466  {
468  brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);
469 
470  /* have caller start over */
471  return;
472  }
473 
474  /*
475  * Ok, we have now locked the metapage and the target block. Re-initialize
476  * it as a revmap page.
477  */
479 
480  /* the rm_tids array is initialized to all invalid by PageInit */
482  MarkBufferDirty(buf);
483 
    /*
     * Only the metapage copy is advanced here; as far as the visible code
     * shows, the cached rm_lastRevmapPage catches up on the next call.
     */
484  metadata->lastRevmapPage = mapBlk;
485  MarkBufferDirty(revmap->rm_metaBuf);
486 
487  if (RelationNeedsWAL(revmap->rm_irel))
488  {
489  xl_brin_revmap_extend xlrec;
490  XLogRecPtr recptr;
491 
492  xlrec.targetBlk = mapBlk;
493 
494  XLogBeginInsert();
495  XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
496  XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
497 
499 
    /* Stamp both modified pages with the LSN of the record just inserted. */
500  recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
501  PageSetLSN(metapage, recptr);
502  PageSetLSN(page, recptr);
503  }
504 
506 
508 
509  UnlockReleaseBuffer(buf);
510 }
BlockNumber rm_lastRevmapPage
Definition: brin_revmap.c:51
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:59
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
Relation rm_irel
Definition: brin_revmap.c:49
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk)
Definition: brin_revmap.c:43
static void TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
Definition: bufmgr.h:265
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:367
#define BRIN_METAPAGE_BLKNO
Definition: brin_page.h:75
void brinRevmapTerminate(BrinRevmap *revmap)
Definition: brin_revmap.c:103
BrinTuple * brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Buffer *buf, OffsetNumber *off, Size *size, int mode, Snapshot snapshot)
Definition: brin_revmap.c:191
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1445
#define ExclusiveLock
Definition: lockdefs.h:44
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:520
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
#define InvalidBuffer
Definition: buf.h:25
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:137
#define REGBUF_WILL_INIT
Definition: xloginsert.h:32
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
int errcode(int sqlerrcode)
Definition: elog.c:575
#define BRIN_IS_REGULAR_PAGE(page)
Definition: brin_page.h:57
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3292
#define P_NEW
Definition: bufmgr.h:82
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
#define SizeOfBrinRevmapExtend
Definition: brin_xlog.h:124
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
void pfree(void *pointer)
Definition: mcxt.c:992
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3315
#define ERROR
Definition: elog.h:43
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:115
static char * buf
Definition: pg_test_fsync.c:65
static void revmap_physical_extend(BrinRevmap *revmap)
Definition: brin_revmap.c:389
#define RelationGetRelationName(relation)
Definition: rel.h:433
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
BlockNumber targetBlk
Definition: brin_xlog.h:121
#define ereport(elevel, rest)
Definition: elog.h:122
#define XLOG_BRIN_REVMAP_EXTEND
Definition: brin_xlog.h:35
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:332
BlockNumber lastRevmapPage
Definition: brin_page.h:69
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:382
#define PageGetContents(page)
Definition: bufpage.h:243
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3529
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:199
#define InvalidOffsetNumber
Definition: off.h:26
BlockNumber pagesPerRange
Definition: brin_page.h:68
BlockNumber bt_blkno
Definition: brin_tuple.h:52
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:670
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:330
Buffer rm_currBuf
Definition: brin_revmap.c:53
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define BRIN_PAGETYPE_REVMAP
Definition: brin_page.h:52
size_t Size
Definition: c.h:352
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
Definition: brin_pageops.c:545
#define InvalidBlockNumber
Definition: block.h:33
BrinRevmap * brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, Snapshot snapshot)
Definition: brin_revmap.c:71
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:76
#define RelationNeedsWAL(relation)
Definition: rel.h:502
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
Buffer rm_metaBuf
Definition: brin_revmap.c:52
#define BrinPageType(page)
Definition: brin_page.h:42
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2588
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:131
#define PageIsNew(page)
Definition: bufpage.h:226
void * palloc(Size size)
Definition: mcxt.c:891
BlockNumber rm_pagesPerRange
Definition: brin_revmap.c:50
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk)
Definition: brin_revmap.c:41
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
Definition: brin_pageops.c:510
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:66
static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:309
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:985
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
Definition: brin_revmap.c:158
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:86
ItemPointerData rm_tids[1]
Definition: brin_page.h:85
void brin_page_init(Page page, uint16 type)
Definition: brin_pageops.c:469