PostgreSQL Source Code  git master
brin_pageops.c
Go to the documentation of this file.
1 /*
2  * brin_pageops.c
3  * Page-handling routines for BRIN indexes
4  *
5  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * IDENTIFICATION
9  * src/backend/access/brin/brin_pageops.c
10  */
11 #include "postgres.h"
12 
13 #include "access/brin_pageops.h"
14 #include "access/brin_page.h"
15 #include "access/brin_revmap.h"
16 #include "access/brin_xlog.h"
17 #include "access/xloginsert.h"
18 #include "miscadmin.h"
19 #include "storage/bufmgr.h"
20 #include "storage/freespace.h"
21 #include "storage/lmgr.h"
22 #include "storage/smgr.h"
23 #include "utils/rel.h"
24 
25 
26 /*
27  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
28  * a single item per page, unlike other index AMs.
    *
    * The value is the block size minus the MAXALIGN'ed sum of the page header
    * and one line pointer, minus the MAXALIGN'ed BRIN special space, with the
    * result rounded down by MAXALIGN_DOWN.
29  */
30 #define BrinMaxItemSize \
31  MAXALIGN_DOWN(BLCKSZ - \
32  (MAXALIGN(SizeOfPageHeaderData + \
33  sizeof(ItemIdData)) + \
34  MAXALIGN(sizeof(BrinSpecialSpace))))
35 
   /*
    * Forward declarations of file-local helpers that are defined further down
    * in this file.
    *
    * NOTE(review): listing line 39 is skipped in this copy.
    * brin_initialize_empty_new_buffer() is called in brin_doupdate() ahead of
    * its definition, so its prototype presumably belongs here -- confirm
    * against upstream.
    */
36 static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
37  bool *extended);
38 static Size br_page_get_freespace(Page page);
40 
41 
42 /*
43  * Update tuple origtup (size origsz), located in offset oldoff of buffer
44  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
45  * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
46  *
47  * If samepage is true, only the old page is considered: no other buffer is
48  * obtained, and if the old page turns out to lack room we return false so the
    * caller can start over. If samepage is false, a target buffer is obtained
    * up front and is used when the old page cannot hold the new tuple.
49  *
50  * If the update is successful, return true; the revmap is updated to point to
51  * the new tuple. If the update is not done for whatever reason, return false.
52  * Caller may retry the update if this happens.
    *
    * An ERROR is raised if newsz exceeds BrinMaxItemSize. newsz must already be
    * MAXALIGN'ed (asserted below).
    *
    * NOTE(review): this copy of the listing skips numbered lines 107, 126, 152,
    * 184, 192, 194, 201, 209, 211, 224, 237, 242, 250, 297, 306 and 309. No
    * lock or unlock call on oldbuf is visible below, and xlrec, info and newblk
    * are used without visible declarations; presumably those statements are
    * among the skipped lines -- restore them from upstream before relying on
    * this text.
53  */
54 bool
55 brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
56  BrinRevmap *revmap, BlockNumber heapBlk,
57  Buffer oldbuf, OffsetNumber oldoff,
58  const BrinTuple *origtup, Size origsz,
59  const BrinTuple *newtup, Size newsz,
60  bool samepage)
61 {
62  Page oldpage;
63  ItemId oldlp;
64  BrinTuple *oldtup;
65  Size oldsz;
66  Buffer newbuf;
67  bool extended;
68 
69  Assert(newsz == MAXALIGN(newsz));
70 
71  /* If the item is oversized, don't bother. */
72  if (newsz > BrinMaxItemSize)
73  {
74  ereport(ERROR,
75  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
76  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
77  newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
78  return false; /* keep compiler quiet */
79  }
80 
81  /* make sure the revmap is long enough to contain the entry we need */
82  brinRevmapExtend(revmap, heapBlk);
83 
84  if (!samepage)
85  {
86  /* need a page on which to put the item */
87  newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
88  if (!BufferIsValid(newbuf))
89  {
90  Assert(!extended);
91  return false;
92  }
93 
94  /*
95  * Note: it's possible (though unlikely) that the returned newbuf is
96  * the same as oldbuf, if brin_getinsertbuffer determined that the old
97  * buffer does in fact have enough space.
98  */
99  if (newbuf == oldbuf)
100  {
101  Assert(!extended);
102  newbuf = InvalidBuffer;
103  }
104  }
105  else
106  {
108  newbuf = InvalidBuffer;
109  extended = false;
110  }
111  oldpage = BufferGetPage(oldbuf);
112  oldlp = PageGetItemId(oldpage, oldoff);
113 
114  /*
115  * Check that the old tuple wasn't updated concurrently: it might have
116  * moved someplace else entirely, and for that matter the whole page
117  * might've become a revmap page. Note that in the first two cases
118  * checked here, the "oldlp" we just calculated is garbage; but
119  * PageGetItemId() is simple enough that it was safe to do that
120  * calculation anyway.
121  */
122  if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
123  oldoff > PageGetMaxOffsetNumber(oldpage) ||
124  !ItemIdIsNormal(oldlp))
125  {
127 
128  /*
129  * If this happens, and the new buffer was obtained by extending the
130  * relation, then we need to ensure we don't leave it uninitialized or
131  * forget about it.
132  */
133  if (BufferIsValid(newbuf))
134  {
135  if (extended)
136  brin_initialize_empty_new_buffer(idxrel, newbuf);
137  UnlockReleaseBuffer(newbuf);
138  if (extended)
139  FreeSpaceMapVacuum(idxrel);
140  }
141  return false;
142  }
143 
144  oldsz = ItemIdGetLength(oldlp);
145  oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
146 
147  /*
148  * ... or it might have been updated in place to different contents.
149  */
150  if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
151  {
      /* Same cleanup of a possibly-new buffer as in the check above. */
153  if (BufferIsValid(newbuf))
154  {
155  if (extended)
156  brin_initialize_empty_new_buffer(idxrel, newbuf);
157  UnlockReleaseBuffer(newbuf);
158  if (extended)
159  FreeSpaceMapVacuum(idxrel);
160  }
161  return false;
162  }
163 
164  /*
165  * Great, the old tuple is intact. We can proceed with the update.
166  *
167  * If there's enough room in the old page for the new tuple, replace it.
168  *
169  * Note that there might now be enough space on the page even though the
170  * caller told us there isn't, if a concurrent update moved another tuple
171  * elsewhere or replaced a tuple with a smaller one.
172  */
173  if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
174  brin_can_do_samepage_update(oldbuf, origsz, newsz))
175  {
176  if (BufferIsValid(newbuf))
177  {
178  /* as above */
179  if (extended)
180  brin_initialize_empty_new_buffer(idxrel, newbuf);
181  UnlockReleaseBuffer(newbuf);
182  }
183 
185  if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) newtup, newsz))
186  elog(ERROR, "failed to replace BRIN tuple");
187  MarkBufferDirty(oldbuf);
188 
189  /* XLOG stuff */
190  if (RelationNeedsWAL(idxrel))
191  {
      /*
       * NOTE(review): the declarations of xlrec and info are not visible
       * here (listing lines 192 and 194 are skipped in this copy).
       */
193  XLogRecPtr recptr;
195 
196  xlrec.offnum = oldoff;
197 
198  XLogBeginInsert();
199  XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
200 
202  XLogRegisterBufData(0, (char *) newtup, newsz);
203 
204  recptr = XLogInsert(RM_BRIN_ID, info);
205 
206  PageSetLSN(oldpage, recptr);
207  }
208 
210 
212 
213  if (extended)
214  FreeSpaceMapVacuum(idxrel);
215 
216  return true;
217  }
218  else if (newbuf == InvalidBuffer)
219  {
220  /*
221  * Not enough space, but caller said that there was. Tell them to
222  * start over.
223  */
225  return false;
226  }
227  else
228  {
229  /*
230  * Not enough free space on the oldpage. Put the new tuple on the new
231  * page, and update the revmap.
232  */
233  Page newpage = BufferGetPage(newbuf);
234  Buffer revmapbuf;
235  ItemPointerData newtid;
236  OffsetNumber newoff;
      /*
       * NOTE(review): newblk is used below but its declaration is not
       * visible (listing line 237 is skipped in this copy).
       */
238  Size freespace = 0;
239 
240  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
241 
243 
244  /*
245  * We need to initialize the page if it's newly obtained. Note we
246  * will WAL-log the initialization as part of the update, so we don't
247  * need to do that here.
248  */
249  if (extended)
      /*
       * NOTE(review): the body of this if (listing line 250) is skipped in
       * this copy; as the text stands the if would govern the delete below.
       * Presumably the new page is initialized here -- confirm against
       * upstream.
       */
251 
252  PageIndexTupleDeleteNoCompact(oldpage, oldoff);
253  newoff = PageAddItem(newpage, (Item) newtup, newsz,
254  InvalidOffsetNumber, false, false);
255  if (newoff == InvalidOffsetNumber)
256  elog(ERROR, "failed to add BRIN tuple to new page");
257  MarkBufferDirty(oldbuf);
258  MarkBufferDirty(newbuf);
259 
260  /* needed to update FSM below */
261  if (extended)
262  {
263  newblk = BufferGetBlockNumber(newbuf);
264  freespace = br_page_get_freespace(newpage);
265  }
266 
267  ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
268  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
269  MarkBufferDirty(revmapbuf);
270 
271  /* XLOG stuff */
272  if (RelationNeedsWAL(idxrel))
273  {
274  xl_brin_update xlrec;
275  XLogRecPtr recptr;
276  uint8 info;
277 
278  info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
279 
280  xlrec.insert.offnum = newoff;
281  xlrec.insert.heapBlk = heapBlk;
282  xlrec.insert.pagesPerRange = pagesPerRange;
283  xlrec.oldOffnum = oldoff;
284 
285  XLogBeginInsert();
286 
287  /* new page */
288  XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
289 
290  XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
291  XLogRegisterBufData(0, (char *) newtup, newsz);
292 
293  /* revmap page */
294  XLogRegisterBuffer(1, revmapbuf, 0);
295 
296  /* old page */
298 
299  recptr = XLogInsert(RM_BRIN_ID, info);
300 
301  PageSetLSN(oldpage, recptr);
302  PageSetLSN(newpage, recptr);
303  PageSetLSN(BufferGetPage(revmapbuf), recptr);
304  }
305 
307 
308  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
310  UnlockReleaseBuffer(newbuf);
311 
312  if (extended)
313  {
314  Assert(BlockNumberIsValid(newblk));
315  RecordPageWithFreeSpace(idxrel, newblk, freespace);
316  FreeSpaceMapVacuum(idxrel);
317  }
318 
319  return true;
320  }
321 }
322 
323 /*
324  * Return whether brin_doupdate can do a samepage update.
     *
     * The answer is true when the new tuple is no larger than the original one,
     * or when the exact free space on the buffer's page is at least the growth
     * (newsz - origsz).
     *
     * NOTE(review): listing line 327 (the line carrying the function name and
     * parameter list) is skipped in this copy; the body refers to buffer,
     * origsz and newsz.
325  */
326 bool
328 {
329  return
330  ((newsz <= origsz) ||
331  PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
332 }
333 
334 /*
335  * Insert an index tuple into the index relation. The revmap is updated to
336  * mark the range containing the given page as pointing to the inserted entry.
337  * A WAL record is written if the relation needs WAL.
338  *
339  * The buffer, if valid, is first checked for free space to insert the new
340  * entry; if there isn't enough, a new buffer is obtained and pinned. No
341  * buffer lock must be held on entry, no buffer lock is held on exit.
342  *
343  * Return value is the offset number where the tuple was inserted.
     *
     * An ERROR is raised if itemsz exceeds BrinMaxItemSize, or if the tuple
     * cannot be added to the chosen page. itemsz must already be MAXALIGN'ed
     * (asserted below). On return *buffer is the buffer that received the
     * tuple; it is unlocked but not released here.
     *
     * NOTE(review): this copy of the listing skips numbered lines 345 (the
     * return type), 383, 411, 413 and 453. No call that locks the
     * caller-supplied *buffer is visible even though it is unlocked below;
     * presumably it is among the skipped lines -- restore them from upstream.
344  */
346 brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
347  BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
348  BrinTuple *tup, Size itemsz)
349 {
350  Page page;
351  BlockNumber blk;
352  OffsetNumber off;
353  Buffer revmapbuf;
354  ItemPointerData tid;
355  bool extended;
356 
357  Assert(itemsz == MAXALIGN(itemsz));
358 
359  /* If the item is oversized, don't even bother. */
360  if (itemsz > BrinMaxItemSize)
361  {
362  ereport(ERROR,
363  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
364  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
365  itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
366  return InvalidOffsetNumber; /* keep compiler quiet */
367  }
368 
369  /* Make sure the revmap is long enough to contain the entry we need */
370  brinRevmapExtend(revmap, heapBlk);
371 
372  /*
373  * Acquire lock on buffer supplied by caller, if any. If it doesn't have
374  * enough space, unpin it to obtain a new one below.
375  */
376  if (BufferIsValid(*buffer))
377  {
378  /*
379  * It's possible that another backend (or ourselves!) extended the
380  * revmap over the page we held a pin on, so we cannot assume that
381  * it's still a regular page.
382  */
384  if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
385  {
386  UnlockReleaseBuffer(*buffer);
387  *buffer = InvalidBuffer;
388  }
389  }
390 
391  /*
392  * If we still don't have a usable buffer, have brin_getinsertbuffer
393  * obtain one for us.
394  */
395  if (!BufferIsValid(*buffer))
396  {
      /* Keep asking until a valid buffer comes back. */
397  do
398  *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
399  while (!BufferIsValid(*buffer));
400  }
401  else
402  extended = false;
403 
404  /* Now obtain lock on revmap buffer */
405  revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
406 
407  page = BufferGetPage(*buffer);
408  blk = BufferGetBlockNumber(*buffer);
409 
410  /* Execute the actual insertion */
412  if (extended)
      /*
       * NOTE(review): the body of this if (listing line 413) is skipped in
       * this copy; as the text stands the if would govern the PageAddItem
       * assignment below. Presumably a newly extended page is initialized
       * here -- confirm against upstream.
       */
414  off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
415  false, false);
416  if (off == InvalidOffsetNumber)
417  elog(ERROR, "could not insert new index tuple to page");
418  MarkBufferDirty(*buffer);
419 
420  BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
421  blk, off, heapBlk));
422 
      /* Point the revmap entry for heapBlk at the tuple just placed. */
423  ItemPointerSet(&tid, blk, off);
424  brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
425  MarkBufferDirty(revmapbuf);
426 
427  /* XLOG stuff */
428  if (RelationNeedsWAL(idxrel))
429  {
430  xl_brin_insert xlrec;
431  XLogRecPtr recptr;
432  uint8 info;
433 
434  info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
435  xlrec.heapBlk = heapBlk;
436  xlrec.pagesPerRange = pagesPerRange;
437  xlrec.offnum = off;
438 
439  XLogBeginInsert();
440  XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
441 
442  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
443  XLogRegisterBufData(0, (char *) tup, itemsz);
444 
445  XLogRegisterBuffer(1, revmapbuf, 0);
446 
447  recptr = XLogInsert(RM_BRIN_ID, info);
448 
449  PageSetLSN(page, recptr);
450  PageSetLSN(BufferGetPage(revmapbuf), recptr);
451  }
452 
454 
455  /* Tuple is firmly on buffer; we can release our locks */
456  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
457  LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
458 
459  if (extended)
460  FreeSpaceMapVacuum(idxrel);
461 
462  return off;
463 }
464 
465 /*
466  * Initialize a page with the given type.
467  *
468  * Caller is responsible for marking it dirty, as appropriate.
     *
     * The page is set up with PageInit() for a BLCKSZ-sized page whose special
     * space has the size of BrinSpecialSpace; the type is then stored through
     * BrinPageType().
     *
     * NOTE(review): listing line 471 (the line carrying the function name and
     * parameter list) is skipped in this copy; the body refers to page and type.
469  */
470 void
472 {
473  PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
474 
475  BrinPageType(page) = type;
476 }
477 
478 /*
479  * Initialize a new BRIN index's metapage.
     *
     * The metadata area at PageGetContents(page) is filled with BRIN_META_MAGIC,
     * the given version and pagesPerRange, and lastRevmapPage set to 0;
     * pd_lower is then moved just past the metadata.
     *
     * NOTE(review): listing line 486 is skipped in this copy; presumably the
     * page itself is initialized there before the metadata is written --
     * confirm against upstream.
480  */
481 void
482 brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
483 {
484  BrinMetaPageData *metadata;
485 
487 
488  metadata = (BrinMetaPageData *) PageGetContents(page);
489 
490  metadata->brinMagic = BRIN_META_MAGIC;
491  metadata->brinVersion = version;
492  metadata->pagesPerRange = pagesPerRange;
493 
494  /*
495  * Note we cheat here a little. 0 is not a valid revmap block number
496  * (because it's the metapage buffer), but doing this enables the first
497  * revmap page to be created when the index is.
498  */
499  metadata->lastRevmapPage = 0;
500 
501  /*
502  * Set pd_lower just past the end of the metadata. This is essential,
503  * because without doing so, metadata will be lost if xlog.c compresses
504  * the page.
505  */
506  ((PageHeader) page)->pd_lower =
507  ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
508 }
509 
510 /*
511  * Initiate page evacuation protocol.
512  *
513  * The page must be locked in exclusive mode by the caller.
514  *
515  * If the page is not yet initialized or empty, return false without doing
516  * anything; it can be used for revmap without any further changes. If it
517  * contains tuples, mark it for evacuation and return true.
     *
     * "Empty" here means that no line pointer on the page is in use; the scan
     * stops at the first used one.
     *
     * NOTE(review): listing lines 520 (the line carrying the function name and
     * parameter list) and 540 are skipped in this copy. Line 540 presumably
     * sets the evacuation flag on the page, since only the dirty-hint call
     * remains visible -- confirm against upstream.
518  */
519 bool
521 {
522  OffsetNumber off;
523  OffsetNumber maxoff;
524  Page page;
525 
526  page = BufferGetPage(buf);
527 
528  if (PageIsNew(page))
529  return false;
530 
531  maxoff = PageGetMaxOffsetNumber(page);
532  for (off = FirstOffsetNumber; off <= maxoff; off++)
533  {
534  ItemId lp;
535 
536  lp = PageGetItemId(page, off);
537  if (ItemIdIsUsed(lp))
538  {
539  /* prevent other backends from adding more stuff to this page */
541  MarkBufferDirtyHint(buf, true);
542 
543  return true;
544  }
545  }
546  return false;
547 }
548 
549 /*
550  * Move all tuples out of a page.
551  *
552  * The caller must hold lock on the page. The lock and pin are released.
     *
     * Each used item is copied and handed to brin_doupdate() with the copy as
     * both the original and the new tuple and samepage = false. When
     * brin_doupdate() returns false the same offset is tried again.
     *
     * NOTE(review): listing lines 555 (the line carrying the function name and
     * first parameters), 566, 575, 584 and 590 are skipped in this copy.
     * brin_doupdate() requires its buffer to be unlocked on entry, so lines 584
     * and 590 presumably unlock and re-lock buf around the call -- confirm
     * against upstream.
553  */
554 void
556  BrinRevmap *revmap, Buffer buf)
557 {
558  OffsetNumber off;
559  OffsetNumber maxoff;
560  Page page;
561  BrinTuple *btup = NULL;
562  Size btupsz = 0;
563 
564  page = BufferGetPage(buf);
565 
567 
568  maxoff = PageGetMaxOffsetNumber(page);
569  for (off = FirstOffsetNumber; off <= maxoff; off++)
570  {
571  BrinTuple *tup;
572  Size sz;
573  ItemId lp;
574 
576 
577  lp = PageGetItemId(page, off);
578  if (ItemIdIsUsed(lp))
579  {
580  sz = ItemIdGetLength(lp);
581  tup = (BrinTuple *) PageGetItem(page, lp);
      /*
       * NOTE(review): the copy is stored in tup; btup is never assigned in
       * the visible code, so dest is NULL on every call. Confirm whether
       * reusing the destination buffer was intended.
       */
582  tup = brin_copy_tuple(tup, sz, btup, &btupsz);
583 
585 
586  if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
587  buf, off, tup, sz, tup, sz, false))
588  off--; /* retry */
589 
591 
592  /* It's possible that someone extended the revmap over this page */
593  if (!BRIN_IS_REGULAR_PAGE(page))
594  break;
595  }
596  }
597 
598  UnlockReleaseBuffer(buf);
599 }
600 
601 /*
602  * Given a BRIN index page, initialize it if necessary, and record it into the
603  * FSM if necessary. Return value is true if the FSM itself needs "vacuuming".
604  * The main use for this is when, during vacuuming, an uninitialized page is
605  * found, which could be the result of relation extension followed by a crash
606  * before the page can be used.
     *
     * For an already-initialized regular page, the measured free space is
     * recorded (and true returned) only when it is larger than what the FSM
     * currently has on record for that block.
     *
     * NOTE(review): listing lines 609 (the line carrying the function name and
     * parameter list), 626, 627, 629, 632, 633, 636 and 641 are skipped in this
     * copy. As a result the extension-lock handling and the page initialization
     * described below are not visible, and the non-regular-page condition is
     * cut off after its first operand -- restore from upstream.
607  */
608 bool
610 {
611  Page page = BufferGetPage(buf);
612  Size freespace;
613 
614  /*
615  * If a page was left uninitialized, initialize it now; also record it in
616  * FSM.
617  *
618  * Somebody else might be extending the relation concurrently. To avoid
619  * re-initializing the page before they can grab the buffer lock, we
620  * acquire the extension lock momentarily. Since they hold the extension
621  * lock from before getting the page and after it's been initialized, we're
622  * sure to see their initialization.
623  */
624  if (PageIsNew(page))
625  {
628 
      /* Re-check: the page may have been initialized in the meantime. */
630  if (PageIsNew(page))
631  {
634  return true;
635  }
637  }
638 
639  /* Nothing to be done for non-regular index pages */
640  if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
642  return false;
643 
644  /* Measure free space and record it */
645  freespace = br_page_get_freespace(page);
646  if (freespace > GetRecordedFreeSpace(idxrel, BufferGetBlockNumber(buf)))
647  {
648  RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf), freespace);
649  return true;
650  }
651 
652  return false;
653 }
654 
655 /*
656  * Return a pinned and exclusively locked buffer which can be used to insert an
657  * index item of size itemsz (caller must ensure not to request sizes
658  * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
659  * an order determined to avoid deadlocks.)
660  *
661  * If we find that the old page is no longer a regular index page (because
662  * of a revmap extension), the old buffer is unlocked and we return
663  * InvalidBuffer.
664  *
665  * If there's no existing page with enough free space to accommodate the new
666  * item, the relation is extended. If this happens, *extended is set to true,
667  * and it is the caller's responsibility to initialize the page (and WAL-log
668  * that fact) prior to use.
669  *
670  * Note that in some corner cases it is possible for this routine to extend the
671  * relation and then not return the buffer. It is this routine's
672  * responsibility to WAL-log the page initialization and to record the page in
673  * FSM if that happens. Such a buffer may later be reused by this routine.
     *
     * The first candidate block is the relation's target block, falling back
     * to the FSM when there is none; later candidates come from
     * RecordAndGetPageWithFreeSpace(). The returned buffer may be oldbuf itself
     * when the FSM hands back the old block.
     *
     * NOTE(review): this copy of the listing skips numbered lines 676 (the line
     * carrying the function name and first parameters), 708, 719, 750, 753,
     * 765, 771, 778, 781, 795, 798, 806, 817, 818, 834 and 846. None of the
     * buffer lock/unlock or extension lock/unlock calls described in the
     * comments is visible, and several if statements and one conditional
     * expression have lost their continuation -- restore from upstream.
674  */
675 static Buffer
677  bool *extended)
678 {
679  BlockNumber oldblk;
680  BlockNumber newblk;
681  Page page;
682  Size freespace;
683 
684  /* callers must have checked */
685  Assert(itemsz <= BrinMaxItemSize);
686 
687  *extended = false;
688 
689  if (BufferIsValid(oldbuf))
690  oldblk = BufferGetBlockNumber(oldbuf);
691  else
692  oldblk = InvalidBlockNumber;
693 
694  /*
695  * Loop until we find a page with sufficient free space. By the time we
696  * return to caller out of this loop, both buffers are valid and locked;
697  * if we have to restart here, neither buffer is locked and buf is not a
698  * pinned buffer.
699  */
700  newblk = RelationGetTargetBlock(irel);
701  if (newblk == InvalidBlockNumber)
702  newblk = GetPageWithFreeSpace(irel, itemsz);
703  for (;;)
704  {
705  Buffer buf;
706  bool extensionLockHeld = false;
707 
709 
710  if (newblk == InvalidBlockNumber)
711  {
712  /*
713  * There's not enough free space in any existing index page,
714  * according to the FSM: extend the relation to obtain a shiny new
715  * page.
716  */
717  if (!RELATION_IS_LOCAL(irel))
718  {
       /*
        * NOTE(review): listing line 719 is skipped; presumably the
        * extension lock is taken here, given the flag set next.
        */
720  extensionLockHeld = true;
721  }
722  buf = ReadBuffer(irel, P_NEW);
723  newblk = BufferGetBlockNumber(buf);
724  *extended = true;
725 
726  BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
727  BufferGetBlockNumber(buf)));
728  }
729  else if (newblk == oldblk)
730  {
731  /*
732  * There's an odd corner-case here where the FSM is out-of-date,
733  * and gave us the old page.
734  */
735  buf = oldbuf;
736  }
737  else
738  {
739  buf = ReadBuffer(irel, newblk);
740  }
741 
742  /*
743  * We lock the old buffer first, if it's earlier than the new one; but
744  * before we do, we need to check that it hasn't been turned into a
745  * revmap page concurrently; if we detect that it happened, give up
746  * and tell caller to start over.
747  */
748  if (BufferIsValid(oldbuf) && oldblk < newblk)
749  {
751  if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
752  {
754 
755  /*
756  * It is possible that the new page was obtained from
757  * extending the relation. In that case, we must be sure to
758  * record it in the FSM before leaving, because otherwise the
759  * space would be lost forever. However, we cannot let an
760  * uninitialized page get in the FSM, so we need to initialize
761  * it first.
762  */
763  if (*extended)
764  {
766  /* shouldn't matter, but don't confuse caller */
767  *extended = false;
768  }
769 
770  if (extensionLockHeld)
       /*
        * NOTE(review): the body of this if (listing line 771) is skipped
        * in this copy; as the text stands the if would govern the
        * ReleaseBuffer call below.
        */
772 
773  ReleaseBuffer(buf);
774  return InvalidBuffer;
775  }
776  }
777 
779 
780  if (extensionLockHeld)
      /*
       * NOTE(review): the body of this if (listing line 781) is skipped in
       * this copy; as the text stands the if would govern the assignment
       * to page below.
       */
782 
783  page = BufferGetPage(buf);
784 
785  /*
786  * We have a new buffer to insert into. Check that the new page has
787  * enough free space, and return it if it does; otherwise start over.
788  * Note that we allow for the FSM to be out of date here, and in that
789  * case we update it and move on.
790  *
791  * (br_page_get_freespace also checks that the FSM didn't hand us a
792  * page that has since been repurposed for the revmap.)
793  */
      /* NOTE(review): the rest of this expression (listing line 795) is skipped. */
794  freespace = *extended ?
796  if (freespace >= itemsz)
797  {
799 
800  /*
801  * Since the target block specification can get lost on cache
802  * invalidations, make sure we update the more permanent FSM with
803  * data about it before going away.
804  */
       /* NOTE(review): the start of this call (listing line 806) is skipped. */
805  if (*extended)
807  freespace);
808 
809  /*
810  * Lock the old buffer if not locked already. Note that in this
811  * case we know for sure it's a regular page: it's later than the
812  * new page we just got, which is not a revmap page, and revmap
813  * pages are always consecutive.
814  */
815  if (BufferIsValid(oldbuf) && oldblk > newblk)
816  {
819  }
820 
821  return buf;
822  }
823 
824  /* This page is no good. */
825 
826  /*
827  * If an entirely new page does not contain enough free space for the
828  * new item, then surely that item is oversized. Complain loudly; but
829  * first make sure we initialize the page and record it as free, for
830  * next time.
831  */
832  if (*extended)
833  {
835 
836  ereport(ERROR,
837  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
838  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
839  itemsz, freespace, RelationGetRelationName(irel))));
840  return InvalidBuffer; /* keep compiler quiet */
841  }
842 
843  if (newblk != oldblk)
844  UnlockReleaseBuffer(buf);
      /*
       * NOTE(review): the body of the next if (listing line 846) is skipped
       * in this copy; as the text stands the if would govern the newblk
       * assignment below.
       */
845  if (BufferIsValid(oldbuf) && oldblk <= newblk)
847 
848  newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
849  }
850 }
851 
852 /*
853  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
854  * the page in FSM.
855  *
856  * There are several corner situations in which we extend the relation to
857  * obtain a new page and later find that we cannot use it immediately. When
858  * that happens, we don't want to let the page go unrecorded in FSM, because
859  * there is no mechanism to get the space back and the index would bloat.
860  * Also, because we would not WAL-log the action that would initialize the
861  * page, the page would go uninitialized in a standby (or after recovery).
     *
     * NOTE(review): listing lines 864 (the line carrying the function name and
     * parameter list), 872, 874, 877 and 884 are skipped in this copy. The
     * page initialization itself and the start of the FSM-recording call are
     * therefore not visible; only the call's last argument remains on listing
     * line 885 -- restore from upstream.
862  */
863 static void
865 {
866  Page page;
867 
868  BRIN_elog((DEBUG2,
869  "brin_initialize_empty_new_buffer: initializing blank page %u",
870  BufferGetBlockNumber(buffer)));
871 
873  page = BufferGetPage(buffer);
875  MarkBufferDirty(buffer);
876  log_newpage_buffer(buffer, true);
878 
879  /*
880  * We update the FSM for this page, but this is not WAL-logged. This is
881  * acceptable because VACUUM will scan the index and update the FSM with
882  * pages whose FSM records were forgotten in a crash.
883  */
885  br_page_get_freespace(page));
886 }
887 
888 
889 /*
890  * Return the amount of free space on a regular BRIN index page.
891  *
892  * If the page is not a regular page, or has been marked with the
893  * BRIN_EVACUATE_PAGE flag, returns 0.
     *
     * Otherwise the result is whatever PageGetFreeSpace() reports for the page.
     *
     * NOTE(review): listing line 896 (the line carrying the function name and
     * parameter list) is skipped in this copy; the body refers to page.
894  */
895 static Size
897 {
898  if (!BRIN_IS_REGULAR_PAGE(page) ||
899  (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
900  return 0;
901  else
902  return PageGetFreeSpace(page);
903 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
uint32 brinVersion
Definition: brin_page.h:67
BlockNumber heapBlk
Definition: brin_xlog.h:65
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define BRIN_elog(args)
Definition: brin_internal.h:81
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1009
BrinTuple * brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
Definition: brin_tuple.c:321
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3379
#define SizeOfBrinInsert
Definition: brin_xlog.h:74
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
#define ExclusiveLock
Definition: lockdefs.h:44
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:532
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
OffsetNumber offnum
Definition: brin_xlog.h:104
unsigned char uint8
Definition: c.h:294
Pointer Item
Definition: item.h:17
#define InvalidBuffer
Definition: buf.h:25
Buffer brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:137
#define REGBUF_WILL_INIT
Definition: xloginsert.h:32
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
#define BRIN_IS_REGULAR_PAGE(page)
Definition: brin_page.h:57
uint32 brinMagic
Definition: brin_page.h:66
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define P_NEW
Definition: bufmgr.h:82
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage)
Definition: brin_pageops.c:55
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:353
#define RelationGetTargetBlock(relation)
Definition: rel.h:497
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:578
uint16 OffsetNumber
Definition: off.h:24
OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz)
Definition: brin_pageops.c:346
#define SizeOfBrinUpdate
Definition: brin_xlog.h:95
#define XLOG_BRIN_UPDATE
Definition: brin_xlog.h:33
unsigned short uint16
Definition: c.h:295
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
Definition: brin_pageops.c:482
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3332
#define ERROR
Definition: elog.h:43
#define BRIN_PAGETYPE_META
Definition: brin_page.h:51
#define BRIN_PAGETYPE_REGULAR
Definition: brin_page.h:53
#define DEBUG2
Definition: elog.h:24
void brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
Definition: brin_revmap.c:115
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1062
bool brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
Definition: brin_tuple.c:340
BlockNumber pagesPerRange
Definition: brin_xlog.h:68
static char * buf
Definition: pg_test_fsync.c:67
#define FirstOffsetNumber
Definition: off.h:27
#define REGBUF_STANDARD
Definition: xloginsert.h:34
static Size br_page_get_freespace(Page page)
Definition: brin_pageops.c:896
#define RelationGetRelationName(relation)
Definition: rel.h:445
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
bool brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
Definition: brin_pageops.c:327
void FreeSpaceMapVacuum(Relation rel)
Definition: freespace.c:379
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:332
BlockNumber lastRevmapPage
Definition: brin_page.h:69
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
#define XLOG_BRIN_INIT_PAGE
Definition: brin_xlog.h:43
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:415
#define BRIN_IS_META_PAGE(page)
Definition: brin_page.h:55
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:270
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:382
#define PageGetContents(page)
Definition: bufpage.h:242
#define XLOG_BRIN_INSERT
Definition: brin_xlog.h:32
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define InvalidOffsetNumber
Definition: off.h:26
xl_brin_insert insert
Definition: brin_xlog.h:92
BlockNumber pagesPerRange
Definition: brin_page.h:68
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
BlockNumber bt_blkno
Definition: brin_tuple.h:57
void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
Definition: bufpage.c:954
#define BrinMaxItemSize
Definition: brin_pageops.c:30
PageHeaderData * PageHeader
Definition: bufpage.h:162
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:670
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define SizeOfBrinSamepageUpdate
Definition: brin_xlog.h:107
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define BRIN_IS_REVMAP_PAGE(page)
Definition: brin_page.h:56
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:504
size_t Size
Definition: c.h:404
#define XLOG_BRIN_SAMEPAGE_UPDATE
Definition: brin_xlog.h:34
void brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer buf)
Definition: brin_pageops.c:555
#define InvalidBlockNumber
Definition: block.h:33
OffsetNumber offnum
Definition: brin_xlog.h:71
static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
Definition: brin_pageops.c:864
#define MAXALIGN(LEN)
Definition: c.h:623
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define RelationNeedsWAL(relation)
Definition: rel.h:514
#define BrinPageType(page)
Definition: brin_page.h:42
Size PageGetExactFreeSpace(Page page)
Definition: bufpage.c:629
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
#define PageIsNew(page)
Definition: bufpage.h:225
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define ShareLock
Definition: lockdefs.h:41
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:132
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:98
bool brin_page_cleanup(Relation idxrel, Buffer buf)
Definition: brin_pageops.c:609
static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, bool *extended)
Definition: brin_pageops.c:676
#define BRIN_EVACUATE_PAGE
Definition: brin_page.h:60
bool brin_start_evacuating_page(Relation idxRel, Buffer buf)
Definition: brin_pageops.c:520
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:149
#define elog
Definition: elog.h:219
#define BrinPageFlags(page)
Definition: brin_page.h:46
void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk, ItemPointerData tid)
Definition: brin_revmap.c:158
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PageSetLSN(page, lsn)
Definition: bufpage.h:364
OffsetNumber oldOffnum
Definition: brin_xlog.h:90
int Buffer
Definition: buf.h:23
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:105
void brin_page_init(Page page, uint16 type)
Definition: brin_pageops.c:471
#define BRIN_META_MAGIC
Definition: brin_page.h:73
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:41