PostgreSQL Source Code  git master
hio.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
Include dependency graph for hio.c:

Go to the source code of this file.

Macros

#define MAX_BUFFERS_TO_EXTEND_BY   64
 

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
static Buffer ReadBufferBI (Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
 
static bool GetVisibilityMapPins (Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
 
static Buffer RelationAddBlocks (Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
 

Macro Definition Documentation

◆ MAX_BUFFERS_TO_EXTEND_BY

#define MAX_BUFFERS_TO_EXTEND_BY   64

Function Documentation

◆ GetVisibilityMapPins()

/*
 * For each heap page which is all-visible, acquire a pin on the appropriate
 * visibility map page, if we haven't already got one.
 *
 * To avoid complexity in the callers, either buffer1 or buffer2 may be
 * InvalidBuffer if only one buffer is involved. For the same reason, block2
 * may be smaller than block1.
 *
 * Returns whether buffer locks were temporarily released.
 */
static bool
GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
					 BlockNumber block1, BlockNumber block2,
					 Buffer *vmbuffer1, Buffer *vmbuffer2)
{
	bool		need_to_pin_buffer1;
	bool		need_to_pin_buffer2;
	bool		released_locks = false;

	/*
	 * Swap buffers around to handle case of a single block/buffer, and to
	 * handle if lock ordering rules require to lock block2 first.
	 */
	if (!BufferIsValid(buffer1) ||
		(BufferIsValid(buffer2) && block1 > block2))
	{
		Buffer		tmpbuf = buffer1;
		Buffer	   *tmpvmbuf = vmbuffer1;
		BlockNumber tmpblock = block1;

		buffer1 = buffer2;
		vmbuffer1 = vmbuffer2;
		block1 = block2;

		buffer2 = tmpbuf;
		vmbuffer2 = tmpvmbuf;
		block2 = tmpblock;
	}

	Assert(BufferIsValid(buffer1));
	Assert(buffer2 == InvalidBuffer || block1 <= block2);

	while (1)
	{
		/* Figure out which pins we need but don't have. */
		need_to_pin_buffer1 = PageIsAllVisible(BufferGetPage(buffer1))
			&& !visibilitymap_pin_ok(block1, *vmbuffer1);
		need_to_pin_buffer2 = buffer2 != InvalidBuffer
			&& PageIsAllVisible(BufferGetPage(buffer2))
			&& !visibilitymap_pin_ok(block2, *vmbuffer2);
		if (!need_to_pin_buffer1 && !need_to_pin_buffer2)
			break;

		/* We must unlock both buffers before doing any I/O. */
		released_locks = true;
		LockBuffer(buffer1, BUFFER_LOCK_UNLOCK);
		if (buffer2 != InvalidBuffer && buffer2 != buffer1)
			LockBuffer(buffer2, BUFFER_LOCK_UNLOCK);

		/* Get pins. */
		if (need_to_pin_buffer1)
			visibilitymap_pin(relation, block1, vmbuffer1);
		if (need_to_pin_buffer2)
			visibilitymap_pin(relation, block2, vmbuffer2);

		/* Relock buffers. */
		LockBuffer(buffer1, BUFFER_LOCK_EXCLUSIVE);
		if (buffer2 != InvalidBuffer && buffer2 != buffer1)
			LockBuffer(buffer2, BUFFER_LOCK_EXCLUSIVE);

		/*
		 * If there are two buffers involved and we pinned just one of them,
		 * it's possible that the second one became all-visible while we were
		 * busy pinning the first one. If it looks like that's a possible
		 * scenario, we'll need to make a second pass through this loop.
		 */
		if (buffer2 == InvalidBuffer || buffer1 == buffer2
			|| (need_to_pin_buffer1 && need_to_pin_buffer2))
			break;
	}

	return released_locks;
}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5131
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:193
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:404
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:195
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:355
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
#define Assert(condition)
Definition: c.h:858
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
static StringInfoData tmpbuf
Definition: walsender.c:170

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), InvalidBuffer, LockBuffer(), PageIsAllVisible(), tmpbuf, visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by RelationGetBufferForTuple().

◆ ReadBufferBI()

/*
 * Read in a buffer in 'mode', using bulk-insert strategy if bistate isn't
 * NULL. The bistate additionally keeps the most recently used buffer pinned
 * across calls (bistate->current_buf), so repeated inserts into the same
 * block avoid buffer-mapping lookups.
 */
static Buffer
ReadBufferBI(Relation relation, BlockNumber targetBlock,
			 ReadBufferMode mode, BulkInsertState bistate)
{
	Buffer		buffer;

	/* If not bulk-insert, exactly like ReadBuffer */
	if (!bistate)
		return ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
								  mode, NULL);

	/* If we have the desired block already pinned, re-pin and return it */
	if (bistate->current_buf != InvalidBuffer)
	{
		if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
		{
			/*
			 * Currently the LOCK variants are only used for extending
			 * relation, which should never reach this branch.
			 */
			Assert(mode != RBM_ZERO_AND_LOCK &&
				   mode != RBM_ZERO_AND_CLEANUP_LOCK);

			IncrBufferRefCount(bistate->current_buf);
			return bistate->current_buf;
		}
		/* ...else drop the old buffer */
		ReleaseBuffer(bistate->current_buf);
		bistate->current_buf = InvalidBuffer;
	}

	/* Perform a read using the buffer strategy */
	buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
								mode, bistate->strategy);

	/* Save the selected block as target for future inserts */
	IncrBufferRefCount(buffer);
	bistate->current_buf = buffer;

	return buffer;
}
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:4928
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3713
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4896
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:48
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:46
static PgChecksumMode mode
Definition: pg_checksums.c:56
@ MAIN_FORKNUM
Definition: relpath.h:50
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References Assert, BufferGetBlockNumber(), BulkInsertStateData::current_buf, IncrBufferRefCount(), InvalidBuffer, MAIN_FORKNUM, mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBufferExtended(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().

◆ RelationAddBlocks()

/*
 * Extend the relation, by multiple pages if beneficial.
 *
 * If the caller needs multiple pages (num_pages > 1), we always try to
 * extend by at least that much. When there is contention on the relation
 * extension lock, we additionally extend "for others" by registering spare
 * pages in the free space map.
 *
 * The number of pages extended in one call is capped at
 * MAX_BUFFERS_TO_EXTEND_BY, because all the new pages' buffers have to be
 * pinned concurrently.
 *
 * Returns a buffer for one of the newly extended pages (the first one),
 * initialized and marked dirty. The buffer is returned exclusively locked
 * unless we had to release the lock to do FSM I/O; *did_unlock reports
 * whether that happened, so the caller knows to recheck free space.
 */
static Buffer
RelationAddBlocks(Relation relation, BulkInsertState bistate,
				  int num_pages, bool use_fsm, bool *did_unlock)
{
#define MAX_BUFFERS_TO_EXTEND_BY 64
	Buffer		victim_buffers[MAX_BUFFERS_TO_EXTEND_BY];
	BlockNumber first_block = InvalidBlockNumber;
	BlockNumber last_block = InvalidBlockNumber;
	uint32		extend_by_pages;
	uint32		not_in_fsm_pages;
	Buffer		buffer;
	Page		page;

	/*
	 * Determine by how many pages to try to extend by.
	 */
	if (bistate == NULL && !use_fsm)
	{
		/*
		 * If we have neither bistate, nor can use the FSM, we can't bulk
		 * extend - there'd be no way to find the additional pages.
		 */
		extend_by_pages = 1;
	}
	else
	{
		uint32		waitcount;

		/*
		 * Try to extend at least by the number of pages the caller needs. We
		 * can remember the additional pages (either via FSM or bistate).
		 */
		extend_by_pages = num_pages;

		if (!RELATION_IS_LOCAL(relation))
			waitcount = RelationExtensionLockWaiterCount(relation);
		else
			waitcount = 0;

		/*
		 * Multiply the number of pages to extend by the number of waiters.
		 * Do this even if we're not using the FSM, as it still relieves
		 * contention, by deferring the next time this backend needs to
		 * extend. In that case the extended pages will be found via
		 * bistate->next_free.
		 */
		extend_by_pages += extend_by_pages * waitcount;

		/* ---
		 * If we previously extended using the same bistate, it's very likely
		 * we'll extend some more. Try to extend by as many pages as
		 * before. This can be important for performance for several reasons,
		 * including:
		 *
		 * - It prevents mdzeroextend() switching between extending the
		 *   relation in different ways, which is inefficient for some
		 *   filesystems.
		 *
		 * - Contention is often intermittent. Even if we currently don't see
		 *   other waiters (see above), extending by larger amounts can
		 *   prevent future contention.
		 * ---
		 */
		if (bistate)
			extend_by_pages = Max(extend_by_pages, bistate->already_extended_by);

		/*
		 * Can't extend by more than MAX_BUFFERS_TO_EXTEND_BY, we need to pin
		 * them all concurrently.
		 */
		extend_by_pages = Min(extend_by_pages, MAX_BUFFERS_TO_EXTEND_BY);
	}

	/*
	 * How many of the extended pages should be entered into the FSM?
	 *
	 * If we have a bistate, only enter pages that we don't need ourselves
	 * into the FSM. Otherwise every other backend will immediately try to
	 * use the pages this backend needs for itself, causing unnecessary
	 * contention. If we don't have a bistate, we can't avoid the FSM.
	 *
	 * Never enter the page returned into the FSM, we'll immediately use it.
	 */
	if (num_pages > 1 && bistate == NULL)
		not_in_fsm_pages = 1;
	else
		not_in_fsm_pages = num_pages;

	/* prepare to put another buffer into the bistate */
	if (bistate && bistate->current_buf != InvalidBuffer)
	{
		ReleaseBuffer(bistate->current_buf);
		bistate->current_buf = InvalidBuffer;
	}

	/*
	 * Extend the relation. We ask for the first returned page to be locked,
	 * so that we are sure that nobody has inserted into the page
	 * concurrently.
	 *
	 * With the current MAX_BUFFERS_TO_EXTEND_BY there's no danger of
	 * [auto]vacuum trying to truncate later pages as REL_TRUNCATE_MINIMUM is
	 * way larger.
	 */
	first_block = ExtendBufferedRelBy(BMR_REL(relation), MAIN_FORKNUM,
									  bistate ? bistate->strategy : NULL,
									  EB_LOCK_FIRST,
									  extend_by_pages,
									  victim_buffers,
									  &extend_by_pages);
	buffer = victim_buffers[0]; /* the buffer the function will return */
	last_block = first_block + (extend_by_pages - 1);
	Assert(first_block == BufferGetBlockNumber(buffer));

	/*
	 * Relation is now extended. Initialize the page. We do this here, before
	 * potentially releasing the lock on the page, because it allows us to
	 * double check that the page contents are empty (this should never
	 * happen, but if it does we don't want to risk wiping out valid data).
	 */
	page = BufferGetPage(buffer);
	if (!PageIsNew(page))
		elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
			 first_block,
			 RelationGetRelationName(relation));

	PageInit(page, BufferGetPageSize(buffer), 0);
	MarkBufferDirty(buffer);

	/*
	 * If we decided to put pages into the FSM, release the buffer lock (but
	 * not pin), we don't want to do IO while holding a buffer lock. This
	 * will necessitate a bit more extensive checking in our caller.
	 */
	if (use_fsm && not_in_fsm_pages < extend_by_pages)
	{
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		*did_unlock = true;
	}
	else
		*did_unlock = false;

	/*
	 * Relation is now extended. Release pins on all buffers, except for the
	 * first (which we'll return). If we decided to put pages into the FSM,
	 * we can do that as part of the same loop.
	 */
	for (uint32 i = 1; i < extend_by_pages; i++)
	{
		BlockNumber curBlock = first_block + i;

		Assert(curBlock == BufferGetBlockNumber(victim_buffers[i]));
		Assert(BlockNumberIsValid(curBlock));

		ReleaseBuffer(victim_buffers[i]);

		if (use_fsm && i >= not_in_fsm_pages)
		{
			Size		freespace = BufferGetPageSize(victim_buffers[i]) -
				SizeOfPageHeaderData;

			RecordPageWithFreeSpace(relation, curBlock, freespace);
		}
	}

	if (use_fsm && not_in_fsm_pages < extend_by_pages)
	{
		BlockNumber first_fsm_block = first_block + not_in_fsm_pages;

		FreeSpaceMapVacuumRange(relation, first_fsm_block, last_block);
	}

	if (bistate)
	{
		/*
		 * Remember the additional pages we extended by, so we later can use
		 * them without looking into the FSM.
		 */
		if (extend_by_pages > 1)
		{
			bistate->next_free = first_block + 1;
			bistate->last_free = last_block;
		}
		else
		{
			bistate->next_free = InvalidBlockNumber;
			bistate->last_free = InvalidBlockNumber;
		}

		/* maintain bistate->current_buf */
		IncrBufferRefCount(buffer);
		bistate->current_buf = buffer;
		bistate->already_extended_by += extend_by_pages;
	}

	return buffer;
#undef MAX_BUFFERS_TO_EXTEND_BY
}
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:877
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2520
static Size BufferGetPageSize(Buffer buffer)
Definition: bufmgr.h:393
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
Pointer Page
Definition: bufpage.h:78
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static bool PageIsNew(Page page)
Definition: bufpage.h:230
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define Max(x, y)
Definition: c.h:998
size_t Size
Definition: c.h:605
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
#define MAX_BUFFERS_TO_EXTEND_BY
int i
Definition: isn.c:73
int RelationExtensionLockWaiterCount(Relation relation)
Definition: lmgr.c:455
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
#define RelationGetRelationName(relation)
Definition: rel.h:539
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, Assert, BlockNumberIsValid(), BMR_REL, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BulkInsertStateData::current_buf, EB_LOCK_FIRST, elog, ERROR, ExtendBufferedRelBy(), FreeSpaceMapVacuumRange(), i, IncrBufferRefCount(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), Max, MAX_BUFFERS_TO_EXTEND_BY, Min, BulkInsertStateData::next_free, PageInit(), PageIsNew(), RecordPageWithFreeSpace(), RELATION_IS_LOCAL, RelationExtensionLockWaiterCount(), RelationGetRelationName, ReleaseBuffer(), SizeOfPageHeaderData, and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().

◆ RelationGetBufferForTuple()

/*
 * RelationGetBufferForTuple
 *
 * Returns an exclusively-locked, pinned buffer with free space >= the
 * (MAXALIGN'd) tuple length 'len', extending the relation if necessary.
 *
 * We first try the last page we inserted into (cached in the BulkInsertState
 * or the relcache target block), then consult the free space map (unless
 * HEAP_INSERT_SKIP_FSM is set in 'options'), and finally extend the relation
 * via RelationAddBlocks(), asking for 'num_pages' pages at once when the
 * caller knows it needs more than one.
 *
 * If 'otherBuffer' is valid (heap_update case), it is re-locked too, always
 * observing the lower-block-number-first lock ordering rule; bulk insert
 * (bistate) is not supported together with otherBuffer. On return, *vmbuffer
 * (and *vmbuffer_other, if applicable) are pinned on the visibility map
 * pages covering any all-visible heap pages, so the caller can clear the
 * bits without doing I/O while holding buffer locks.
 *
 * Raises ERROR if the tuple is larger than MaxHeapTupleSize.
 */
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
						  Buffer otherBuffer, int options,
						  BulkInsertState bistate,
						  Buffer *vmbuffer, Buffer *vmbuffer_other,
						  int num_pages)
{
	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
	Buffer		buffer = InvalidBuffer;
	Page		page;
	Size		nearlyEmptyFreeSpace,
				pageFreeSpace = 0,
				saveFreeSpace = 0,
				targetFreeSpace = 0;
	BlockNumber targetBlock,
				otherBlock;
	bool		unlockedTargetBuffer;
	bool		recheckVmPins;

	len = MAXALIGN(len);		/* be conservative */

	/* if the caller doesn't know by how many pages to extend, extend by 1 */
	if (num_pages <= 0)
		num_pages = 1;

	/* Bulk insert is not supported for updates, only inserts. */
	Assert(otherBuffer == InvalidBuffer || !bistate);

	/*
	 * If we're gonna fail for oversize tuple, do it right away
	 */
	if (len > MaxHeapTupleSize)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("row is too big: size %zu, maximum size %zu",
						len, MaxHeapTupleSize)));

	/* Compute desired extra freespace due to fillfactor option */
	saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
												   HEAP_DEFAULT_FILLFACTOR);

	/*
	 * Since pages without tuples can still have line pointers, we consider
	 * pages "empty" when the unavailable space is slight. This threshold is
	 * somewhat arbitrary, but it should prevent most unnecessary relation
	 * extensions while inserting large tuples into low-fillfactor tables.
	 */
	nearlyEmptyFreeSpace = MaxHeapTupleSize -
		(MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
	if (len + saveFreeSpace > nearlyEmptyFreeSpace)
		targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
	else
		targetFreeSpace = len + saveFreeSpace;

	if (otherBuffer != InvalidBuffer)
		otherBlock = BufferGetBlockNumber(otherBuffer);
	else
		otherBlock = InvalidBlockNumber;	/* just to keep compiler quiet */

	/*
	 * We first try to put the tuple on the same page we last inserted a
	 * tuple on, as cached in the BulkInsertState or relcache entry. If that
	 * doesn't work, we ask the Free Space Map to locate a suitable page.
	 * Since the FSM's info might be out of date, we have to be prepared to
	 * loop around and retry multiple times. (To ensure this isn't an
	 * infinite loop, we must update the FSM with the correct amount of free
	 * space on each page that proves not to be suitable.) If the FSM has no
	 * record of a page with enough free space, we give up and extend the
	 * relation.
	 *
	 * When use_fsm is false, we either put the tuple onto the existing
	 * target page or extend the relation.
	 */
	if (bistate && bistate->current_buf != InvalidBuffer)
		targetBlock = BufferGetBlockNumber(bistate->current_buf);
	else
		targetBlock = RelationGetTargetBlock(relation);

	if (targetBlock == InvalidBlockNumber && use_fsm)
	{
		/*
		 * We have no cached target page, so ask the FSM for an initial
		 * target.
		 */
		targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
	}

	/*
	 * If the FSM knows nothing of the rel, try the last page before we give
	 * up and extend. This avoids one-tuple-per-page syndrome during
	 * bootstrapping or in a recently-started system.
	 */
	if (targetBlock == InvalidBlockNumber)
	{
		BlockNumber nblocks = RelationGetNumberOfBlocks(relation);

		if (nblocks > 0)
			targetBlock = nblocks - 1;
	}

loop:
	while (targetBlock != InvalidBlockNumber)
	{
		/*
		 * Read and exclusive-lock the target block, as well as the other
		 * block if one was given, taking suitable care with lock ordering
		 * and the possibility they are the same block.
		 *
		 * If the page-level all-visible flag is set, caller will need to
		 * clear both that and the corresponding visibility map bit. However,
		 * by the time we return, we'll have x-locked the buffer, and we
		 * don't want to do any I/O while in that state. So we check the bit
		 * here before taking the lock, and pin the page if it appears
		 * necessary. Checking without the lock creates a risk of getting the
		 * wrong answer, so we'll have to recheck after acquiring the lock.
		 */
		if (otherBuffer == InvalidBuffer)
		{
			/* easy case */
			buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
			if (PageIsAllVisible(BufferGetPage(buffer)))
				visibilitymap_pin(relation, targetBlock, vmbuffer);

			/*
			 * If the page is empty, pin vmbuffer to set all_frozen bit later.
			 */
			if ((options & HEAP_INSERT_FROZEN) &&
				(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
				visibilitymap_pin(relation, targetBlock, vmbuffer);

			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		}
		else if (otherBlock == targetBlock)
		{
			/* also easy case */
			buffer = otherBuffer;
			if (PageIsAllVisible(BufferGetPage(buffer)))
				visibilitymap_pin(relation, targetBlock, vmbuffer);
			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		}
		else if (otherBlock < targetBlock)
		{
			/* lock other buffer first */
			buffer = ReadBuffer(relation, targetBlock);
			if (PageIsAllVisible(BufferGetPage(buffer)))
				visibilitymap_pin(relation, targetBlock, vmbuffer);
			LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		}
		else
		{
			/* lock target buffer first */
			buffer = ReadBuffer(relation, targetBlock);
			if (PageIsAllVisible(BufferGetPage(buffer)))
				visibilitymap_pin(relation, targetBlock, vmbuffer);
			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
			LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
		}

		/*
		 * We now have the target page (and the other buffer, if any) pinned
		 * and locked. However, since our initial PageIsAllVisible checks
		 * were performed before acquiring the lock, the results might now be
		 * out of date, either for the selected victim buffer, or for the
		 * other buffer passed by the caller. In that case, we'll need to
		 * give up our locks, go get the pin(s) we failed to get earlier, and
		 * re-lock. That's pretty painful, but hopefully shouldn't happen
		 * often.
		 *
		 * Note that there's a small possibility that we didn't pin the page
		 * above but still have the correct page pinned anyway, either
		 * because we've already made a previous pass through this loop, or
		 * because caller passed us the right page anyway.
		 *
		 * Note also that it's possible that by the time we get the pin and
		 * retake the buffer locks, the visibility map bit will have been
		 * cleared by some other backend anyway. In that case, we'll have
		 * done a bit of extra work for no gain, but there's no real harm
		 * done.
		 */
		GetVisibilityMapPins(relation, buffer, otherBuffer,
							 targetBlock, otherBlock, vmbuffer,
							 vmbuffer_other);

		/*
		 * Now we can check to see if there's enough free space here. If so,
		 * we're done.
		 */
		page = BufferGetPage(buffer);

		/*
		 * If necessary initialize page, it'll be used soon. We could avoid
		 * dirtying the buffer here, and rely on the caller to do so whenever
		 * it puts a tuple onto the page, but there seems not much benefit in
		 * doing so.
		 */
		if (PageIsNew(page))
		{
			PageInit(page, BufferGetPageSize(buffer), 0);
			MarkBufferDirty(buffer);
		}

		pageFreeSpace = PageGetHeapFreeSpace(page);
		if (targetFreeSpace <= pageFreeSpace)
		{
			/* use this page as future insert target, too */
			RelationSetTargetBlock(relation, targetBlock);
			return buffer;
		}

		/*
		 * Not enough space, so we must give up our page locks and pin (if
		 * any) and prepare to look elsewhere. We don't care which order we
		 * unlock the two buffers in, so this can be slightly simpler than
		 * the code above.
		 */
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		if (otherBuffer == InvalidBuffer)
			ReleaseBuffer(buffer);
		else if (otherBlock != targetBlock)
		{
			LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
			ReleaseBuffer(buffer);
		}

		/* Is there an ongoing bulk extension? */
		if (bistate && bistate->next_free != InvalidBlockNumber)
		{
			Assert(bistate->next_free <= bistate->last_free);

			/*
			 * We bulk extended the relation before, and there are still some
			 * unused pages from that extension, so we don't need to look in
			 * the FSM for a new page. But do record the free space from the
			 * last page, somebody might insert narrower tuples later.
			 */
			if (use_fsm)
				RecordPageWithFreeSpace(relation, targetBlock, pageFreeSpace);

			targetBlock = bistate->next_free;
			if (bistate->next_free >= bistate->last_free)
			{
				bistate->next_free = InvalidBlockNumber;
				bistate->last_free = InvalidBlockNumber;
			}
			else
				bistate->next_free++;
		}
		else if (!use_fsm)
		{
			/* Without FSM, always fall out of the loop and extend */
			break;
		}
		else
		{
			/*
			 * Update FSM as to condition of this page, and ask for another
			 * page to try.
			 */
			targetBlock = RecordAndGetPageWithFreeSpace(relation,
														targetBlock,
														pageFreeSpace,
														targetFreeSpace);
		}
	}

	/* Have to extend the relation */
	buffer = RelationAddBlocks(relation, bistate, num_pages, use_fsm,
							   &unlockedTargetBuffer);

	targetBlock = BufferGetBlockNumber(buffer);
	page = BufferGetPage(buffer);

	/*
	 * The page is empty, pin vmbuffer to set all_frozen bit. We don't want
	 * to do IO while the buffer is locked, so we unlock the page first if IO
	 * is needed (necessitating checks below).
	 */
	if (options & HEAP_INSERT_FROZEN)
	{
		Assert(PageGetMaxOffsetNumber(page) == 0);

		if (!visibilitymap_pin_ok(targetBlock, *vmbuffer))
		{
			if (!unlockedTargetBuffer)
				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
			unlockedTargetBuffer = true;
			visibilitymap_pin(relation, targetBlock, vmbuffer);
		}
	}

	/*
	 * Reacquire locks if necessary.
	 *
	 * If the target buffer was unlocked above, or is unlocked while
	 * reacquiring the lock on otherBuffer below, it's unlikely, but
	 * possible, that another backend used space on this page. We check for
	 * that below, and retry if necessary.
	 */
	recheckVmPins = false;
	if (unlockedTargetBuffer)
	{
		/* released lock on target buffer above */
		if (otherBuffer != InvalidBuffer)
			LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		recheckVmPins = true;
	}
	else if (otherBuffer != InvalidBuffer)
	{
		/*
		 * We did not release the target buffer, and otherBuffer is valid,
		 * need to lock the other buffer. It's guaranteed to be of a lower
		 * page number than the new page. To conform with the deadlock
		 * prevent rules, we ought to lock otherBuffer first, but that would
		 * give other backends a chance to put tuples on our page. To reduce
		 * the likelihood of that, attempt to lock the other buffer
		 * conditionally, that's very likely to work.
		 *
		 * Alternatively, we could acquire the lock on otherBuffer before
		 * extending the relation, but that'd require holding the lock while
		 * performing IO, which seems worse than an unlikely retry.
		 */
		Assert(otherBuffer != buffer);
		Assert(targetBlock > otherBlock);

		if (unlikely(!ConditionalLockBuffer(otherBuffer)))
		{
			unlockedTargetBuffer = true;
			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
			LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		}
		recheckVmPins = true;
	}

	/*
	 * If one of the buffers was unlocked (always the case if otherBuffer is
	 * valid), it's possible, although unlikely, that an all-visible flag
	 * became set. We can use GetVisibilityMapPins to deal with that. It's
	 * possible that GetVisibilityMapPins() might need to temporarily release
	 * buffer locks, in which case we'll need to check if there's still
	 * enough space on the page below.
	 */
	if (recheckVmPins)
	{
		if (GetVisibilityMapPins(relation, otherBuffer, buffer,
								 otherBlock, targetBlock, vmbuffer_other,
								 vmbuffer))
			unlockedTargetBuffer = true;
	}

	/*
	 * If the target buffer was temporarily unlocked since the relation
	 * extension, it's possible, although unlikely, that all the space on the
	 * page was already used. If so, we just retry from the start. If we
	 * didn't unlock, something has gone wrong if there's not enough space -
	 * the test at the top should have prevented reaching this case.
	 */
	pageFreeSpace = PageGetHeapFreeSpace(page);
	if (len > pageFreeSpace)
	{
		if (unlockedTargetBuffer)
		{
			if (otherBuffer != InvalidBuffer)
				LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
			UnlockReleaseBuffer(buffer);

			goto loop;
		}
		elog(PANIC, "tuple is too big: size %zu", len);
	}

	/*
	 * Remember the new page as our target for future insertions.
	 *
	 * XXX should we enter the new page into the free space map immediately,
	 * or just keep it for this backend's exclusive use in the short run
	 * (until VACUUM sees it)? Seems to depend on whether you expect the
	 * current backend to make more insertions or not, which is probably a
	 * good bet most of the time. So for now, don't add it to FSM yet.
	 */
	RelationSetTargetBlock(relation, targetBlock);

	return buffer;
}
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5157
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4913
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:277
@ RBM_NORMAL
Definition: bufmgr.h:45
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define MAXALIGN(LEN)
Definition: c.h:811
#define unlikely(x)
Definition: c.h:311
int errcode(int sqlerrcode)
Definition: elog.c:855
int errmsg(const char *fmt,...)
Definition: elog.c:1068
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:154
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:137
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:35
#define HEAP_INSERT_FROZEN
Definition: heapam.h:36
static Buffer RelationAddBlocks(Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
Definition: hio.c:238
static bool GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
Definition: hio.c:140
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
Definition: hio.c:88
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define MaxHeapTupleSize
Definition: htup_details.h:558
struct ItemIdData ItemIdData
const void size_t len
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:608
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), ConditionalLockBuffer(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, len, LockBuffer(), MarkBufferDirty(), Max, MAXALIGN, MaxHeapTupleSize, MaxHeapTuplesPerPage, BulkInsertStateData::next_free, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageInit(), PageIsAllVisible(), PageIsNew(), PANIC, RBM_NORMAL, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RecordPageWithFreeSpace(), RelationAddBlocks(), RelationGetNumberOfBlocks, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), unlikely, UnlockReleaseBuffer(), visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

◆ RelationPutHeapTuple()

/*
 * RelationPutHeapTuple - place tuple at specified page
 *
 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!  Must PANIC on failure!!!
 *
 * The buffer must be pinned and exclusively locked by the caller. On return,
 * tuple->t_self is set to where the tuple was stored; the stored tuple's
 * t_ctid is set to match, unless 'token' indicates a speculative insertion
 * (in which case the CTID field carries the speculative token instead).
 */
void
RelationPutHeapTuple(Relation relation,
					 Buffer buffer,
					 HeapTuple tuple,
					 bool token)
{
	Page		pageHeader;
	OffsetNumber offnum;

	/*
	 * A tuple that's being inserted speculatively should already have its
	 * token set.
	 */
	Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));

	/*
	 * Do not allow tuples with invalid combinations of hint bits to be
	 * placed on a page. This combination is detected as corruption by the
	 * contrib/amcheck logic, so if you disable this assertion, make
	 * corresponding changes there.
	 */
	Assert(!((tuple->t_data->t_infomask & HEAP_XMAX_COMMITTED) &&
			 (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)));

	/* Add the tuple to the page */
	pageHeader = BufferGetPage(buffer);

	offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
						 tuple->t_len, InvalidOffsetNumber, false, true);

	if (offnum == InvalidOffsetNumber)
		elog(PANIC, "failed to add tuple to page");

	/* Update tuple->t_self to the actual position where it was stored */
	ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);

	/*
	 * Insert the correct position into CTID of the stored tuple, too (unless
	 * this is a speculative insertion, in which case the token is held in
	 * CTID field instead)
	 */
	if (!token)
	{
		ItemId		itemId = PageGetItemId(pageHeader, offnum);
		HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);

		item->t_ctid = tuple->t_self;
	}
}
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define token
Definition: indent_globs.h:126
Pointer Item
Definition: item.h:17
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
ItemPointerData t_ctid
Definition: htup_details.h:161

References Assert, BufferGetBlockNumber(), BufferGetPage(), elog, HEAP_XMAX_COMMITTED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderIsSpeculative, InvalidOffsetNumber, ItemPointerSet(), PageAddItem, PageGetItem(), PageGetItemId(), PANIC, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, and token.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().