PostgreSQL Source Code  git master
hio.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
Include dependency graph for hio.c:

Go to the source code of this file.

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
static Buffer ReadBufferBI (Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
 
static void GetVisibilityMapPins (Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
 
static void RelationAddExtraBlocks (Relation relation, BulkInsertState bistate)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other)
 

Function Documentation

◆ GetVisibilityMapPins()

static void GetVisibilityMapPins ( Relation  relation,
Buffer  buffer1,
Buffer  buffer2,
BlockNumber  block1,
BlockNumber  block2,
Buffer *  vmbuffer1,
Buffer *  vmbuffer2 
)
static

Definition at line 139 of file hio.c.

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage, BufferIsValid, InvalidBuffer, LockBuffer(), PageIsAllVisible, visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by RelationGetBufferForTuple().

142 {
143  bool need_to_pin_buffer1;
144  bool need_to_pin_buffer2;
145 
146  Assert(BufferIsValid(buffer1));
147  Assert(buffer2 == InvalidBuffer || block1 <= block2);
148 
149  while (1)
150  {
151  /* Figure out which pins we need but don't have. */
152  need_to_pin_buffer1 = PageIsAllVisible(BufferGetPage(buffer1))
153  && !visibilitymap_pin_ok(block1, *vmbuffer1);
154  need_to_pin_buffer2 = buffer2 != InvalidBuffer
155  && PageIsAllVisible(BufferGetPage(buffer2))
156  && !visibilitymap_pin_ok(block2, *vmbuffer2);
157  if (!need_to_pin_buffer1 && !need_to_pin_buffer2)
158  return;
159 
160  /* We must unlock both buffers before doing any I/O. */
161  LockBuffer(buffer1, BUFFER_LOCK_UNLOCK);
162  if (buffer2 != InvalidBuffer && buffer2 != buffer1)
163  LockBuffer(buffer2, BUFFER_LOCK_UNLOCK);
164 
165  /* Get pins. */
166  if (need_to_pin_buffer1)
167  visibilitymap_pin(relation, block1, vmbuffer1);
168  if (need_to_pin_buffer2)
169  visibilitymap_pin(relation, block2, vmbuffer2);
170 
171  /* Relock buffers. */
172  LockBuffer(buffer1, BUFFER_LOCK_EXCLUSIVE);
173  if (buffer2 != InvalidBuffer && buffer2 != buffer1)
174  LockBuffer(buffer2, BUFFER_LOCK_EXCLUSIVE);
175 
176  /*
177  * If there are two buffers involved and we pinned just one of them,
178  * it's possible that the second one became all-visible while we were
179  * busy pinning the first one. If it looks like that's a possible
180  * scenario, we'll need to make a second pass through this loop.
181  */
182  if (buffer2 == InvalidBuffer || buffer1 == buffer2
183  || (need_to_pin_buffer1 && need_to_pin_buffer2))
184  break;
185  }
186 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define PageIsAllVisible(page)
Definition: bufpage.h:385
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
#define InvalidBuffer
Definition: buf.h:25
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4023
#define Assert(condition)
Definition: c.h:804
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)

◆ ReadBufferBI()

static Buffer ReadBufferBI ( Relation  relation,
BlockNumber  targetBlock,
ReadBufferMode  mode,
BulkInsertState  bistate 
)
static

Definition at line 89 of file hio.c.

References Assert, BufferGetBlockNumber(), BulkInsertStateData::current_buf, IncrBufferRefCount(), InvalidBuffer, MAIN_FORKNUM, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBufferExtended(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by RelationAddExtraBlocks(), and RelationGetBufferForTuple().

91 {
92  Buffer buffer;
93 
94  /* If not bulk-insert, exactly like ReadBuffer */
95  if (!bistate)
96  return ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
97  mode, NULL);
98 
99  /* If we have the desired block already pinned, re-pin and return it */
100  if (bistate->current_buf != InvalidBuffer)
101  {
102  if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
103  {
104  /*
105  * Currently the LOCK variants are only used for extending
106  * relation, which should never reach this branch.
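107  */
108  Assert(mode != RBM_ZERO_AND_LOCK &&
109  mode != RBM_ZERO_AND_CLEANUP_LOCK);
110 
111  IncrBufferRefCount(bistate->current_buf);
112  return bistate->current_buf;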
113  }
114  /* ... else drop the old buffer */
115  ReleaseBuffer(bistate->current_buf);
116  bistate->current_buf = InvalidBuffer;
117  }
118 
119  /* Perform a read using the buffer strategy */
120  buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
121  mode, bistate->strategy);
122 
123  /* Save the selected block as target for future inserts */
124  IncrBufferRefCount(buffer);
125  bistate->current_buf = buffer;
126 
127  return buffer;
128 }
static PgChecksumMode mode
Definition: pg_checksums.c:61
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:744
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3784
#define Assert(condition)
Definition: c.h:804
BufferAccessStrategy strategy
Definition: hio.h:31
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2758
int Buffer
Definition: buf.h:23
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:3822
Buffer current_buf
Definition: hio.h:32

◆ RelationAddExtraBlocks()

static void RelationAddExtraBlocks ( Relation  relation,
BulkInsertState  bistate 
)
static

Definition at line 195 of file hio.c.

References BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, elog, ERROR, FreeSpaceMapVacuumRange(), InvalidBlockNumber, Min, P_NEW, PageIsNew, RBM_ZERO_AND_LOCK, ReadBufferBI(), RecordPageWithFreeSpace(), RelationExtensionLockWaiterCount(), RelationGetRelationName, SizeOfPageHeaderData, and UnlockReleaseBuffer().

Referenced by RelationGetBufferForTuple().

196 {
197  BlockNumber blockNum,
198  firstBlock = InvalidBlockNumber;
199  int extraBlocks;
200  int lockWaiters;
201 
202  /* Use the length of the lock wait queue to judge how much to extend. */
203  lockWaiters = RelationExtensionLockWaiterCount(relation);
204  if (lockWaiters <= 0)
205  return;
206 
207  /*
208  * It might seem like multiplying the number of lock waiters by as much as
209  * 20 is too aggressive, but benchmarking revealed that smaller numbers
210  * were insufficient. 512 is just an arbitrary cap to prevent
211  * pathological results.
212  */
213  extraBlocks = Min(512, lockWaiters * 20);
214 
215  do
216  {
217  Buffer buffer;
218  Page page;
219  Size freespace;
220 
221  /*
222  * Extend by one page. This should generally match the main-line
223  * extension code in RelationGetBufferForTuple, except that we hold
224  * the relation extension lock throughout, and we don't immediately
225  * initialize the page (see below).
226  */
227  buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate);
228  page = BufferGetPage(buffer);
229 
230  if (!PageIsNew(page))
231  elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
232  BufferGetBlockNumber(buffer),
233  RelationGetRelationName(relation));
234 
235  /*
236  * Add the page to the FSM without initializing. If we were to
237  * initialize here, the page would potentially get flushed out to disk
238  * before we add any useful content. There's no guarantee that that'd
239  * happen before a potential crash, so we need to deal with
240  * uninitialized pages anyway, thus avoid the potential for
241  * unnecessary writes.
242  */
243 
244  /* we'll need this info below */
245  blockNum = BufferGetBlockNumber(buffer);
246  freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData;
247 
248  UnlockReleaseBuffer(buffer);
249 
250  /* Remember first block number thus added. */
251  if (firstBlock == InvalidBlockNumber)
252  firstBlock = blockNum;
253 
254  /*
255  * Immediately update the bottom level of the FSM. This has a good
256  * chance of making this page visible to other concurrently inserting
257  * backends, and we want that to happen without delay.
258  */
259  RecordPageWithFreeSpace(relation, blockNum, freespace);
260  }
261  while (--extraBlocks > 0);
262 
263  /*
264  * Updating the upper levels of the free space map is too expensive to do
265  * for every block, but it's worth doing once at the end to make sure that
266  * subsequent insertion activity sees all of those nifty free pages we
267  * just inserted.
268  */
269  FreeSpaceMapVacuumRange(relation, firstBlock, blockNum + 1);
270 }
int RelationExtensionLockWaiterCount(Relation relation)
Definition: lmgr.c:438
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define Min(x, y)
Definition: c.h:986
uint32 BlockNumber
Definition: block.h:31
#define P_NEW
Definition: bufmgr.h:91
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
Definition: hio.c:89
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3807
#define ERROR
Definition: elog.h:46
#define RelationGetRelationName(relation)
Definition: rel.h:503
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
size_t Size
Definition: c.h:540
#define InvalidBlockNumber
Definition: block.h:33
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2758
#define PageIsNew(page)
Definition: bufpage.h:229
#define elog(elevel,...)
Definition: elog.h:232
int Buffer
Definition: buf.h:23
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
Pointer Page
Definition: bufpage.h:78

◆ RelationGetBufferForTuple()

Buffer RelationGetBufferForTuple ( Relation  relation,
Size  len,
Buffer  otherBuffer,
int  options,
BulkInsertState  bistate,
Buffer *  vmbuffer,
Buffer *  vmbuffer_other 
)

Definition at line 333 of file hio.c.

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, ConditionalLockBuffer(), ConditionalLockRelationForExtension(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, LockBuffer(), LockRelationForExtension(), MarkBufferDirty(), Max, MAXALIGN, MaxHeapTupleSize, MaxHeapTuplesPerPage, P_NEW, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber, PageInit(), PageIsAllVisible, PageIsNew, PANIC, RBM_NORMAL, RBM_ZERO_AND_LOCK, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RELATION_IS_LOCAL, RelationAddExtraBlocks(), RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), unlikely, UnlockRelationForExtension(), UnlockReleaseBuffer(), and visibilitymap_pin().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

337 {
338  bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
339  Buffer buffer = InvalidBuffer;
340  Page page;
341  Size nearlyEmptyFreeSpace,
342  pageFreeSpace = 0,
343  saveFreeSpace = 0,
344  targetFreeSpace = 0;
345  BlockNumber targetBlock,
346  otherBlock;
347  bool needLock;
348 
349  len = MAXALIGN(len); /* be conservative */
350 
351  /* Bulk insert is not supported for updates, only inserts. */
352  Assert(otherBuffer == InvalidBuffer || !bistate);
353 
354  /*
355  * If we're gonna fail for oversize tuple, do it right away
356  */
357  if (len > MaxHeapTupleSize)
358  ereport(ERROR,
359  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
360  errmsg("row is too big: size %zu, maximum size %zu",
361  len, MaxHeapTupleSize)));
362 
363  /* Compute desired extra freespace due to fillfactor option */
364  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
365  HEAP_DEFAULT_FILLFACTOR);
366 
367  /*
368  * Since pages without tuples can still have line pointers, we consider
369  * pages "empty" when the unavailable space is slight. This threshold is
370  * somewhat arbitrary, but it should prevent most unnecessary relation
371  * extensions while inserting large tuples into low-fillfactor tables.
372  */
373  nearlyEmptyFreeSpace = MaxHeapTupleSize -
374  (MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
375  if (len + saveFreeSpace > nearlyEmptyFreeSpace)
376  targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
377  else
378  targetFreeSpace = len + saveFreeSpace;
379 
380  if (otherBuffer != InvalidBuffer)
381  otherBlock = BufferGetBlockNumber(otherBuffer);
382  else
383  otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */
384 
385  /*
386  * We first try to put the tuple on the same page we last inserted a tuple
387  * on, as cached in the BulkInsertState or relcache entry. If that
388  * doesn't work, we ask the Free Space Map to locate a suitable page.
389  * Since the FSM's info might be out of date, we have to be prepared to
390  * loop around and retry multiple times. (To insure this isn't an infinite
391  * loop, we must update the FSM with the correct amount of free space on
392  * each page that proves not to be suitable.) If the FSM has no record of
393  * a page with enough free space, we give up and extend the relation.
394  *
395  * When use_fsm is false, we either put the tuple onto the existing target
396  * page or extend the relation.
397  */
398  if (bistate && bistate->current_buf != InvalidBuffer)
399  targetBlock = BufferGetBlockNumber(bistate->current_buf);
400  else
401  targetBlock = RelationGetTargetBlock(relation);
402 
403  if (targetBlock == InvalidBlockNumber && use_fsm)
404  {
405  /*
406  * We have no cached target page, so ask the FSM for an initial
407  * target.
408  */
409  targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
410  }
411 
412  /*
413  * If the FSM knows nothing of the rel, try the last page before we give
414  * up and extend. This avoids one-tuple-per-page syndrome during
415  * bootstrapping or in a recently-started system.
416  */
417  if (targetBlock == InvalidBlockNumber)
418  {
419  BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
420 
421  if (nblocks > 0)
422  targetBlock = nblocks - 1;
423  }
424 
425 loop:
426  while (targetBlock != InvalidBlockNumber)
427  {
428  /*
429  * Read and exclusive-lock the target block, as well as the other
430  * block if one was given, taking suitable care with lock ordering and
431  * the possibility they are the same block.
432  *
433  * If the page-level all-visible flag is set, caller will need to
434  * clear both that and the corresponding visibility map bit. However,
435  * by the time we return, we'll have x-locked the buffer, and we don't
436  * want to do any I/O while in that state. So we check the bit here
437  * before taking the lock, and pin the page if it appears necessary.
438  * Checking without the lock creates a risk of getting the wrong
439  * answer, so we'll have to recheck after acquiring the lock.
440  */
441  if (otherBuffer == InvalidBuffer)
442  {
443  /* easy case */
444  buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
445  if (PageIsAllVisible(BufferGetPage(buffer)))
446  visibilitymap_pin(relation, targetBlock, vmbuffer);
447 
448  /*
449  * If the page is empty, pin vmbuffer to set all_frozen bit later.
450  */
451  if ((options & HEAP_INSERT_FROZEN) &&
452  (PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
453  visibilitymap_pin(relation, targetBlock, vmbuffer);
454 
455  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
456  }
457  else if (otherBlock == targetBlock)
458  {
459  /* also easy case */
460  buffer = otherBuffer;
461  if (PageIsAllVisible(BufferGetPage(buffer)))
462  visibilitymap_pin(relation, targetBlock, vmbuffer);
463  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
464  }
465  else if (otherBlock < targetBlock)
466  {
467  /* lock other buffer first */
468  buffer = ReadBuffer(relation, targetBlock);
469  if (PageIsAllVisible(BufferGetPage(buffer)))
470  visibilitymap_pin(relation, targetBlock, vmbuffer);
471  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
472  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
473  }
474  else
475  {
476  /* lock target buffer first */
477  buffer = ReadBuffer(relation, targetBlock);
478  if (PageIsAllVisible(BufferGetPage(buffer)))
479  visibilitymap_pin(relation, targetBlock, vmbuffer);
480  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
481  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
482  }
483 
484  /*
485  * We now have the target page (and the other buffer, if any) pinned
486  * and locked. However, since our initial PageIsAllVisible checks
487  * were performed before acquiring the lock, the results might now be
488  * out of date, either for the selected victim buffer, or for the
489  * other buffer passed by the caller. In that case, we'll need to
490  * give up our locks, go get the pin(s) we failed to get earlier, and
491  * re-lock. That's pretty painful, but hopefully shouldn't happen
492  * often.
493  *
494  * Note that there's a small possibility that we didn't pin the page
495  * above but still have the correct page pinned anyway, either because
496  * we've already made a previous pass through this loop, or because
497  * caller passed us the right page anyway.
498  *
499  * Note also that it's possible that by the time we get the pin and
500  * retake the buffer locks, the visibility map bit will have been
501  * cleared by some other backend anyway. In that case, we'll have
502  * done a bit of extra work for no gain, but there's no real harm
503  * done.
504  */
505  if (otherBuffer == InvalidBuffer || targetBlock <= otherBlock)
506  GetVisibilityMapPins(relation, buffer, otherBuffer,
507  targetBlock, otherBlock, vmbuffer,
508  vmbuffer_other);
509  else
510  GetVisibilityMapPins(relation, otherBuffer, buffer,
511  otherBlock, targetBlock, vmbuffer_other,
512  vmbuffer);
513 
514  /*
515  * Now we can check to see if there's enough free space here. If so,
516  * we're done.
517  */
518  page = BufferGetPage(buffer);
519 
520  /*
521  * If necessary initialize page, it'll be used soon. We could avoid
522  * dirtying the buffer here, and rely on the caller to do so whenever
523  * it puts a tuple onto the page, but there seems not much benefit in
524  * doing so.
525  */
526  if (PageIsNew(page))
527  {
528  PageInit(page, BufferGetPageSize(buffer), 0);
529  MarkBufferDirty(buffer);
530  }
531 
532  pageFreeSpace = PageGetHeapFreeSpace(page);
533  if (targetFreeSpace <= pageFreeSpace)
534  {
535  /* use this page as future insert target, too */
536  RelationSetTargetBlock(relation, targetBlock);
537  return buffer;
538  }
539 
540  /*
541  * Not enough space, so we must give up our page locks and pin (if
542  * any) and prepare to look elsewhere. We don't care which order we
543  * unlock the two buffers in, so this can be slightly simpler than the
544  * code above.
545  */
546  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
547  if (otherBuffer == InvalidBuffer)
548  ReleaseBuffer(buffer);
549  else if (otherBlock != targetBlock)
550  {
551  LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
552  ReleaseBuffer(buffer);
553  }
554 
555  /* Without FSM, always fall out of the loop and extend */
556  if (!use_fsm)
557  break;
558 
559  /*
560  * Update FSM as to condition of this page, and ask for another page
561  * to try.
562  */
563  targetBlock = RecordAndGetPageWithFreeSpace(relation,
564  targetBlock,
565  pageFreeSpace,
566  targetFreeSpace);
567  }
568 
569  /*
570  * Have to extend the relation.
571  *
572  * We have to use a lock to ensure no one else is extending the rel at the
573  * same time, else we will both try to initialize the same new page. We
574  * can skip locking for new or temp relations, however, since no one else
575  * could be accessing them.
576  */
577  needLock = !RELATION_IS_LOCAL(relation);
578 
579  /*
580  * If we need the lock but are not able to acquire it immediately, we'll
581  * consider extending the relation by multiple blocks at a time to manage
582  * contention on the relation extension lock. However, this only makes
583  * sense if we're using the FSM; otherwise, there's no point.
584  */
585  if (needLock)
586  {
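587  if (!use_fsm)
588  LockRelationForExtension(relation, ExclusiveLock);
589  else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
590  {
591  /* Couldn't get the lock immediately; wait for it. */
592  LockRelationForExtension(relation, ExclusiveLock);
593 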
594  /*
595  * Check if some other backend has extended a block for us while
596  * we were waiting on the lock.
597  */
598  targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
599 
600  /*
601  * If some other waiter has already extended the relation, we
602  * don't need to do so; just use the existing freespace.
603  */
604  if (targetBlock != InvalidBlockNumber)
605  {
606  UnlockRelationForExtension(relation, ExclusiveLock);
607  goto loop;
608  }
609 
610  /* Time to bulk-extend. */
611  RelationAddExtraBlocks(relation, bistate);
612  }
613  }
614 
615  /*
616  * In addition to whatever extension we performed above, we always add at
617  * least one block to satisfy our own request.
618  *
619  * XXX This does an lseek - rather expensive - but at the moment it is the
620  * only way to accurately determine how many blocks are in a relation. Is
621  * it worth keeping an accurate file length in shared memory someplace,
622  * rather than relying on the kernel to do it for us?
623  */
624  buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate);
625 
626  /*
627  * We need to initialize the empty new page. Double-check that it really
628  * is empty (this should never happen, but if it does we don't want to
629  * risk wiping out valid data).
630  */
631  page = BufferGetPage(buffer);
632 
633  if (!PageIsNew(page))
634  elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
635  BufferGetBlockNumber(buffer),
636  RelationGetRelationName(relation));
637 
638  PageInit(page, BufferGetPageSize(buffer), 0);
639  MarkBufferDirty(buffer);
640 
641  /*
642  * The page is empty, pin vmbuffer to set all_frozen bit.
643  */
644  if (options & HEAP_INSERT_FROZEN)
645  {
646  Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0);
647  visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
648  }
649 
650  /*
651  * Release the file-extension lock; it's now OK for someone else to extend
652  * the relation some more.
653  */
654  if (needLock)
655  UnlockRelationForExtension(relation, ExclusiveLock);
656 
657  /*
658  * Lock the other buffer. It's guaranteed to be of a lower page number
659  * than the new page. To conform with the deadlock prevent rules, we ought
660  * to lock otherBuffer first, but that would give other backends a chance
661  * to put tuples on our page. To reduce the likelihood of that, attempt to
662  * lock the other buffer conditionally, that's very likely to work.
663  * Otherwise we need to lock buffers in the correct order, and retry if
664  * the space has been used in the mean time.
665  *
666  * Alternatively, we could acquire the lock on otherBuffer before
667  * extending the relation, but that'd require holding the lock while
668  * performing IO, which seems worse than an unlikely retry.
669  */
670  if (otherBuffer != InvalidBuffer)
671  {
672  Assert(otherBuffer != buffer);
673  targetBlock = BufferGetBlockNumber(buffer);
674  Assert(targetBlock > otherBlock);
675 
676  if (unlikely(!ConditionalLockBuffer(otherBuffer)))
677  {
678  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
679  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
680  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
681 
682  /*
683  * Because the buffers were unlocked for a while, it's possible,
684  * although unlikely, that an all-visible flag became set or that
685  * somebody used up the available space in the new page. We can
686  * use GetVisibilityMapPins to deal with the first case. In the
687  * second case, just retry from start.
688  */
689  GetVisibilityMapPins(relation, otherBuffer, buffer,
690  otherBlock, targetBlock, vmbuffer_other,
691  vmbuffer);
692 
693  if (len > PageGetHeapFreeSpace(page))
694  {
695  LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
696  UnlockReleaseBuffer(buffer);
697 
698  goto loop;
699  }
700  }
701  }
702 
703  if (len > PageGetHeapFreeSpace(page))
704  {
705  /* We should not get here given the test at the top */
706  elog(PANIC, "tuple is too big: size %zu", len);
707  }
708 
709  /*
710  * Remember the new page as our target for future insertions.
711  *
712  * XXX should we enter the new page into the free space map immediately,
713  * or just keep it for this backend's exclusive use in the short run
714  * (until VACUUM sees it)? Seems to depend on whether you expect the
715  * current backend to make more insertions or not, which is probably a
716  * good bet most of the time. So for now, don't add it to FSM yet.
717  */
718  RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
719 
720  return buffer;
721 }
bool ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:421
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define PageIsAllVisible(page)
Definition: bufpage.h:385
#define HEAP_INSERT_FROZEN
Definition: heapam.h:35
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1562
#define ExclusiveLock
Definition: lockdefs.h:44
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:602
#define MaxHeapTuplesPerPage
Definition: htup_details.h:573
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:698
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3784
#define P_NEW
Definition: bufmgr.h:91
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
#define PANIC
Definition: elog.h:50
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
#define RelationGetTargetBlock(relation)
Definition: rel.h:554
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
Definition: hio.c:89
static void GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
Definition: hio.c:139
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3807
#define ERROR
Definition: elog.h:46
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:984
#define MaxHeapTupleSize
Definition: htup_details.h:559
#define RelationGetRelationName(relation)
Definition: rel.h:503
struct ItemIdData ItemIdData
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:4049
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:353
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:403
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:453
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4023
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:213
#define ereport(elevel,...)
Definition: elog.h:157
#define Max(x, y)
Definition: c.h:980
#define Assert(condition)
Definition: c.h:804
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:697
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:561
size_t Size
Definition: c.h:540
#define InvalidBlockNumber
Definition: block.h:33
#define MAXALIGN(LEN)
Definition: c.h:757
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:34
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2758
static void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
Definition: hio.c:195
#define PageIsNew(page)
Definition: bufpage.h:229
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define elog(elevel,...)
Definition: elog.h:232
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:132
#define unlikely(x)
Definition: c.h:273
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:149
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:324
int Buffer
Definition: buf.h:23
Buffer current_buf
Definition: hio.h:32
Pointer Page
Definition: bufpage.h:78
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42

◆ RelationPutHeapTuple()

void RelationPutHeapTuple ( Relation  relation,
Buffer  buffer,
HeapTuple  tuple,
bool  token 
)

Definition at line 36 of file hio.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, HEAP_XMAX_COMMITTED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderIsSpeculative, InvalidOffsetNumber, ItemPointerSet, PageAddItem, PageGetItem, PageGetItemId, PANIC, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, and HeapTupleData::t_self.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

40 {
41  Page pageHeader;
42  OffsetNumber offnum;
43 
44  /*
45  * A tuple that's being inserted speculatively should already have its
46  * token set.
47  */
48  Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
49 
50  /*
51  * Do not allow tuples with invalid combinations of hint bits to be placed
52  * on a page. This combination is detected as corruption by the
53  * contrib/amcheck logic, so if you disable this assertion, make
54  * corresponding changes there.
55  */
56  Assert(!((tuple->t_data->t_infomask & HEAP_XMAX_COMMITTED) &&
57  (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)));
58 
59  /* Add the tuple to the page */
60  pageHeader = BufferGetPage(buffer);
61 
62  offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
63  tuple->t_len, InvalidOffsetNumber, false, true);
64 
65  if (offnum == InvalidOffsetNumber)
66  elog(PANIC, "failed to add tuple to page");
67 
68  /* Update tuple->t_self to the actual position where it was stored */
69  ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
70 
71  /*
72  * Insert the correct position into CTID of the stored tuple, too (unless
73  * this is a speculative insertion, in which case the token is held in
74  * CTID field instead)
75  */
76  if (!token)
77  {
78  ItemId itemId = PageGetItemId(pageHeader, offnum);
79  HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);
80 
81  item->t_ctid = tuple->t_self;
82  }
83 }
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
Pointer Item
Definition: item.h:17
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:429
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:416
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:206
#define PANIC
Definition: elog.h:50
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:68
ItemPointerData t_ctid
Definition: htup_details.h:160
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:208
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:804
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2758
#define elog(elevel,...)
Definition: elog.h:232
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127