PostgreSQL Source Code  git master
hio.h File Reference
#include "access/heapam.h"
#include "access/htup.h"
#include "utils/relcache.h"
#include "storage/buf.h"
Include dependency graph for hio.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  BulkInsertStateData
 

Typedefs

typedef struct BulkInsertStateData BulkInsertStateData
 

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other)
 

Typedef Documentation

◆ BulkInsertStateData

typedef struct BulkInsertStateData BulkInsertStateData

Function Documentation

◆ RelationGetBufferForTuple()

Buffer RelationGetBufferForTuple ( Relation  relation,
Size  len,
Buffer  otherBuffer,
int  options,
BulkInsertState  bistate,
Buffer *  vmbuffer,
Buffer *  vmbuffer_other 
)

Definition at line 313 of file hio.c.

References Assert, buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, ConditionalLockRelationForExtension(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, LockBuffer(), LockRelationForExtension(), MAXALIGN, MaxHeapTupleSize, P_NEW, PageGetHeapFreeSpace(), PageInit(), PageIsAllVisible, PageIsNew, PANIC, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RELATION_IS_LOCAL, RelationAddExtraBlocks(), RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), UnlockRelationForExtension(), and visibilitymap_pin().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

317 {
318  bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
319  Buffer buffer = InvalidBuffer;
320  Page page;
321  Size pageFreeSpace = 0,
322  saveFreeSpace = 0;
323  BlockNumber targetBlock,
324  otherBlock;
325  bool needLock;
326 
327  len = MAXALIGN(len); /* be conservative */
328 
329  /* Bulk insert is not supported for updates, only inserts. */
330  Assert(otherBuffer == InvalidBuffer || !bistate);
331 
332  /*
333  * If we're gonna fail for oversize tuple, do it right away
334  */
335  if (len > MaxHeapTupleSize)
336  ereport(ERROR,
337  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
338  errmsg("row is too big: size %zu, maximum size %zu",
339  len, MaxHeapTupleSize)));
340 
341  /* Compute desired extra freespace due to fillfactor option */
342  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
343  HEAP_DEFAULT_FILLFACTOR);
344 
345  if (otherBuffer != InvalidBuffer)
346  otherBlock = BufferGetBlockNumber(otherBuffer);
347  else
348  otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */
349 
350  /*
351  * We first try to put the tuple on the same page we last inserted a tuple
352  * on, as cached in the BulkInsertState or relcache entry. If that
353  * doesn't work, we ask the Free Space Map to locate a suitable page.
354  * Since the FSM's info might be out of date, we have to be prepared to
355  * loop around and retry multiple times. (To ensure this isn't an infinite
356  * loop, we must update the FSM with the correct amount of free space on
357  * each page that proves not to be suitable.) If the FSM has no record of
358  * a page with enough free space, we give up and extend the relation.
359  *
360  * When use_fsm is false, we either put the tuple onto the existing target
361  * page or extend the relation.
362  */
363  if (len + saveFreeSpace > MaxHeapTupleSize)
364  {
365  /* can't fit, don't bother asking FSM */
366  targetBlock = InvalidBlockNumber;
367  use_fsm = false;
368  }
369  else if (bistate && bistate->current_buf != InvalidBuffer)
370  targetBlock = BufferGetBlockNumber(bistate->current_buf);
371  else
372  targetBlock = RelationGetTargetBlock(relation);
373 
374  if (targetBlock == InvalidBlockNumber && use_fsm)
375  {
376  /*
377  * We have no cached target page, so ask the FSM for an initial
378  * target.
379  */
380  targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
381 
382  /*
383  * If the FSM knows nothing of the rel, try the last page before we
384  * give up and extend. This avoids one-tuple-per-page syndrome during
385  * bootstrapping or in a recently-started system.
386  */
387  if (targetBlock == InvalidBlockNumber)
388  {
389  BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
390 
391  if (nblocks > 0)
392  targetBlock = nblocks - 1;
393  }
394  }
395 
396 loop:
397  while (targetBlock != InvalidBlockNumber)
398  {
399  /*
400  * Read and exclusive-lock the target block, as well as the other
401  * block if one was given, taking suitable care with lock ordering and
402  * the possibility they are the same block.
403  *
404  * If the page-level all-visible flag is set, caller will need to
405  * clear both that and the corresponding visibility map bit. However,
406  * by the time we return, we'll have x-locked the buffer, and we don't
407  * want to do any I/O while in that state. So we check the bit here
408  * before taking the lock, and pin the page if it appears necessary.
409  * Checking without the lock creates a risk of getting the wrong
410  * answer, so we'll have to recheck after acquiring the lock.
411  */
412  if (otherBuffer == InvalidBuffer)
413  {
414  /* easy case */
415  buffer = ReadBufferBI(relation, targetBlock, bistate);
416  if (PageIsAllVisible(BufferGetPage(buffer)))
417  visibilitymap_pin(relation, targetBlock, vmbuffer);
418  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
419  }
420  else if (otherBlock == targetBlock)
421  {
422  /* also easy case */
423  buffer = otherBuffer;
424  if (PageIsAllVisible(BufferGetPage(buffer)))
425  visibilitymap_pin(relation, targetBlock, vmbuffer);
426  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
427  }
428  else if (otherBlock < targetBlock)
429  {
430  /* lock other buffer first */
431  buffer = ReadBuffer(relation, targetBlock);
432  if (PageIsAllVisible(BufferGetPage(buffer)))
433  visibilitymap_pin(relation, targetBlock, vmbuffer);
434  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
435  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
436  }
437  else
438  {
439  /* lock target buffer first */
440  buffer = ReadBuffer(relation, targetBlock);
441  if (PageIsAllVisible(BufferGetPage(buffer)))
442  visibilitymap_pin(relation, targetBlock, vmbuffer);
443  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
444  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
445  }
446 
447  /*
448  * We now have the target page (and the other buffer, if any) pinned
449  * and locked. However, since our initial PageIsAllVisible checks
450  * were performed before acquiring the lock, the results might now be
451  * out of date, either for the selected victim buffer, or for the
452  * other buffer passed by the caller. In that case, we'll need to
453  * give up our locks, go get the pin(s) we failed to get earlier, and
454  * re-lock. That's pretty painful, but hopefully shouldn't happen
455  * often.
456  *
457  * Note that there's a small possibility that we didn't pin the page
458  * above but still have the correct page pinned anyway, either because
459  * we've already made a previous pass through this loop, or because
460  * caller passed us the right page anyway.
461  *
462  * Note also that it's possible that by the time we get the pin and
463  * retake the buffer locks, the visibility map bit will have been
464  * cleared by some other backend anyway. In that case, we'll have
465  * done a bit of extra work for no gain, but there's no real harm
466  * done.
467  */
468  if (otherBuffer == InvalidBuffer || buffer <= otherBuffer)
469  GetVisibilityMapPins(relation, buffer, otherBuffer,
470  targetBlock, otherBlock, vmbuffer,
471  vmbuffer_other);
472  else
473  GetVisibilityMapPins(relation, otherBuffer, buffer,
474  otherBlock, targetBlock, vmbuffer_other,
475  vmbuffer);
476 
477  /*
478  * Now we can check to see if there's enough free space here. If so,
479  * we're done.
480  */
481  page = BufferGetPage(buffer);
482  pageFreeSpace = PageGetHeapFreeSpace(page);
483  if (len + saveFreeSpace <= pageFreeSpace)
484  {
485  /* use this page as future insert target, too */
486  RelationSetTargetBlock(relation, targetBlock);
487  return buffer;
488  }
489 
490  /*
491  * Not enough space, so we must give up our page locks and pin (if
492  * any) and prepare to look elsewhere. We don't care which order we
493  * unlock the two buffers in, so this can be slightly simpler than the
494  * code above.
495  */
496  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
497  if (otherBuffer == InvalidBuffer)
498  ReleaseBuffer(buffer);
499  else if (otherBlock != targetBlock)
500  {
501  LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
502  ReleaseBuffer(buffer);
503  }
504 
505  /* Without FSM, always fall out of the loop and extend */
506  if (!use_fsm)
507  break;
508 
509  /*
510  * Update FSM as to condition of this page, and ask for another page
511  * to try.
512  */
513  targetBlock = RecordAndGetPageWithFreeSpace(relation,
514  targetBlock,
515  pageFreeSpace,
516  len + saveFreeSpace);
517  }
518 
519  /*
520  * Have to extend the relation.
521  *
522  * We have to use a lock to ensure no one else is extending the rel at the
523  * same time, else we will both try to initialize the same new page. We
524  * can skip locking for new or temp relations, however, since no one else
525  * could be accessing them.
526  */
527  needLock = !RELATION_IS_LOCAL(relation);
528 
529  /*
530  * If we need the lock but are not able to acquire it immediately, we'll
531  * consider extending the relation by multiple blocks at a time to manage
532  * contention on the relation extension lock. However, this only makes
533  * sense if we're using the FSM; otherwise, there's no point.
534  */
535  if (needLock)
536  {
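537  if (!use_fsm)
538  LockRelationForExtension(relation, ExclusiveLock);
539  else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))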
540  {
541  /* Couldn't get the lock immediately; wait for it. */
542  LockRelationForExtension(relation, ExclusiveLock);
543 
544  /*
545  * Check if some other backend has extended a block for us while
546  * we were waiting on the lock.
547  */
548  targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
549 
550  /*
551  * If some other waiter has already extended the relation, we
552  * don't need to do so; just use the existing freespace.
553  */
554  if (targetBlock != InvalidBlockNumber)
555  {
556  UnlockRelationForExtension(relation, ExclusiveLock);
557  goto loop;
558  }
559 
560  /* Time to bulk-extend. */
561  RelationAddExtraBlocks(relation, bistate);
562  }
563  }
564 
565  /*
566  * In addition to whatever extension we performed above, we always add at
567  * least one block to satisfy our own request.
568  *
569  * XXX This does an lseek - rather expensive - but at the moment it is the
570  * only way to accurately determine how many blocks are in a relation. Is
571  * it worth keeping an accurate file length in shared memory someplace,
572  * rather than relying on the kernel to do it for us?
573  */
574  buffer = ReadBufferBI(relation, P_NEW, bistate);
575 
576  /*
577  * We can be certain that locking the otherBuffer first is OK, since it
578  * must have a lower page number.
579  */
580  if (otherBuffer != InvalidBuffer)
581  LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
582 
583  /*
584  * Now acquire lock on the new page.
585  */
586  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
587 
588  /*
589  * Release the file-extension lock; it's now OK for someone else to extend
590  * the relation some more. Note that we cannot release this lock before
591  * we have buffer lock on the new page, or we risk a race condition
592  * against vacuumlazy.c --- see comments therein.
593  */
594  if (needLock)
595  UnlockRelationForExtension(relation, ExclusiveLock);
596 
597  /*
598  * We need to initialize the empty new page. Double-check that it really
599  * is empty (this should never happen, but if it does we don't want to
600  * risk wiping out valid data).
601  */
602  page = BufferGetPage(buffer);
603 
604  if (!PageIsNew(page))
605  elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
606  BufferGetBlockNumber(buffer),
607  RelationGetRelationName(relation));
608 
609  PageInit(page, BufferGetPageSize(buffer), 0);
610 
611  if (len > PageGetHeapFreeSpace(page))
612  {
613  /* We should not get here given the test at the top */
614  elog(PANIC, "tuple is too big: size %zu", len);
615  }
616 
617  /*
618  * Remember the new page as our target for future insertions.
619  *
620  * XXX should we enter the new page into the free space map immediately,
621  * or just keep it for this backend's exclusive use in the short run
622  * (until VACUUM sees it)? Seems to depend on whether you expect the
623  * current backend to make more insertions or not, which is probably a
624  * good bet most of the time. So for now, don't add it to FSM yet.
625  */
626  RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
627 
628  return buffer;
629 }
bool ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:350
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define PageIsAllVisible(page)
Definition: bufpage.h:381
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
#define ExclusiveLock
Definition: lockdefs.h:44
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, BulkInsertState bistate)
Definition: hio.c:80
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:528
#define InvalidBuffer
Definition: buf.h:25
int errcode(int sqlerrcode)
Definition: elog.c:575
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3309
#define P_NEW
Definition: bufmgr.h:82
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
#define PANIC
Definition: elog.h:53
#define RelationGetTargetBlock(relation)
Definition: rel.h:493
static void GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
Definition: hio.c:122
#define ERROR
Definition: elog.h:43
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:662
#define MaxHeapTupleSize
Definition: htup_details.h:573
#define RelationGetRelationName(relation)
Definition: rel.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define ereport(elevel, rest)
Definition: elog.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:298
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:332
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:382
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:147
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:199
#define Assert(condition)
Definition: c.h:699
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:215
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:500
size_t Size
Definition: c.h:433
#define InvalidBlockNumber
Definition: block.h:33
#define MAXALIGN(LEN)
Definition: c.h:652
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:29
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
static void RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
Definition: hio.c:178
#define PageIsNew(page)
Definition: bufpage.h:225
int errmsg(const char *fmt,...)
Definition: elog.c:797
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:132
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:149
#define elog
Definition: elog.h:219
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:269
int Buffer
Definition: buf.h:23
Buffer current_buf
Definition: hio.h:34
Pointer Page
Definition: bufpage.h:74
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:41

◆ RelationPutHeapTuple()

void RelationPutHeapTuple ( Relation  relation,
Buffer  buffer,
HeapTuple  tuple,
bool  token 
)

Definition at line 36 of file hio.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, HeapTupleHeaderIsSpeculative, InvalidOffsetNumber, ItemPointerSet, PageAddItem, PageGetItem, PageGetItemId, PANIC, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, and HeapTupleData::t_self.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

40 {
41  Page pageHeader;
42  OffsetNumber offnum;
43 
44  /*
45  * A tuple that's being inserted speculatively should already have its
46  * token set.
47  */
48  Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
49 
50  /* Add the tuple to the page */
51  pageHeader = BufferGetPage(buffer);
52 
53  offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
54  tuple->t_len, InvalidOffsetNumber, false, true);
55 
56  if (offnum == InvalidOffsetNumber)
57  elog(PANIC, "failed to add tuple to page");
58 
59  /* Update tuple->t_self to the actual position where it was stored */
60  ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
61 
62  /*
63  * Insert the correct position into CTID of the stored tuple, too (unless
64  * this is a speculative insertion, in which case the token is held in
65  * CTID field instead)
66  */
67  if (!token)
68  {
69  ItemId itemId = PageGetItemId(pageHeader, offnum);
70  HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);
71 
72  item->t_ctid = tuple->t_self;
73  }
74 }
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
Pointer Item
Definition: item.h:17
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:412
#define PANIC
Definition: elog.h:53
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:68
ItemPointerData t_ctid
Definition: htup_details.h:159
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:231
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:699
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:215
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
#define elog
Definition: elog.h:219
#define PageGetItem(page, itemId)
Definition: bufpage.h:336
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127