PostgreSQL Source Code  git master
hio.h File Reference
#include "access/heapam.h"
#include "access/htup.h"
#include "utils/relcache.h"
#include "storage/buf.h"

Go to the source code of this file.

Data Structures

struct  BulkInsertStateData
 

Typedefs

typedef struct BulkInsertStateData BulkInsertStateData
 

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other)
 

Typedef Documentation
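The typedef names the per-operation state that lets consecutive heap inserts reuse a pinned target buffer instead of consulting the FSM on every call. A minimal sketch of the declaration, with the field set inferred from its use in hio.c (current_buf is the cached insertion target that RelationGetBufferForTuple checks first; see hio.h for the authoritative definition):

typedef struct BulkInsertStateData
{
    BufferAccessStrategy strategy;      /* ring-buffer access strategy for bulk writes */
    Buffer               current_buf;   /* current insertion target page, or InvalidBuffer */
} BulkInsertStateData;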

Function Documentation

Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other)

Definition at line 297 of file hio.c.

References Assert, buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage, BufferGetPageSize, ConditionalLockRelationForExtension(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, LockBuffer(), LockRelationForExtension(), MAXALIGN, MaxHeapTupleSize, P_NEW, PageGetHeapFreeSpace(), PageInit(), PageIsAllVisible, PageIsNew, PANIC, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RELATION_IS_LOCAL, RelationAddExtraBlocks(), RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), UnlockRelationForExtension(), and visibilitymap_pin().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

{
    bool        use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
    Buffer      buffer = InvalidBuffer;
    Page        page;
    Size        pageFreeSpace = 0,
                saveFreeSpace = 0;
    BlockNumber targetBlock,
                otherBlock;
    bool        needLock;

    len = MAXALIGN(len);        /* be conservative */

    /* Bulk insert is not supported for updates, only inserts. */
    Assert(otherBuffer == InvalidBuffer || !bistate);

    /*
     * If we're gonna fail for oversize tuple, do it right away
     */
    if (len > MaxHeapTupleSize)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("row is too big: size %zu, maximum size %zu",
                        len, MaxHeapTupleSize)));

    /* Compute desired extra freespace due to fillfactor option */
    saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
                                                   HEAP_DEFAULT_FILLFACTOR);

    if (otherBuffer != InvalidBuffer)
        otherBlock = BufferGetBlockNumber(otherBuffer);
    else
        otherBlock = InvalidBlockNumber;    /* just to keep compiler quiet */

    /*
     * We first try to put the tuple on the same page we last inserted a tuple
     * on, as cached in the BulkInsertState or relcache entry.  If that
     * doesn't work, we ask the Free Space Map to locate a suitable page.
     * Since the FSM's info might be out of date, we have to be prepared to
     * loop around and retry multiple times.  (To ensure this isn't an
     * infinite loop, we must update the FSM with the correct amount of free
     * space on each page that proves not to be suitable.)  If the FSM has no
     * record of a page with enough free space, we give up and extend the
     * relation.
     *
     * When use_fsm is false, we either put the tuple onto the existing target
     * page or extend the relation.
     */
    if (len + saveFreeSpace > MaxHeapTupleSize)
    {
        /* can't fit, don't bother asking FSM */
        targetBlock = InvalidBlockNumber;
        use_fsm = false;
    }
    else if (bistate && bistate->current_buf != InvalidBuffer)
        targetBlock = BufferGetBlockNumber(bistate->current_buf);
    else
        targetBlock = RelationGetTargetBlock(relation);

    if (targetBlock == InvalidBlockNumber && use_fsm)
    {
        /*
         * We have no cached target page, so ask the FSM for an initial
         * target.
         */
        targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);

        /*
         * If the FSM knows nothing of the rel, try the last page before we
         * give up and extend.  This avoids one-tuple-per-page syndrome during
         * bootstrapping or in a recently-started system.
         */
        if (targetBlock == InvalidBlockNumber)
        {
            BlockNumber nblocks = RelationGetNumberOfBlocks(relation);

            if (nblocks > 0)
                targetBlock = nblocks - 1;
        }
    }

loop:
    while (targetBlock != InvalidBlockNumber)
    {
        /*
         * Read and exclusive-lock the target block, as well as the other
         * block if one was given, taking suitable care with lock ordering and
         * the possibility they are the same block.
         *
         * If the page-level all-visible flag is set, caller will need to
         * clear both that and the corresponding visibility map bit.  However,
         * by the time we return, we'll have x-locked the buffer, and we don't
         * want to do any I/O while in that state.  So we check the bit here
         * before taking the lock, and pin the page if it appears necessary.
         * Checking without the lock creates a risk of getting the wrong
         * answer, so we'll have to recheck after acquiring the lock.
         */
        if (otherBuffer == InvalidBuffer)
        {
            /* easy case */
            buffer = ReadBufferBI(relation, targetBlock, bistate);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else if (otherBlock == targetBlock)
        {
            /* also easy case */
            buffer = otherBuffer;
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else if (otherBlock < targetBlock)
        {
            /* lock other buffer first */
            buffer = ReadBuffer(relation, targetBlock);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else
        {
            /* lock target buffer first */
            buffer = ReadBuffer(relation, targetBlock);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
            LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
        }

        /*
         * We now have the target page (and the other buffer, if any) pinned
         * and locked.  However, since our initial PageIsAllVisible checks
         * were performed before acquiring the lock, the results might now be
         * out of date, either for the selected victim buffer, or for the
         * other buffer passed by the caller.  In that case, we'll need to
         * give up our locks, go get the pin(s) we failed to get earlier, and
         * re-lock.  That's pretty painful, but hopefully shouldn't happen
         * often.
         *
         * Note that there's a small possibility that we didn't pin the page
         * above but still have the correct page pinned anyway, either because
         * we've already made a previous pass through this loop, or because
         * caller passed us the right page anyway.
         *
         * Note also that it's possible that by the time we get the pin and
         * retake the buffer locks, the visibility map bit will have been
         * cleared by some other backend anyway.  In that case, we'll have
         * done a bit of extra work for no gain, but there's no real harm
         * done.
         */
        if (otherBuffer == InvalidBuffer || buffer <= otherBuffer)
            GetVisibilityMapPins(relation, buffer, otherBuffer,
                                 targetBlock, otherBlock, vmbuffer,
                                 vmbuffer_other);
        else
            GetVisibilityMapPins(relation, otherBuffer, buffer,
                                 otherBlock, targetBlock, vmbuffer_other,
                                 vmbuffer);

        /*
         * Now we can check to see if there's enough free space here.  If so,
         * we're done.
         */
        page = BufferGetPage(buffer);
        pageFreeSpace = PageGetHeapFreeSpace(page);
        if (len + saveFreeSpace <= pageFreeSpace)
        {
            /* use this page as future insert target, too */
            RelationSetTargetBlock(relation, targetBlock);
            return buffer;
        }

        /*
         * Not enough space, so we must give up our page locks and pin (if
         * any) and prepare to look elsewhere.  We don't care which order we
         * unlock the two buffers in, so this can be slightly simpler than the
         * code above.
         */
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
        if (otherBuffer == InvalidBuffer)
            ReleaseBuffer(buffer);
        else if (otherBlock != targetBlock)
        {
            LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buffer);
        }

        /* Without FSM, always fall out of the loop and extend */
        if (!use_fsm)
            break;

        /*
         * Update FSM as to condition of this page, and ask for another page
         * to try.
         */
        targetBlock = RecordAndGetPageWithFreeSpace(relation,
                                                    targetBlock,
                                                    pageFreeSpace,
                                                    len + saveFreeSpace);
    }

    /*
     * Have to extend the relation.
     *
     * We have to use a lock to ensure no one else is extending the rel at the
     * same time, else we will both try to initialize the same new page.  We
     * can skip locking for new or temp relations, however, since no one else
     * could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(relation);

    /*
     * If we need the lock but are not able to acquire it immediately, we'll
     * consider extending the relation by multiple blocks at a time to manage
     * contention on the relation extension lock.  However, this only makes
     * sense if we're using the FSM; otherwise, there's no point.
     */
    if (needLock)
    {
        if (!use_fsm)
            LockRelationForExtension(relation, ExclusiveLock);
        else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
        {
            /* Couldn't get the lock immediately; wait for it. */
            LockRelationForExtension(relation, ExclusiveLock);

            /*
             * Check if some other backend has extended a block for us while
             * we were waiting on the lock.
             */
            targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);

            /*
             * If some other waiter has already extended the relation, we
             * don't need to do so; just use the existing freespace.
             */
            if (targetBlock != InvalidBlockNumber)
            {
                UnlockRelationForExtension(relation, ExclusiveLock);
                goto loop;
            }

            /* Time to bulk-extend. */
            RelationAddExtraBlocks(relation, bistate);
        }
    }

    /*
     * In addition to whatever extension we performed above, we always add at
     * least one block to satisfy our own request.
     *
     * XXX This does an lseek - rather expensive - but at the moment it is the
     * only way to accurately determine how many blocks are in a relation.  Is
     * it worth keeping an accurate file length in shared memory someplace,
     * rather than relying on the kernel to do it for us?
     */
    buffer = ReadBufferBI(relation, P_NEW, bistate);

    /*
     * We can be certain that locking the otherBuffer first is OK, since it
     * must have a lower page number.
     */
    if (otherBuffer != InvalidBuffer)
        LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);

    /*
     * Now acquire lock on the new page.
     */
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

    /*
     * Release the file-extension lock; it's now OK for someone else to extend
     * the relation some more.  Note that we cannot release this lock before
     * we have buffer lock on the new page, or we risk a race condition
     * against vacuumlazy.c --- see comments therein.
     */
    if (needLock)
        UnlockRelationForExtension(relation, ExclusiveLock);

    /*
     * We need to initialize the empty new page.  Double-check that it really
     * is empty (this should never happen, but if it does we don't want to
     * risk wiping out valid data).
     */
    page = BufferGetPage(buffer);

    if (!PageIsNew(page))
        elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
             BufferGetBlockNumber(buffer),
             RelationGetRelationName(relation));

    PageInit(page, BufferGetPageSize(buffer), 0);

    if (len > PageGetHeapFreeSpace(page))
    {
        /* We should not get here given the test at the top */
        elog(PANIC, "tuple is too big: size %zu", len);
    }

    /*
     * Remember the new page as our target for future insertions.
     *
     * XXX should we enter the new page into the free space map immediately,
     * or just keep it for this backend's exclusive use in the short run
     * (until VACUUM sees it)?  Seems to depend on whether you expect the
     * current backend to make more insertions or not, which is probably a
     * good bet most of the time.  So for now, don't add it to FSM yet.
     */
    RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));

    return buffer;
}
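For context, this is roughly how heap_insert() obtains its target buffer (a sketch paraphrased from that caller; heaptup, options, bistate, and vmbuffer are the caller's locals, not part of this file):

    /*
     * Find a buffer to insert this tuple into.  If the chosen page is
     * all-visible, this also pins the corresponding visibility map page.
     */
    buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
                                       InvalidBuffer,   /* plain insert: no sibling page */
                                       options, bistate,
                                       &vmbuffer, NULL);

heap_update() instead passes the buffer holding the old tuple as otherBuffer, which is what drives the lock-ordering branches in the loop above.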
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)

Definition at line 36 of file hio.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, HeapTupleHeaderIsSpeculative, InvalidOffsetNumber, ItemPointerSet, PageAddItem, PageGetItem, PageGetItemId, PANIC, HeapTupleData::t_data, HeapTupleData::t_len, and HeapTupleData::t_self.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().

{
    Page        pageHeader;
    OffsetNumber offnum;

    /*
     * A tuple that's being inserted speculatively should already have its
     * token set.
     */
    Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));

    /* Add the tuple to the page */
    pageHeader = BufferGetPage(buffer);

    offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
                         tuple->t_len, InvalidOffsetNumber, false, true);

    if (offnum == InvalidOffsetNumber)
        elog(PANIC, "failed to add tuple to page");

    /* Update tuple->t_self to the actual position where it was stored */
    ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);

    /*
     * Insert the correct position into CTID of the stored tuple, too (unless
     * this is a speculative insertion, in which case the token is held in
     * CTID field instead)
     */
    if (!token)
    {
        ItemId      itemId = PageGetItemId(pageHeader, offnum);
        Item        item = PageGetItem(pageHeader, itemId);

        ((HeapTupleHeader) item)->t_ctid = tuple->t_self;
    }
}
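For context, heap_insert() places the tuple on the buffer returned by RelationGetBufferForTuple() roughly like this (a sketch paraphrased from that caller; heaptup and options are the caller's locals, and HEAP_INSERT_SPECULATIVE is the insert flag that drives the token argument):

    /*
     * Put the tuple on the page; pass token = true for a speculative
     * insertion so t_ctid keeps holding the speculative token instead of
     * being overwritten with the tuple's own position.
     */
    RelationPutHeapTuple(relation, buffer, heaptup,
                         (options & HEAP_INSERT_SPECULATIVE) != 0);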