PostgreSQL Source Code git master
hio.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"

Macros

#define MAX_BUFFERS_TO_EXTEND_BY   64
 

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
static Buffer ReadBufferBI (Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
 
static bool GetVisibilityMapPins (Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
 
static Buffer RelationAddBlocks (Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
 

Macro Definition Documentation

◆ MAX_BUFFERS_TO_EXTEND_BY

#define MAX_BUFFERS_TO_EXTEND_BY   64

Function Documentation

◆ GetVisibilityMapPins()

static bool GetVisibilityMapPins ( Relation  relation,
Buffer  buffer1,
Buffer  buffer2,
BlockNumber  block1,
BlockNumber  block2,
Buffer vmbuffer1,
Buffer vmbuffer2 
)
static

Definition at line 140 of file hio.c.

143{
144 bool need_to_pin_buffer1;
145 bool need_to_pin_buffer2;
146 bool released_locks = false;
147
148 /*
149 * Swap buffers around to handle case of a single block/buffer, and to
150 * handle if lock ordering rules require to lock block2 first.
151 */
152 if (!BufferIsValid(buffer1) ||
153 (BufferIsValid(buffer2) && block1 > block2))
154 {
155 Buffer tmpbuf = buffer1;
156 Buffer *tmpvmbuf = vmbuffer1;
157 BlockNumber tmpblock = block1;
158
159 buffer1 = buffer2;
160 vmbuffer1 = vmbuffer2;
161 block1 = block2;
162
163 buffer2 = tmpbuf;
164 vmbuffer2 = tmpvmbuf;
165 block2 = tmpblock;
166 }
167
168 Assert(BufferIsValid(buffer1));
169 Assert(buffer2 == InvalidBuffer || block1 <= block2);
170
171 while (1)
172 {
173 /* Figure out which pins we need but don't have. */
174 need_to_pin_buffer1 = PageIsAllVisible(BufferGetPage(buffer1))
175 && !visibilitymap_pin_ok(block1, *vmbuffer1);
176 need_to_pin_buffer2 = buffer2 != InvalidBuffer
177 && PageIsAllVisible(BufferGetPage(buffer2))
178 && !visibilitymap_pin_ok(block2, *vmbuffer2);
179 if (!need_to_pin_buffer1 && !need_to_pin_buffer2)
180 break;
181
182 /* We must unlock both buffers before doing any I/O. */
183 released_locks = true;
184 LockBuffer(buffer1, BUFFER_LOCK_UNLOCK);
185 if (buffer2 != InvalidBuffer && buffer2 != buffer1)
186 LockBuffer(buffer2, BUFFER_LOCK_UNLOCK);
187
188 /* Get pins. */
189 if (need_to_pin_buffer1)
190 visibilitymap_pin(relation, block1, vmbuffer1);
191 if (need_to_pin_buffer2)
192 visibilitymap_pin(relation, block2, vmbuffer2);
193
194 /* Relock buffers. */
195 LockBuffer(buffer1, BUFFER_LOCK_EXCLUSIVE);
196 if (buffer2 != InvalidBuffer && buffer2 != buffer1)
197 LockBuffer(buffer2, BUFFER_LOCK_EXCLUSIVE);
198
199 /*
200 * If there are two buffers involved and we pinned just one of them,
201 * it's possible that the second one became all-visible while we were
202 * busy pinning the first one. If it looks like that's a possible
203 * scenario, we'll need to make a second pass through this loop.
204 */
205 if (buffer2 == InvalidBuffer || buffer1 == buffer2
206 || (need_to_pin_buffer1 && need_to_pin_buffer2))
207 break;
208 }
209
210 return released_locks;
211}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5100
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:396
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:347
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
#define Assert(condition)
Definition: c.h:815
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
static StringInfoData tmpbuf
Definition: walsender.c:170

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), InvalidBuffer, LockBuffer(), PageIsAllVisible(), tmpbuf, visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by RelationGetBufferForTuple().
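
The return value is the caller's cue to recheck its assumptions: true means the function had to drop both buffer locks while pinning a visibility map page, so any free-space measurement taken before the call may be stale. A minimal sketch of that caller-side recheck, assuming it lives inside hio.c next to this static helper (the helper name and the needed parameter are illustrative, not part of the source):

/* Hypothetical helper inside hio.c: recheck free space if locks were dropped. */
static bool
PageStillHasRoom(Relation relation, Buffer buffer, Buffer otherBuffer,
                 BlockNumber block, BlockNumber otherBlock,
                 Buffer *vmbuffer, Buffer *vmbuffer_other, Size needed)
{
    if (GetVisibilityMapPins(relation, buffer, otherBuffer,
                             block, otherBlock, vmbuffer, vmbuffer_other))
    {
        /*
         * The locks were released and re-acquired, so another backend may
         * have consumed space on the page in the meantime; measure again.
         */
        return PageGetHeapFreeSpace(BufferGetPage(buffer)) >= needed;
    }

    return true;                /* locks were never released */
}

RelationGetBufferForTuple() applies the same recheck-and-retry pattern after calling this function, as its listing further down this page shows.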

◆ ReadBufferBI()

static Buffer ReadBufferBI ( Relation  relation,
BlockNumber  targetBlock,
ReadBufferMode  mode,
BulkInsertState  bistate 
)
static

Definition at line 88 of file hio.c.

90{
91 Buffer buffer;
92
93 /* If not bulk-insert, exactly like ReadBuffer */
94 if (!bistate)
95 return ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
96 mode, NULL);
97
98 /* If we have the desired block already pinned, re-pin and return it */
99 if (bistate->current_buf != InvalidBuffer)
100 {
101 if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
102 {
103 /*
104 * Currently the LOCK variants are only used for extending
105 * relation, which should never reach this branch.
106 */
107 Assert(mode != RBM_ZERO_AND_LOCK &&
108 mode != RBM_ZERO_AND_CLEANUP_LOCK);
109
110 IncrBufferRefCount(bistate->current_buf);
111 return bistate->current_buf;
112 }
113 /* ... else drop the old buffer */
114 ReleaseBuffer(bistate->current_buf);
115 bistate->current_buf = InvalidBuffer;
116 }
117
118 /* Perform a read using the buffer strategy */
119 buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
120 mode, bistate->strategy);
121
122 /* Save the selected block as target for future inserts */
123 IncrBufferRefCount(buffer);
124 bistate->current_buf = buffer;
125
126 return buffer;
127}
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:4898
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4866
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:48
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:46
static PgChecksumMode mode
Definition: pg_checksums.c:55
@ MAIN_FORKNUM
Definition: relpath.h:58
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References Assert, BufferGetBlockNumber(), BulkInsertStateData::current_buf, IncrBufferRefCount(), InvalidBuffer, MAIN_FORKNUM, mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBufferExtended(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().
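
The pin caching in bistate->current_buf is what makes bulk loading cheap: as long as consecutive insertions land on the same block, ReadBufferBI() re-pins the already-cached buffer instead of doing a fresh buffer-mapping lookup. A hedged sketch of the surrounding lifecycle using the public heapam entry points GetBulkInsertState(), heap_insert(), and FreeBulkInsertState() (the bulk_load() wrapper and its arguments are illustrative):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
bulk_load(Relation rel, HeapTuple *tuples, int ntuples)
{
	BulkInsertState bistate = GetBulkInsertState();

	for (int i = 0; i < ntuples; i++)
	{
		/*
		 * heap_insert() passes bistate down to RelationGetBufferForTuple(),
		 * which calls ReadBufferBI(); consecutive inserts into the same
		 * block reuse the pinned bistate->current_buf.
		 */
		heap_insert(rel, tuples[i], GetCurrentCommandId(true), 0, bistate);
	}

	FreeBulkInsertState(bistate);	/* drops the cached pin */
}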

◆ RelationAddBlocks()

static Buffer RelationAddBlocks ( Relation  relation,
BulkInsertState  bistate,
int  num_pages,
bool  use_fsm,
bool *  did_unlock 
)
static

Definition at line 238 of file hio.c.

240{
241#define MAX_BUFFERS_TO_EXTEND_BY 64
242 Buffer victim_buffers[MAX_BUFFERS_TO_EXTEND_BY];
243 BlockNumber first_block = InvalidBlockNumber;
244 BlockNumber last_block = InvalidBlockNumber;
245 uint32 extend_by_pages;
246 uint32 not_in_fsm_pages;
247 Buffer buffer;
248 Page page;
249
250 /*
251 * Determine by how many pages to try to extend by.
252 */
253 if (bistate == NULL && !use_fsm)
254 {
255 /*
256 * If we have neither bistate, nor can use the FSM, we can't bulk
257 * extend - there'd be no way to find the additional pages.
258 */
259 extend_by_pages = 1;
260 }
261 else
262 {
263 uint32 waitcount;
264
265 /*
266 * Try to extend at least by the number of pages the caller needs. We
267 * can remember the additional pages (either via FSM or bistate).
268 */
269 extend_by_pages = num_pages;
270
271 if (!RELATION_IS_LOCAL(relation))
272 waitcount = RelationExtensionLockWaiterCount(relation);
273 else
274 waitcount = 0;
275
276 /*
277 * Multiply the number of pages to extend by the number of waiters. Do
278 * this even if we're not using the FSM, as it still relieves
279 * contention, by deferring the next time this backend needs to
280 * extend. In that case the extended pages will be found via
281 * bistate->next_free.
282 */
283 extend_by_pages += extend_by_pages * waitcount;
284
285 /* ---
286 * If we previously extended using the same bistate, it's very likely
287 * we'll extend some more. Try to extend by as many pages as
288 * before. This can be important for performance for several reasons,
289 * including:
290 *
291 * - It prevents mdzeroextend() switching between extending the
292 * relation in different ways, which is inefficient for some
293 * filesystems.
294 *
295 * - Contention is often intermittent. Even if we currently don't see
296 * other waiters (see above), extending by larger amounts can
297 * prevent future contention.
298 * ---
299 */
300 if (bistate)
301 extend_by_pages = Max(extend_by_pages, bistate->already_extended_by);
302
303 /*
304 * Can't extend by more than MAX_BUFFERS_TO_EXTEND_BY, we need to pin
305 * them all concurrently.
306 */
307 extend_by_pages = Min(extend_by_pages, MAX_BUFFERS_TO_EXTEND_BY);
308 }
309
310 /*
311 * How many of the extended pages should be entered into the FSM?
312 *
313 * If we have a bistate, only enter pages that we don't need ourselves
314 * into the FSM. Otherwise every other backend will immediately try to
315 * use the pages this backend needs for itself, causing unnecessary
316 * contention. If we don't have a bistate, we can't avoid the FSM.
317 *
318 * Never enter the page returned into the FSM, we'll immediately use it.
319 */
320 if (num_pages > 1 && bistate == NULL)
321 not_in_fsm_pages = 1;
322 else
323 not_in_fsm_pages = num_pages;
324
325 /* prepare to put another buffer into the bistate */
326 if (bistate && bistate->current_buf != InvalidBuffer)
327 {
328 ReleaseBuffer(bistate->current_buf);
329 bistate->current_buf = InvalidBuffer;
330 }
331
332 /*
333 * Extend the relation. We ask for the first returned page to be locked,
334 * so that we are sure that nobody has inserted into the page
335 * concurrently.
336 *
337 * With the current MAX_BUFFERS_TO_EXTEND_BY there's no danger of
338 * [auto]vacuum trying to truncate later pages as REL_TRUNCATE_MINIMUM is
339 * way larger.
340 */
341 first_block = ExtendBufferedRelBy(BMR_REL(relation), MAIN_FORKNUM,
342 bistate ? bistate->strategy : NULL,
343 EB_LOCK_FIRST,
344 extend_by_pages,
345 victim_buffers,
346 &extend_by_pages);
347 buffer = victim_buffers[0]; /* the buffer the function will return */
348 last_block = first_block + (extend_by_pages - 1);
349 Assert(first_block == BufferGetBlockNumber(buffer));
350
351 /*
352 * Relation is now extended. Initialize the page. We do this here, before
353 * potentially releasing the lock on the page, because it allows us to
354 * double check that the page contents are empty (this should never
355 * happen, but if it does we don't want to risk wiping out valid data).
356 */
357 page = BufferGetPage(buffer);
358 if (!PageIsNew(page))
359 elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
360 first_block,
361 RelationGetRelationName(relation));
362
363 PageInit(page, BufferGetPageSize(buffer), 0);
364 MarkBufferDirty(buffer);
365
366 /*
367 * If we decided to put pages into the FSM, release the buffer lock (but
368 * not pin), we don't want to do IO while holding a buffer lock. This will
369 * necessitate a bit more extensive checking in our caller.
370 */
371 if (use_fsm && not_in_fsm_pages < extend_by_pages)
372 {
373 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
374 *did_unlock = true;
375 }
376 else
377 *did_unlock = false;
378
379 /*
380 * Relation is now extended. Release pins on all buffers, except for the
381 * first (which we'll return). If we decided to put pages into the FSM,
382 * we can do that as part of the same loop.
383 */
384 for (uint32 i = 1; i < extend_by_pages; i++)
385 {
386 BlockNumber curBlock = first_block + i;
387
388 Assert(curBlock == BufferGetBlockNumber(victim_buffers[i]));
389 Assert(BlockNumberIsValid(curBlock));
390
391 ReleaseBuffer(victim_buffers[i]);
392
393 if (use_fsm && i >= not_in_fsm_pages)
394 {
395 Size freespace = BufferGetPageSize(victim_buffers[i]) -
396 SizeOfPageHeaderData;
397
398 RecordPageWithFreeSpace(relation, curBlock, freespace);
399 }
400 }
401
402 if (use_fsm && not_in_fsm_pages < extend_by_pages)
403 {
404 BlockNumber first_fsm_block = first_block + not_in_fsm_pages;
405
406 FreeSpaceMapVacuumRange(relation, first_fsm_block, last_block);
407 }
408
409 if (bistate)
410 {
411 /*
412 * Remember the additional pages we extended by, so we later can use
413 * them without looking into the FSM.
414 */
415 if (extend_by_pages > 1)
416 {
417 bistate->next_free = first_block + 1;
418 bistate->last_free = last_block;
419 }
420 else
421 {
422 bistate->next_free = InvalidBlockNumber;
423 bistate->last_free = InvalidBlockNumber;
424 }
425
426 /* maintain bistate->current_buf */
427 IncrBufferRefCount(buffer);
428 bistate->current_buf = buffer;
429 bistate->already_extended_by += extend_by_pages;
430 }
431
432 return buffer;
433#undef MAX_BUFFERS_TO_EXTEND_BY
434}
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:878
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
static Size BufferGetPageSize(Buffer buffer)
Definition: bufmgr.h:385
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:234
#define SizeOfPageHeaderData
Definition: bufpage.h:217
PageData * Page
Definition: bufpage.h:82
#define Min(x, y)
Definition: c.h:961
#define Max(x, y)
Definition: c.h:955
uint32_t uint32
Definition: c.h:488
size_t Size
Definition: c.h:562
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
#define MAX_BUFFERS_TO_EXTEND_BY
int i
Definition: isn.c:72
int RelationExtensionLockWaiterCount(Relation relation)
Definition: lmgr.c:454
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
#define RelationGetRelationName(relation)
Definition: rel.h:539
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, Assert, BlockNumberIsValid(), BMR_REL, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BulkInsertStateData::current_buf, EB_LOCK_FIRST, elog, ERROR, ExtendBufferedRelBy(), FreeSpaceMapVacuumRange(), i, IncrBufferRefCount(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), Max, MAX_BUFFERS_TO_EXTEND_BY, Min, BulkInsertStateData::next_free, PageInit(), PageIsNew(), RecordPageWithFreeSpace(), RELATION_IS_LOCAL, RelationExtensionLockWaiterCount(), RelationGetRelationName, ReleaseBuffer(), SizeOfPageHeaderData, and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().
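
The sizing logic near the top of the function boils down to: start from the caller's num_pages, scale it up by the number of backends waiting on the relation extension lock, never extend by less than this bistate extended by last time, and clamp the result to MAX_BUFFERS_TO_EXTEND_BY. A worked sketch of that arithmetic with illustrative values (not taken from the source):

/* Illustrative arithmetic only; mirrors the sizing logic in RelationAddBlocks(). */
uint32		num_pages = 1;			/* caller needs a single page */
uint32		waitcount = 3;			/* backends queued on the extension lock */
uint32		already_extended_by = 16;	/* from a previous pass with the same bistate */
uint32		extend_by_pages;

extend_by_pages = num_pages;
extend_by_pages += extend_by_pages * waitcount;		/* 1 + 1 * 3 = 4 */
extend_by_pages = Max(extend_by_pages, already_extended_by);	/* 16 */
extend_by_pages = Min(extend_by_pages, MAX_BUFFERS_TO_EXTEND_BY);	/* still 16; the cap is 64 */
/* ExtendBufferedRelBy() is then asked for 16 pages in a single call. */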

◆ RelationGetBufferForTuple()

Buffer RelationGetBufferForTuple ( Relation  relation,
Size  len,
Buffer  otherBuffer,
int  options,
BulkInsertState  bistate,
Buffer vmbuffer,
Buffer vmbuffer_other,
int  num_pages 
)

Definition at line 502 of file hio.c.

507{
508 bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
509 Buffer buffer = InvalidBuffer;
510 Page page;
511 Size nearlyEmptyFreeSpace,
512 pageFreeSpace = 0,
513 saveFreeSpace = 0,
514 targetFreeSpace = 0;
515 BlockNumber targetBlock,
516 otherBlock;
517 bool unlockedTargetBuffer;
518 bool recheckVmPins;
519
520 len = MAXALIGN(len); /* be conservative */
521
522 /* if the caller doesn't know by how many pages to extend, extend by 1 */
523 if (num_pages <= 0)
524 num_pages = 1;
525
526 /* Bulk insert is not supported for updates, only inserts. */
527 Assert(otherBuffer == InvalidBuffer || !bistate);
528
529 /*
530 * If we're gonna fail for oversize tuple, do it right away
531 */
532 if (len > MaxHeapTupleSize)
533 ereport(ERROR,
534 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
535 errmsg("row is too big: size %zu, maximum size %zu",
536 len, MaxHeapTupleSize)));
537
538 /* Compute desired extra freespace due to fillfactor option */
539 saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
540 HEAP_DEFAULT_FILLFACTOR);
541
542 /*
543 * Since pages without tuples can still have line pointers, we consider
544 * pages "empty" when the unavailable space is slight. This threshold is
545 * somewhat arbitrary, but it should prevent most unnecessary relation
546 * extensions while inserting large tuples into low-fillfactor tables.
547 */
548 nearlyEmptyFreeSpace = MaxHeapTupleSize -
549 (MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
550 if (len + saveFreeSpace > nearlyEmptyFreeSpace)
551 targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
552 else
553 targetFreeSpace = len + saveFreeSpace;
554
555 if (otherBuffer != InvalidBuffer)
556 otherBlock = BufferGetBlockNumber(otherBuffer);
557 else
558 otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */
559
560 /*
561 * We first try to put the tuple on the same page we last inserted a tuple
562 * on, as cached in the BulkInsertState or relcache entry. If that
563 * doesn't work, we ask the Free Space Map to locate a suitable page.
564 * Since the FSM's info might be out of date, we have to be prepared to
565 * loop around and retry multiple times. (To ensure this isn't an infinite
566 * loop, we must update the FSM with the correct amount of free space on
567 * each page that proves not to be suitable.) If the FSM has no record of
568 * a page with enough free space, we give up and extend the relation.
569 *
570 * When use_fsm is false, we either put the tuple onto the existing target
571 * page or extend the relation.
572 */
573 if (bistate && bistate->current_buf != InvalidBuffer)
574 targetBlock = BufferGetBlockNumber(bistate->current_buf);
575 else
576 targetBlock = RelationGetTargetBlock(relation);
577
578 if (targetBlock == InvalidBlockNumber && use_fsm)
579 {
580 /*
581 * We have no cached target page, so ask the FSM for an initial
582 * target.
583 */
584 targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
585 }
586
587 /*
588 * If the FSM knows nothing of the rel, try the last page before we give
589 * up and extend. This avoids one-tuple-per-page syndrome during
590 * bootstrapping or in a recently-started system.
591 */
592 if (targetBlock == InvalidBlockNumber)
593 {
594 BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
595
596 if (nblocks > 0)
597 targetBlock = nblocks - 1;
598 }
599
600loop:
601 while (targetBlock != InvalidBlockNumber)
602 {
603 /*
604 * Read and exclusive-lock the target block, as well as the other
605 * block if one was given, taking suitable care with lock ordering and
606 * the possibility they are the same block.
607 *
608 * If the page-level all-visible flag is set, caller will need to
609 * clear both that and the corresponding visibility map bit. However,
610 * by the time we return, we'll have x-locked the buffer, and we don't
611 * want to do any I/O while in that state. So we check the bit here
612 * before taking the lock, and pin the page if it appears necessary.
613 * Checking without the lock creates a risk of getting the wrong
614 * answer, so we'll have to recheck after acquiring the lock.
615 */
616 if (otherBuffer == InvalidBuffer)
617 {
618 /* easy case */
619 buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
620 if (PageIsAllVisible(BufferGetPage(buffer)))
621 visibilitymap_pin(relation, targetBlock, vmbuffer);
622
623 /*
624 * If the page is empty, pin vmbuffer to set all_frozen bit later.
625 */
626 if ((options & HEAP_INSERT_FROZEN) &&
627 (PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
628 visibilitymap_pin(relation, targetBlock, vmbuffer);
629
630 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
631 }
632 else if (otherBlock == targetBlock)
633 {
634 /* also easy case */
635 buffer = otherBuffer;
636 if (PageIsAllVisible(BufferGetPage(buffer)))
637 visibilitymap_pin(relation, targetBlock, vmbuffer);
638 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
639 }
640 else if (otherBlock < targetBlock)
641 {
642 /* lock other buffer first */
643 buffer = ReadBuffer(relation, targetBlock);
644 if (PageIsAllVisible(BufferGetPage(buffer)))
645 visibilitymap_pin(relation, targetBlock, vmbuffer);
646 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
647 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
648 }
649 else
650 {
651 /* lock target buffer first */
652 buffer = ReadBuffer(relation, targetBlock);
653 if (PageIsAllVisible(BufferGetPage(buffer)))
654 visibilitymap_pin(relation, targetBlock, vmbuffer);
655 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
656 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
657 }
658
659 /*
660 * We now have the target page (and the other buffer, if any) pinned
661 * and locked. However, since our initial PageIsAllVisible checks
662 * were performed before acquiring the lock, the results might now be
663 * out of date, either for the selected victim buffer, or for the
664 * other buffer passed by the caller. In that case, we'll need to
665 * give up our locks, go get the pin(s) we failed to get earlier, and
666 * re-lock. That's pretty painful, but hopefully shouldn't happen
667 * often.
668 *
669 * Note that there's a small possibility that we didn't pin the page
670 * above but still have the correct page pinned anyway, either because
671 * we've already made a previous pass through this loop, or because
672 * caller passed us the right page anyway.
673 *
674 * Note also that it's possible that by the time we get the pin and
675 * retake the buffer locks, the visibility map bit will have been
676 * cleared by some other backend anyway. In that case, we'll have
677 * done a bit of extra work for no gain, but there's no real harm
678 * done.
679 */
680 GetVisibilityMapPins(relation, buffer, otherBuffer,
681 targetBlock, otherBlock, vmbuffer,
682 vmbuffer_other);
683
684 /*
685 * Now we can check to see if there's enough free space here. If so,
686 * we're done.
687 */
688 page = BufferGetPage(buffer);
689
690 /*
691 * If necessary initialize page, it'll be used soon. We could avoid
692 * dirtying the buffer here, and rely on the caller to do so whenever
693 * it puts a tuple onto the page, but there seems not much benefit in
694 * doing so.
695 */
696 if (PageIsNew(page))
697 {
698 PageInit(page, BufferGetPageSize(buffer), 0);
699 MarkBufferDirty(buffer);
700 }
701
702 pageFreeSpace = PageGetHeapFreeSpace(page);
703 if (targetFreeSpace <= pageFreeSpace)
704 {
705 /* use this page as future insert target, too */
706 RelationSetTargetBlock(relation, targetBlock);
707 return buffer;
708 }
709
710 /*
711 * Not enough space, so we must give up our page locks and pin (if
712 * any) and prepare to look elsewhere. We don't care which order we
713 * unlock the two buffers in, so this can be slightly simpler than the
714 * code above.
715 */
716 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
717 if (otherBuffer == InvalidBuffer)
718 ReleaseBuffer(buffer);
719 else if (otherBlock != targetBlock)
720 {
721 LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
722 ReleaseBuffer(buffer);
723 }
724
725 /* Is there an ongoing bulk extension? */
726 if (bistate && bistate->next_free != InvalidBlockNumber)
727 {
728 Assert(bistate->next_free <= bistate->last_free);
729
730 /*
731 * We bulk extended the relation before, and there are still some
732 * unused pages from that extension, so we don't need to look in
733 * the FSM for a new page. But do record the free space from the
734 * last page, somebody might insert narrower tuples later.
735 */
736 if (use_fsm)
737 RecordPageWithFreeSpace(relation, targetBlock, pageFreeSpace);
738
739 targetBlock = bistate->next_free;
740 if (bistate->next_free >= bistate->last_free)
741 {
742 bistate->next_free = InvalidBlockNumber;
743 bistate->last_free = InvalidBlockNumber;
744 }
745 else
746 bistate->next_free++;
747 }
748 else if (!use_fsm)
749 {
750 /* Without FSM, always fall out of the loop and extend */
751 break;
752 }
753 else
754 {
755 /*
756 * Update FSM as to condition of this page, and ask for another
757 * page to try.
758 */
759 targetBlock = RecordAndGetPageWithFreeSpace(relation,
760 targetBlock,
761 pageFreeSpace,
762 targetFreeSpace);
763 }
764 }
765
766 /* Have to extend the relation */
767 buffer = RelationAddBlocks(relation, bistate, num_pages, use_fsm,
768 &unlockedTargetBuffer);
769
770 targetBlock = BufferGetBlockNumber(buffer);
771 page = BufferGetPage(buffer);
772
773 /*
774 * The page is empty, pin vmbuffer to set all_frozen bit. We don't want to
775 * do IO while the buffer is locked, so we unlock the page first if IO is
776 * needed (necessitating checks below).
777 */
778 if (options & HEAP_INSERT_FROZEN)
779 {
780 Assert(PageGetMaxOffsetNumber(page) == 0);
781
782 if (!visibilitymap_pin_ok(targetBlock, *vmbuffer))
783 {
784 if (!unlockedTargetBuffer)
785 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
786 unlockedTargetBuffer = true;
787 visibilitymap_pin(relation, targetBlock, vmbuffer);
788 }
789 }
790
791 /*
792 * Reacquire locks if necessary.
793 *
794 * If the target buffer was unlocked above, or is unlocked while
795 * reacquiring the lock on otherBuffer below, it's unlikely, but possible,
796 * that another backend used space on this page. We check for that below,
797 * and retry if necessary.
798 */
799 recheckVmPins = false;
800 if (unlockedTargetBuffer)
801 {
802 /* released lock on target buffer above */
803 if (otherBuffer != InvalidBuffer)
804 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
805 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
806 recheckVmPins = true;
807 }
808 else if (otherBuffer != InvalidBuffer)
809 {
810 /*
811 * We did not release the target buffer, and otherBuffer is valid,
812 * need to lock the other buffer. It's guaranteed to be of a lower
813 * page number than the new page. To conform with the deadlock
814 * prevent rules, we ought to lock otherBuffer first, but that would
815 * give other backends a chance to put tuples on our page. To reduce
816 * the likelihood of that, attempt to lock the other buffer
817 * conditionally, that's very likely to work.
818 *
819 * Alternatively, we could acquire the lock on otherBuffer before
820 * extending the relation, but that'd require holding the lock while
821 * performing IO, which seems worse than an unlikely retry.
822 */
823 Assert(otherBuffer != buffer);
824 Assert(targetBlock > otherBlock);
825
826 if (unlikely(!ConditionalLockBuffer(otherBuffer)))
827 {
828 unlockedTargetBuffer = true;
829 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
830 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
831 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
832 }
833 recheckVmPins = true;
834 }
835
836 /*
837 * If one of the buffers was unlocked (always the case if otherBuffer is
838 * valid), it's possible, although unlikely, that an all-visible flag
839 * became set. We can use GetVisibilityMapPins to deal with that. It's
840 * possible that GetVisibilityMapPins() might need to temporarily release
841 * buffer locks, in which case we'll need to check if there's still enough
842 * space on the page below.
843 */
844 if (recheckVmPins)
845 {
846 if (GetVisibilityMapPins(relation, otherBuffer, buffer,
847 otherBlock, targetBlock, vmbuffer_other,
848 vmbuffer))
849 unlockedTargetBuffer = true;
850 }
851
852 /*
853 * If the target buffer was temporarily unlocked since the relation
854 * extension, it's possible, although unlikely, that all the space on the
855 * page was already used. If so, we just retry from the start. If we
856 * didn't unlock, something has gone wrong if there's not enough space -
857 * the test at the top should have prevented reaching this case.
858 */
859 pageFreeSpace = PageGetHeapFreeSpace(page);
860 if (len > pageFreeSpace)
861 {
862 if (unlockedTargetBuffer)
863 {
864 if (otherBuffer != InvalidBuffer)
865 LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
866 UnlockReleaseBuffer(buffer);
867
868 goto loop;
869 }
870 elog(PANIC, "tuple is too big: size %zu", len);
871 }
872
873 /*
874 * Remember the new page as our target for future insertions.
875 *
876 * XXX should we enter the new page into the free space map immediately,
877 * or just keep it for this backend's exclusive use in the short run
878 * (until VACUUM sees it)? Seems to depend on whether you expect the
879 * current backend to make more insertions or not, which is probably a
880 * good bet most of the time. So for now, don't add it to FSM yet.
881 */
882 RelationSetTargetBlock(relation, targetBlock);
883
884 return buffer;
885}
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5126
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4883
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
@ RBM_NORMAL
Definition: bufmgr.h:45
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:980
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
#define MAXALIGN(LEN)
Definition: c.h:768
#define unlikely(x)
Definition: c.h:333
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:149
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:154
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:137
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:36
#define HEAP_INSERT_FROZEN
Definition: heapam.h:37
static Buffer RelationAddBlocks(Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
Definition: hio.c:238
static bool GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
Definition: hio.c:140
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
Definition: hio.c:88
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define MaxHeapTupleSize
Definition: htup_details.h:558
struct ItemIdData ItemIdData
const void size_t len
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:608
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), ConditionalLockBuffer(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, len, LockBuffer(), MarkBufferDirty(), Max, MAXALIGN, MaxHeapTupleSize, MaxHeapTuplesPerPage, BulkInsertStateData::next_free, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageInit(), PageIsAllVisible(), PageIsNew(), PANIC, RBM_NORMAL, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RecordPageWithFreeSpace(), RelationAddBlocks(), RelationGetNumberOfBlocks, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), unlikely, UnlockReleaseBuffer(), visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().
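
Callers receive the buffer exclusively locked with at least len bytes of free space and are expected to place the tuple, dirty the buffer, and release it themselves. A condensed sketch of that call pattern, loosely modeled on heap_insert() (WAL logging, visibility-map maintenance, and error handling are omitted, so this is not the real implementation):

#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"

static void
put_tuple_sketch(Relation relation, HeapTuple heaptup, int options,
				 BulkInsertState bistate)
{
	Buffer		vmbuffer = InvalidBuffer;
	Buffer		buffer;

	/* Find (or create) a page with enough room; returned pinned and locked. */
	buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
									   InvalidBuffer, options, bistate,
									   &vmbuffer, NULL, 0);

	START_CRIT_SECTION();
	RelationPutHeapTuple(relation, buffer, heaptup, false);
	MarkBufferDirty(buffer);
	/* ... the real heap_insert() emits its WAL record here ... */
	END_CRIT_SECTION();

	UnlockReleaseBuffer(buffer);
	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
}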

◆ RelationPutHeapTuple()

void RelationPutHeapTuple ( Relation  relation,
Buffer  buffer,
HeapTuple  tuple,
bool  token 
)

Definition at line 35 of file hio.c.

39{
40 Page pageHeader;
41 OffsetNumber offnum;
42
43 /*
44 * A tuple that's being inserted speculatively should already have its
45 * token set.
46 */
47 Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
48
49 /*
50 * Do not allow tuples with invalid combinations of hint bits to be placed
51 * on a page. This combination is detected as corruption by the
52 * contrib/amcheck logic, so if you disable this assertion, make
53 * corresponding changes there.
54 */
55 Assert(!((tuple->t_data->t_infomask & HEAP_XMAX_COMMITTED) &&
56 (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)));
57
58 /* Add the tuple to the page */
59 pageHeader = BufferGetPage(buffer);
60
61 offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
62 tuple->t_len, InvalidOffsetNumber, false, true);
63
64 if (offnum == InvalidOffsetNumber)
65 elog(PANIC, "failed to add tuple to page");
66
67 /* Update tuple->t_self to the actual position where it was stored */
68 ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
69
70 /*
71 * Insert the correct position into CTID of the stored tuple, too (unless
72 * this is a speculative insertion, in which case the token is held in
73 * CTID field instead)
74 */
75 if (!token)
76 {
77 ItemId itemId = PageGetItemId(pageHeader, offnum);
78 HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);
79
80 item->t_ctid = tuple->t_self;
81 }
82}
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define token
Definition: indent_globs.h:126
Pointer Item
Definition: item.h:17
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
ItemPointerData t_ctid
Definition: htup_details.h:161

References Assert, BufferGetBlockNumber(), BufferGetPage(), elog, HEAP_XMAX_COMMITTED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderIsSpeculative, InvalidOffsetNumber, ItemPointerSet(), PageAddItem, PageGetItem(), PageGetItemId(), PANIC, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, and token.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().
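
After the call, tuple->t_self identifies the exact slot the tuple landed in; callers such as heap_insert() and heap_update() use it when building the WAL record and when linking tuple versions via t_ctid. A small sketch of reading that position back (the debug message is illustrative, not from the source):

RelationPutHeapTuple(relation, buffer, heaptup, false);

elog(DEBUG2, "tuple placed at block %u, offset %u",
	 ItemPointerGetBlockNumber(&heaptup->t_self),
	 ItemPointerGetOffsetNumber(&heaptup->t_self));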