PostgreSQL Source Code (git master)
hio.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"


Macros

#define MAX_BUFFERS_TO_EXTEND_BY   64
 

Functions

void RelationPutHeapTuple (Relation relation, Buffer buffer, HeapTuple tuple, bool token)
 
static Buffer ReadBufferBI (Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
 
static bool GetVisibilityMapPins (Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
 
static Buffer RelationAddBlocks (Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
 
Buffer RelationGetBufferForTuple (Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
 

Macro Definition Documentation

◆ MAX_BUFFERS_TO_EXTEND_BY

#define MAX_BUFFERS_TO_EXTEND_BY   64

Function Documentation

◆ GetVisibilityMapPins()

static bool GetVisibilityMapPins ( Relation  relation,
Buffer  buffer1,
Buffer  buffer2,
BlockNumber  block1,
BlockNumber  block2,
Buffer *  vmbuffer1,
Buffer *  vmbuffer2 
)
static

Definition at line 138 of file hio.c.

141{
142 bool need_to_pin_buffer1;
143 bool need_to_pin_buffer2;
144 bool released_locks = false;
145
146 /*
147 * Swap buffers around to handle the case of a single block/buffer, and to
148 * handle the case where lock ordering rules require locking block2 first.
149 */
150 if (!BufferIsValid(buffer1) ||
151 (BufferIsValid(buffer2) && block1 > block2))
152 {
153 Buffer tmpbuf = buffer1;
154 Buffer *tmpvmbuf = vmbuffer1;
155 BlockNumber tmpblock = block1;
156
157 buffer1 = buffer2;
158 vmbuffer1 = vmbuffer2;
159 block1 = block2;
160
161 buffer2 = tmpbuf;
162 vmbuffer2 = tmpvmbuf;
163 block2 = tmpblock;
164 }
165
166 Assert(BufferIsValid(buffer1));
167 Assert(buffer2 == InvalidBuffer || block1 <= block2);
168
169 while (1)
170 {
171 /* Figure out which pins we need but don't have. */
172 need_to_pin_buffer1 = PageIsAllVisible(BufferGetPage(buffer1))
173 && !visibilitymap_pin_ok(block1, *vmbuffer1);
174 need_to_pin_buffer2 = buffer2 != InvalidBuffer
175 && PageIsAllVisible(BufferGetPage(buffer2))
176 && !visibilitymap_pin_ok(block2, *vmbuffer2);
177 if (!need_to_pin_buffer1 && !need_to_pin_buffer2)
178 break;
179
180 /* We must unlock both buffers before doing any I/O. */
181 released_locks = true;
182 LockBuffer(buffer1, BUFFER_LOCK_UNLOCK);
183 if (buffer2 != InvalidBuffer && buffer2 != buffer1)
184 LockBuffer(buffer2, BUFFER_LOCK_UNLOCK);
185
186 /* Get pins. */
187 if (need_to_pin_buffer1)
188 visibilitymap_pin(relation, block1, vmbuffer1);
189 if (need_to_pin_buffer2)
190 visibilitymap_pin(relation, block2, vmbuffer2);
191
192 /* Relock buffers. */
193 LockBuffer(buffer1, BUFFER_LOCK_EXCLUSIVE);
194 if (buffer2 != InvalidBuffer && buffer2 != buffer1)
195 LockBuffer(buffer2, BUFFER_LOCK_EXCLUSIVE);
196
197 /*
198 * If there are two buffers involved and we pinned just one of them,
199 * it's possible that the second one became all-visible while we were
200 * busy pinning the first one. If it looks like that's a possible
201 * scenario, we'll need to make a second pass through this loop.
202 */
203 if (buffer2 == InvalidBuffer || buffer1 == buffer2
204 || (need_to_pin_buffer1 && need_to_pin_buffer2))
205 break;
206 }
207
208 return released_locks;
209}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:5604
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
@ BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:207
@ BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:205
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:428
Assert(PointerIsAligned(start, uint64))
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
static StringInfoData tmpbuf
Definition: walsender.c:178

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), InvalidBuffer, LockBuffer(), PageIsAllVisible(), tmpbuf, visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by RelationGetBufferForTuple().
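
A caller cannot do the visibility map I/O that visibilitymap_pin() may require while it holds a heap buffer content lock, which is why this helper drops the lock(s), pins, relocks, and reports whether the locks were released. The following single-buffer variant is a hypothetical sketch (ensure_vm_pin_locked is an invented name, not part of hio.c) showing the same cycle with the real bufmgr and visibility map APIs:

#include "postgres.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/*
 * Hypothetical sketch: make sure *vmbuffer is pinned on the visibility map
 * page covering "block" while an exclusive content lock is held on "buf".
 * Returns true if the lock had to be released, so the caller must recheck
 * anything it derived from the page while it was locked.
 */
static bool
ensure_vm_pin_locked(Relation rel, Buffer buf, BlockNumber block,
                     Buffer *vmbuffer)
{
    bool        released = false;

    while (PageIsAllVisible(BufferGetPage(buf)) &&
           !visibilitymap_pin_ok(block, *vmbuffer))
    {
        /* No I/O while holding the content lock. */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        released = true;
        visibilitymap_pin(rel, block, vmbuffer);    /* may read a VM page */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        /* Re-test: the all-visible bit may have changed while unlocked. */
    }
    return released;
}

As with GetVisibilityMapPins(), the returned flag is what later forces RelationGetBufferForTuple() to recheck free space on the page.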

◆ ReadBufferBI()

static Buffer ReadBufferBI ( Relation  relation,
BlockNumber  targetBlock,
ReadBufferMode  mode,
BulkInsertState  bistate 
)
static

Definition at line 86 of file hio.c.

88{
89 Buffer buffer;
90
91 /* If not bulk-insert, exactly like ReadBuffer */
92 if (!bistate)
93 return ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
94 mode, NULL);
95
96 /* If we have the desired block already pinned, re-pin and return it */
97 if (bistate->current_buf != InvalidBuffer)
98 {
99 if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
100 {
101 /*
102 * Currently the LOCK variants are only used for extending
103 * relation, which should never reach this branch.
104 */
105 Assert(mode != RBM_ZERO_AND_LOCK &&
106 mode != RBM_ZERO_AND_CLEANUP_LOCK);
107
108 IncrBufferRefCount(bistate->current_buf);
109 return bistate->current_buf;
110 }
111 /* ... else drop the old buffer */
112 ReleaseBuffer(bistate->current_buf);
113 bistate->current_buf = InvalidBuffer;
114 }
115
116 /* Perform a read using the buffer strategy */
117 buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
118 mode, bistate->strategy);
119
120 /* Save the selected block as target for future inserts */
121 IncrBufferRefCount(buffer);
122 bistate->current_buf = buffer;
123
124 return buffer;
125}
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:5398
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4223
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5366
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:47
static PgChecksumMode mode
Definition: pg_checksums.c:56
@ MAIN_FORKNUM
Definition: relpath.h:58
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References Assert(), BufferGetBlockNumber(), BulkInsertStateData::current_buf, IncrBufferRefCount(), InvalidBuffer, MAIN_FORKNUM, mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBufferExtended(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().
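
The pinned bistate->current_buf that ReadBufferBI() reuses is set up through the public bulk-insert API in access/heapam.h. A minimal caller-side sketch is shown below; bulk_load and its tuples/ntuples parameters are invented for illustration, and the relation is assumed to be already opened and locked by the caller:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical bulk load: heap_insert() reaches ReadBufferBI() via
 * RelationGetBufferForTuple() and keeps reusing bistate->current_buf. */
static void
bulk_load(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, 0, bistate);

    FreeBulkInsertState(bistate);   /* drops the pin on current_buf */
}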

◆ RelationAddBlocks()

static Buffer RelationAddBlocks ( Relation  relation,
BulkInsertState  bistate,
int  num_pages,
bool  use_fsm,
bool *  did_unlock 
)
static

Definition at line 236 of file hio.c.

238{
239#define MAX_BUFFERS_TO_EXTEND_BY 64
240 Buffer victim_buffers[MAX_BUFFERS_TO_EXTEND_BY];
241 BlockNumber first_block = InvalidBlockNumber;
242 BlockNumber last_block = InvalidBlockNumber;
243 uint32 extend_by_pages;
244 uint32 not_in_fsm_pages;
245 Buffer buffer;
246 Page page;
247
248 /*
249 * Determine how many pages to try to extend by.
250 */
251 if (bistate == NULL && !use_fsm)
252 {
253 /*
254 * If we have neither bistate, nor can use the FSM, we can't bulk
255 * extend - there'd be no way to find the additional pages.
256 */
257 extend_by_pages = 1;
258 }
259 else
260 {
261 uint32 waitcount;
262
263 /*
264 * Try to extend at least by the number of pages the caller needs. We
265 * can remember the additional pages (either via FSM or bistate).
266 */
267 extend_by_pages = num_pages;
268
269 if (!RELATION_IS_LOCAL(relation))
270 waitcount = RelationExtensionLockWaiterCount(relation);
271 else
272 waitcount = 0;
273
274 /*
275 * Multiply the number of pages to extend by the number of waiters. Do
276 * this even if we're not using the FSM, as it still relieves
277 * contention, by deferring the next time this backend needs to
278 * extend. In that case the extended pages will be found via
279 * bistate->next_free.
280 */
281 extend_by_pages += extend_by_pages * waitcount;
282
283 /* ---
284 * If we previously extended using the same bistate, it's very likely
285 * we'll extend some more. Try to extend by as many pages as
286 * before. This can be important for performance for several reasons,
287 * including:
288 *
289 * - It prevents mdzeroextend() switching between extending the
290 * relation in different ways, which is inefficient for some
291 * filesystems.
292 *
293 * - Contention is often intermittent. Even if we currently don't see
294 * other waiters (see above), extending by larger amounts can
295 * prevent future contention.
296 * ---
297 */
298 if (bistate)
299 extend_by_pages = Max(extend_by_pages, bistate->already_extended_by);
300
301 /*
302 * Can't extend by more than MAX_BUFFERS_TO_EXTEND_BY, we need to pin
303 * them all concurrently.
304 */
305 extend_by_pages = Min(extend_by_pages, MAX_BUFFERS_TO_EXTEND_BY);
306 }
307
308 /*
309 * How many of the extended pages should be entered into the FSM?
310 *
311 * If we have a bistate, only enter pages that we don't need ourselves
312 * into the FSM. Otherwise every other backend will immediately try to
313 * use the pages this backend needs for itself, causing unnecessary
314 * contention. If we don't have a bistate, we can't avoid the FSM.
315 *
316 * Never enter the page returned into the FSM, we'll immediately use it.
317 */
318 if (num_pages > 1 && bistate == NULL)
319 not_in_fsm_pages = 1;
320 else
321 not_in_fsm_pages = num_pages;
322
323 /* prepare to put another buffer into the bistate */
324 if (bistate && bistate->current_buf != InvalidBuffer)
325 {
326 ReleaseBuffer(bistate->current_buf);
327 bistate->current_buf = InvalidBuffer;
328 }
329
330 /*
331 * Extend the relation. We ask for the first returned page to be locked,
332 * so that we are sure that nobody has inserted into the page
333 * concurrently.
334 *
335 * With the current MAX_BUFFERS_TO_EXTEND_BY there's no danger of
336 * [auto]vacuum trying to truncate later pages as REL_TRUNCATE_MINIMUM is
337 * way larger.
338 */
339 first_block = ExtendBufferedRelBy(BMR_REL(relation), MAIN_FORKNUM,
340 bistate ? bistate->strategy : NULL,
341 EB_LOCK_FIRST,
342 extend_by_pages,
343 victim_buffers,
344 &extend_by_pages);
345 buffer = victim_buffers[0]; /* the buffer the function will return */
346 last_block = first_block + (extend_by_pages - 1);
347 Assert(first_block == BufferGetBlockNumber(buffer));
348
349 /*
350 * Relation is now extended. Initialize the page. We do this here, before
351 * potentially releasing the lock on the page, because it allows us to
352 * double check that the page contents are empty (this should never
353 * happen, but if it does we don't want to risk wiping out valid data).
354 */
355 page = BufferGetPage(buffer);
356 if (!PageIsNew(page))
357 elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
358 first_block,
359 RelationGetRelationName(relation));
360
361 PageInit(page, BufferGetPageSize(buffer), 0);
362 MarkBufferDirty(buffer);
363
364 /*
365 * If we decided to put pages into the FSM, release the buffer lock (but
366 * not pin), we don't want to do IO while holding a buffer lock. This will
367 * necessitate a bit more extensive checking in our caller.
368 */
369 if (use_fsm && not_in_fsm_pages < extend_by_pages)
370 {
371 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
372 *did_unlock = true;
373 }
374 else
375 *did_unlock = false;
376
377 /*
378 * Relation is now extended. Release pins on all buffers, except for the
379 * first (which we'll return). If we decided to put pages into the FSM,
380 * we can do that as part of the same loop.
381 */
382 for (uint32 i = 1; i < extend_by_pages; i++)
383 {
384 BlockNumber curBlock = first_block + i;
385
386 Assert(curBlock == BufferGetBlockNumber(victim_buffers[i]));
387 Assert(BlockNumberIsValid(curBlock));
388
389 ReleaseBuffer(victim_buffers[i]);
390
391 if (use_fsm && i >= not_in_fsm_pages)
392 {
393 Size freespace = BufferGetPageSize(victim_buffers[i]) -
394 SizeOfPageHeaderData;
395
396 RecordPageWithFreeSpace(relation, curBlock, freespace);
397 }
398 }
399
400 if (use_fsm && not_in_fsm_pages < extend_by_pages)
401 {
402 BlockNumber first_fsm_block = first_block + not_in_fsm_pages;
403
404 FreeSpaceMapVacuumRange(relation, first_fsm_block, last_block);
405 }
406
407 if (bistate)
408 {
409 /*
410 * Remember the additional pages we extended by, so we later can use
411 * them without looking into the FSM.
412 */
413 if (extend_by_pages > 1)
414 {
415 bistate->next_free = first_block + 1;
416 bistate->last_free = last_block;
417 }
418 else
419 {
420 bistate->next_free = InvalidBlockNumber;
421 bistate->last_free = InvalidBlockNumber;
422 }
423
424 /* maintain bistate->current_buf */
425 IncrBufferRefCount(buffer);
426 bistate->current_buf = buffer;
427 bistate->already_extended_by += extend_by_pages;
428 }
429
430 return buffer;
431#undef MAX_BUFFERS_TO_EXTEND_BY
432}
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:877
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2943
static Size BufferGetPageSize(Buffer buffer)
Definition: bufmgr.h:425
@ EB_LOCK_FIRST
Definition: bufmgr.h:87
#define BMR_REL(p_rel)
Definition: bufmgr.h:114
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:233
#define SizeOfPageHeaderData
Definition: bufpage.h:216
PageData * Page
Definition: bufpage.h:81
#define Min(x, y)
Definition: c.h:1006
#define Max(x, y)
Definition: c.h:1000
uint32_t uint32
Definition: c.h:541
size_t Size
Definition: c.h:613
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
#define MAX_BUFFERS_TO_EXTEND_BY
int i
Definition: isn.c:77
int RelationExtensionLockWaiterCount(Relation relation)
Definition: lmgr.c:459
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:658
#define RelationGetRelationName(relation)
Definition: rel.h:549
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, Assert(), BlockNumberIsValid(), BMR_REL, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), BulkInsertStateData::current_buf, EB_LOCK_FIRST, elog, ERROR, ExtendBufferedRelBy(), FreeSpaceMapVacuumRange(), i, IncrBufferRefCount(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, LockBuffer(), MAIN_FORKNUM, MarkBufferDirty(), Max, MAX_BUFFERS_TO_EXTEND_BY, Min, BulkInsertStateData::next_free, PageInit(), PageIsNew(), RecordPageWithFreeSpace(), RELATION_IS_LOCAL, RelationExtensionLockWaiterCount(), RelationGetRelationName, ReleaseBuffer(), SizeOfPageHeaderData, and BulkInsertStateData::strategy.

Referenced by RelationGetBufferForTuple().
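
The sizing logic at the top of the function amounts to: extend by at least what the caller asked for, scale that up by the number of backends waiting on the relation extension lock, never drop below what this bistate extended by last time, and clamp to MAX_BUFFERS_TO_EXTEND_BY because every new buffer must be pinned at once. A standalone paraphrase of just that arithmetic (choose_extend_by_pages is an invented name, not PostgreSQL code):

#include <stdint.h>

#define MAX_BUFFERS_TO_EXTEND_BY 64

/* Paraphrase of the extend_by_pages computation in RelationAddBlocks(). */
static uint32_t
choose_extend_by_pages(uint32_t num_pages, uint32_t waitcount,
                       uint32_t already_extended_by)
{
    uint32_t    pages = num_pages + num_pages * waitcount;

    if (pages < already_extended_by)
        pages = already_extended_by;        /* keep extending as before */
    if (pages > MAX_BUFFERS_TO_EXTEND_BY)
        pages = MAX_BUFFERS_TO_EXTEND_BY;   /* all buffers are pinned at once */
    return pages;
}

For example, a caller that needs 1 page while 3 backends wait on the extension lock, using a bistate that previously extended by 16 pages, extends by max(1 + 1 * 3, 16) = 16 pages, well under the cap of 64.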

◆ RelationGetBufferForTuple()

Buffer RelationGetBufferForTuple ( Relation  relation,
Size  len,
Buffer  otherBuffer,
int  options,
BulkInsertState  bistate,
Buffer *  vmbuffer,
Buffer *  vmbuffer_other,
int  num_pages 
)

Definition at line 500 of file hio.c.

505{
506 bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
507 Buffer buffer = InvalidBuffer;
508 Page page;
509 Size nearlyEmptyFreeSpace,
510 pageFreeSpace = 0,
511 saveFreeSpace = 0,
512 targetFreeSpace = 0;
513 BlockNumber targetBlock,
514 otherBlock;
515 bool unlockedTargetBuffer;
516 bool recheckVmPins;
517
518 len = MAXALIGN(len); /* be conservative */
519
520 /* if the caller doesn't know by how many pages to extend, extend by 1 */
521 if (num_pages <= 0)
522 num_pages = 1;
523
524 /* Bulk insert is not supported for updates, only inserts. */
525 Assert(otherBuffer == InvalidBuffer || !bistate);
526
527 /*
528 * If we're going to fail for an oversize tuple, do it right away
529 */
530 if (len > MaxHeapTupleSize)
531 ereport(ERROR,
532 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
533 errmsg("row is too big: size %zu, maximum size %zu",
534 len, MaxHeapTupleSize)));
535
536 /* Compute desired extra freespace due to fillfactor option */
537 saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
538 HEAP_DEFAULT_FILLFACTOR);
539
540 /*
541 * Since pages without tuples can still have line pointers, we consider
542 * pages "empty" when the unavailable space is slight. This threshold is
543 * somewhat arbitrary, but it should prevent most unnecessary relation
544 * extensions while inserting large tuples into low-fillfactor tables.
545 */
546 nearlyEmptyFreeSpace = MaxHeapTupleSize -
547 (MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
548 if (len + saveFreeSpace > nearlyEmptyFreeSpace)
549 targetFreeSpace = Max(len, nearlyEmptyFreeSpace);
550 else
551 targetFreeSpace = len + saveFreeSpace;
552
553 if (otherBuffer != InvalidBuffer)
554 otherBlock = BufferGetBlockNumber(otherBuffer);
555 else
556 otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */
557
558 /*
559 * We first try to put the tuple on the same page we last inserted a tuple
560 * on, as cached in the BulkInsertState or relcache entry. If that
561 * doesn't work, we ask the Free Space Map to locate a suitable page.
562 * Since the FSM's info might be out of date, we have to be prepared to
563 * loop around and retry multiple times. (To ensure this isn't an infinite
564 * loop, we must update the FSM with the correct amount of free space on
565 * each page that proves not to be suitable.) If the FSM has no record of
566 * a page with enough free space, we give up and extend the relation.
567 *
568 * When use_fsm is false, we either put the tuple onto the existing target
569 * page or extend the relation.
570 */
571 if (bistate && bistate->current_buf != InvalidBuffer)
572 targetBlock = BufferGetBlockNumber(bistate->current_buf);
573 else
574 targetBlock = RelationGetTargetBlock(relation);
575
576 if (targetBlock == InvalidBlockNumber && use_fsm)
577 {
578 /*
579 * We have no cached target page, so ask the FSM for an initial
580 * target.
581 */
582 targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
583 }
584
585 /*
586 * If the FSM knows nothing of the rel, try the last page before we give
587 * up and extend. This avoids one-tuple-per-page syndrome during
588 * bootstrapping or in a recently-started system.
589 */
590 if (targetBlock == InvalidBlockNumber)
591 {
592 BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
593
594 if (nblocks > 0)
595 targetBlock = nblocks - 1;
596 }
597
598loop:
599 while (targetBlock != InvalidBlockNumber)
600 {
601 /*
602 * Read and exclusive-lock the target block, as well as the other
603 * block if one was given, taking suitable care with lock ordering and
604 * the possibility they are the same block.
605 *
606 * If the page-level all-visible flag is set, caller will need to
607 * clear both that and the corresponding visibility map bit. However,
608 * by the time we return, we'll have x-locked the buffer, and we don't
609 * want to do any I/O while in that state. So we check the bit here
610 * before taking the lock, and pin the page if it appears necessary.
611 * Checking without the lock creates a risk of getting the wrong
612 * answer, so we'll have to recheck after acquiring the lock.
613 */
614 if (otherBuffer == InvalidBuffer)
615 {
616 /* easy case */
617 buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
618 if (PageIsAllVisible(BufferGetPage(buffer)))
619 visibilitymap_pin(relation, targetBlock, vmbuffer);
620
621 /*
622 * If the page is empty, pin vmbuffer to set all_frozen bit later.
623 */
624 if ((options & HEAP_INSERT_FROZEN) &&
625 PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0)
626 visibilitymap_pin(relation, targetBlock, vmbuffer);
627
628 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
629 }
630 else if (otherBlock == targetBlock)
631 {
632 /* also easy case */
633 buffer = otherBuffer;
634 if (PageIsAllVisible(BufferGetPage(buffer)))
635 visibilitymap_pin(relation, targetBlock, vmbuffer);
636 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
637 }
638 else if (otherBlock < targetBlock)
639 {
640 /* lock other buffer first */
641 buffer = ReadBuffer(relation, targetBlock);
642 if (PageIsAllVisible(BufferGetPage(buffer)))
643 visibilitymap_pin(relation, targetBlock, vmbuffer);
644 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
645 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
646 }
647 else
648 {
649 /* lock target buffer first */
650 buffer = ReadBuffer(relation, targetBlock);
651 if (PageIsAllVisible(BufferGetPage(buffer)))
652 visibilitymap_pin(relation, targetBlock, vmbuffer);
653 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
654 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
655 }
656
657 /*
658 * We now have the target page (and the other buffer, if any) pinned
659 * and locked. However, since our initial PageIsAllVisible checks
660 * were performed before acquiring the lock, the results might now be
661 * out of date, either for the selected victim buffer, or for the
662 * other buffer passed by the caller. In that case, we'll need to
663 * give up our locks, go get the pin(s) we failed to get earlier, and
664 * re-lock. That's pretty painful, but hopefully shouldn't happen
665 * often.
666 *
667 * Note that there's a small possibility that we didn't pin the page
668 * above but still have the correct page pinned anyway, either because
669 * we've already made a previous pass through this loop, or because
670 * caller passed us the right page anyway.
671 *
672 * Note also that it's possible that by the time we get the pin and
673 * retake the buffer locks, the visibility map bit will have been
674 * cleared by some other backend anyway. In that case, we'll have
675 * done a bit of extra work for no gain, but there's no real harm
676 * done.
677 */
678 GetVisibilityMapPins(relation, buffer, otherBuffer,
679 targetBlock, otherBlock, vmbuffer,
680 vmbuffer_other);
681
682 /*
683 * Now we can check to see if there's enough free space here. If so,
684 * we're done.
685 */
686 page = BufferGetPage(buffer);
687
688 /*
689 * If necessary initialize page, it'll be used soon. We could avoid
690 * dirtying the buffer here, and rely on the caller to do so whenever
691 * it puts a tuple onto the page, but there seems not much benefit in
692 * doing so.
693 */
694 if (PageIsNew(page))
695 {
696 PageInit(page, BufferGetPageSize(buffer), 0);
697 MarkBufferDirty(buffer);
698 }
699
700 pageFreeSpace = PageGetHeapFreeSpace(page);
701 if (targetFreeSpace <= pageFreeSpace)
702 {
703 /* use this page as future insert target, too */
704 RelationSetTargetBlock(relation, targetBlock);
705 return buffer;
706 }
707
708 /*
709 * Not enough space, so we must give up our page locks and pin (if
710 * any) and prepare to look elsewhere. We don't care which order we
711 * unlock the two buffers in, so this can be slightly simpler than the
712 * code above.
713 */
714 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
715 if (otherBuffer == InvalidBuffer)
716 ReleaseBuffer(buffer);
717 else if (otherBlock != targetBlock)
718 {
719 LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
720 ReleaseBuffer(buffer);
721 }
722
723 /* Is there an ongoing bulk extension? */
724 if (bistate && bistate->next_free != InvalidBlockNumber)
725 {
726 Assert(bistate->next_free <= bistate->last_free);
727
728 /*
729 * We bulk extended the relation before, and there are still some
730 * unused pages from that extension, so we don't need to look in
731 * the FSM for a new page. But do record the free space from the
732 * last page, somebody might insert narrower tuples later.
733 */
734 if (use_fsm)
735 RecordPageWithFreeSpace(relation, targetBlock, pageFreeSpace);
736
737 targetBlock = bistate->next_free;
738 if (bistate->next_free >= bistate->last_free)
739 {
740 bistate->next_free = InvalidBlockNumber;
741 bistate->last_free = InvalidBlockNumber;
742 }
743 else
744 bistate->next_free++;
745 }
746 else if (!use_fsm)
747 {
748 /* Without FSM, always fall out of the loop and extend */
749 break;
750 }
751 else
752 {
753 /*
754 * Update FSM as to condition of this page, and ask for another
755 * page to try.
756 */
757 targetBlock = RecordAndGetPageWithFreeSpace(relation,
758 targetBlock,
759 pageFreeSpace,
760 targetFreeSpace);
761 }
762 }
763
764 /* Have to extend the relation */
765 buffer = RelationAddBlocks(relation, bistate, num_pages, use_fsm,
766 &unlockedTargetBuffer);
767
768 targetBlock = BufferGetBlockNumber(buffer);
769 page = BufferGetPage(buffer);
770
771 /*
772 * The page is empty, pin vmbuffer to set all_frozen bit. We don't want to
773 * do IO while the buffer is locked, so we unlock the page first if IO is
774 * needed (necessitating checks below).
775 */
776 if (options & HEAP_INSERT_FROZEN)
777 {
778 Assert(PageGetMaxOffsetNumber(page) == 0);
779
780 if (!visibilitymap_pin_ok(targetBlock, *vmbuffer))
781 {
782 if (!unlockedTargetBuffer)
783 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
784 unlockedTargetBuffer = true;
785 visibilitymap_pin(relation, targetBlock, vmbuffer);
786 }
787 }
788
789 /*
790 * Reacquire locks if necessary.
791 *
792 * If the target buffer was unlocked above, or is unlocked while
793 * reacquiring the lock on otherBuffer below, it's unlikely, but possible,
794 * that another backend used space on this page. We check for that below,
795 * and retry if necessary.
796 */
797 recheckVmPins = false;
798 if (unlockedTargetBuffer)
799 {
800 /* released lock on target buffer above */
801 if (otherBuffer != InvalidBuffer)
802 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
803 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
804 recheckVmPins = true;
805 }
806 else if (otherBuffer != InvalidBuffer)
807 {
808 /*
809 * We did not release the target buffer, and otherBuffer is valid, so
810 * we need to lock the other buffer. It's guaranteed to be of a lower
811 * page number than the new page. To conform with the deadlock
812 * prevention rules, we ought to lock otherBuffer first, but that would
813 * give other backends a chance to put tuples on our page. To reduce
814 * the likelihood of that, attempt to lock the other buffer
815 * conditionally; that's very likely to work.
816 *
817 * Alternatively, we could acquire the lock on otherBuffer before
818 * extending the relation, but that'd require holding the lock while
819 * performing IO, which seems worse than an unlikely retry.
820 */
821 Assert(otherBuffer != buffer);
822 Assert(targetBlock > otherBlock);
823
824 if (unlikely(!ConditionalLockBuffer(otherBuffer)))
825 {
826 unlockedTargetBuffer = true;
827 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
828 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
829 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
830 }
831 recheckVmPins = true;
832 }
833
834 /*
835 * If one of the buffers was unlocked (always the case if otherBuffer is
836 * valid), it's possible, although unlikely, that an all-visible flag
837 * became set. We can use GetVisibilityMapPins to deal with that. It's
838 * possible that GetVisibilityMapPins() might need to temporarily release
839 * buffer locks, in which case we'll need to check if there's still enough
840 * space on the page below.
841 */
842 if (recheckVmPins)
843 {
844 if (GetVisibilityMapPins(relation, otherBuffer, buffer,
845 otherBlock, targetBlock, vmbuffer_other,
846 vmbuffer))
847 unlockedTargetBuffer = true;
848 }
849
850 /*
851 * If the target buffer was temporarily unlocked since the relation
852 * extension, it's possible, although unlikely, that all the space on the
853 * page was already used. If so, we just retry from the start. If we
854 * didn't unlock, something has gone wrong if there's not enough space -
855 * the test at the top should have prevented reaching this case.
856 */
857 pageFreeSpace = PageGetHeapFreeSpace(page);
858 if (len > pageFreeSpace)
859 {
860 if (unlockedTargetBuffer)
861 {
862 if (otherBuffer != InvalidBuffer)
863 LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
864 UnlockReleaseBuffer(buffer);
865
866 goto loop;
867 }
868 elog(PANIC, "tuple is too big: size %zu", len);
869 }
870
871 /*
872 * Remember the new page as our target for future insertions.
873 *
874 * XXX should we enter the new page into the free space map immediately,
875 * or just keep it for this backend's exclusive use in the short run
876 * (until VACUUM sees it)? Seems to depend on whether you expect the
877 * current backend to make more insertions or not, which is probably a
878 * good bet most of the time. So for now, don't add it to FSM yet.
879 */
880 RelationSetTargetBlock(relation, targetBlock);
881
882 return buffer;
883}
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5630
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5383
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:294
@ RBM_NORMAL
Definition: bufmgr.h:46
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:371
#define MAXALIGN(LEN)
Definition: c.h:813
#define unlikely(x)
Definition: c.h:407
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:150
BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded)
Definition: freespace.c:154
BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
Definition: freespace.c:137
#define HEAP_INSERT_SKIP_FSM
Definition: heapam.h:37
#define HEAP_INSERT_FROZEN
Definition: heapam.h:38
static Buffer RelationAddBlocks(Relation relation, BulkInsertState bistate, int num_pages, bool use_fsm, bool *did_unlock)
Definition: hio.c:236
static bool GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, BlockNumber block1, BlockNumber block2, Buffer *vmbuffer1, Buffer *vmbuffer2)
Definition: hio.c:138
static Buffer ReadBufferBI(Relation relation, BlockNumber targetBlock, ReadBufferMode mode, BulkInsertState bistate)
Definition: hio.c:86
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
#define MaxHeapTupleSize
Definition: htup_details.h:610
struct ItemIdData ItemIdData
const void size_t len
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:390
#define RelationGetTargetBlock(relation)
Definition: rel.h:611
#define RelationSetTargetBlock(relation, targblock)
Definition: rel.h:618
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:361

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferGetPageSize(), ConditionalLockBuffer(), BulkInsertStateData::current_buf, elog, ereport, errcode(), errmsg(), ERROR, GetPageWithFreeSpace(), GetVisibilityMapPins(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_SKIP_FSM, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, len, LockBuffer(), MarkBufferDirty(), Max, MAXALIGN, MaxHeapTupleSize, MaxHeapTuplesPerPage, BulkInsertStateData::next_free, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageInit(), PageIsAllVisible(), PageIsNew(), PANIC, RBM_NORMAL, ReadBuffer(), ReadBufferBI(), RecordAndGetPageWithFreeSpace(), RecordPageWithFreeSpace(), RelationAddBlocks(), RelationGetNumberOfBlocks, RelationGetTargetBlock, RelationGetTargetPageFreeSpace, RelationSetTargetBlock, ReleaseBuffer(), unlikely, UnlockReleaseBuffer(), visibilitymap_pin(), and visibilitymap_pin_ok().

Referenced by heap_insert(), heap_multi_insert(), and heap_update().
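
The callers listed above use this function in roughly the pattern sketched here. This is a deliberately simplified outline: the critical section, WAL logging, visibility map bit clearing, and TOAST handling that heap_insert() performs are omitted, and insert_one_tuple is an invented wrapper name:

#include "postgres.h"
#include "access/heapam.h"
#include "access/hio.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch of the caller-side sequence, per the real callers
 * in heapam.c (heap_insert, heap_multi_insert, heap_update). */
static void
insert_one_tuple(Relation rel, HeapTuple heaptup, BulkInsertState bistate)
{
    Buffer      vmbuffer = InvalidBuffer;
    Buffer      buffer;

    /* Find (or extend to) a page with room; returned exclusively locked. */
    buffer = RelationGetBufferForTuple(rel, heaptup->t_len,
                                       InvalidBuffer, 0, bistate,
                                       &vmbuffer, NULL, 0);

    /* Place the tuple; sets heaptup->t_self and the stored tuple's CTID. */
    RelationPutHeapTuple(rel, buffer, heaptup, false);

    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);
}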

◆ RelationPutHeapTuple()

void RelationPutHeapTuple ( Relation  relation,
Buffer  buffer,
HeapTuple  tuple,
bool  token 
)

Definition at line 35 of file hio.c.

39{
40 Page pageHeader;
41 OffsetNumber offnum;
42
43 /*
44 * A tuple that's being inserted speculatively should already have its
45 * token set.
46 */
47 Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
48
49 /*
50 * Do not allow tuples with invalid combinations of hint bits to be placed
51 * on a page. This combination is detected as corruption by the
52 * contrib/amcheck logic, so if you disable this assertion, make
53 * corresponding changes there.
54 */
55 Assert(!((tuple->t_data->t_infomask & HEAP_XMAX_COMMITTED) &&
56 (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)));
57
58 /* Add the tuple to the page */
59 pageHeader = BufferGetPage(buffer);
60
61 offnum = PageAddItem(pageHeader, tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, true);
62 if (offnum == InvalidOffsetNumber)
63 elog(PANIC, "failed to add tuple to page");
64
65 /* Update tuple->t_self to the actual position where it was stored */
66 ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
67
68 /*
69 * Insert the correct position into CTID of the stored tuple, too (unless
70 * this is a speculative insertion, in which case the token is held in
71 * CTID field instead)
72 */
73 if (!token)
74 {
75 ItemId itemId = PageGetItemId(pageHeader, offnum);
76 HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);
77
78 item->t_ctid = tuple->t_self;
79 }
80}
static void * PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
Definition: htup_details.h:461
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
ItemPointerData t_ctid
Definition: htup_details.h:161

References Assert(), BufferGetBlockNumber(), BufferGetPage(), elog, HEAP_XMAX_COMMITTED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderIsSpeculative(), InvalidOffsetNumber, ItemPointerSet(), PageAddItem, PageGetItem(), PageGetItemId(), PANIC, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, and HeapTupleData::t_self.

Referenced by heap_insert(), heap_multi_insert(), and heap_update().