PostgreSQL Source Code git master
bufmgr.h File Reference
#include "port/pg_iovec.h"
#include "storage/aio_types.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilelocator.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
Include dependency graph for bufmgr.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  PrefetchBufferResult
 
struct  BufferManagerRelation
 
struct  ReadBuffersOperation
 

Macros

#define BMR_REL(p_rel)    ((BufferManagerRelation){.rel = p_rel})
 
#define BMR_SMGR(p_smgr, p_relpersistence)    ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})
 
#define BMR_GET_SMGR(bmr)    (RelationIsValid((bmr).rel) ? RelationGetSmgr((bmr).rel) : (bmr).smgr)
 
#define READ_BUFFERS_ZERO_ON_ERROR   (1 << 0)
 
#define READ_BUFFERS_ISSUE_ADVICE   (1 << 1)
 
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES   (1 << 2)
 
#define READ_BUFFERS_SYNCHRONOUSLY   (1 << 3)
 
#define DEFAULT_EFFECTIVE_IO_CONCURRENCY   16
 
#define DEFAULT_MAINTENANCE_IO_CONCURRENCY   16
 
#define MAX_IO_COMBINE_LIMIT   PG_IOV_MAX
 
#define DEFAULT_IO_COMBINE_LIMIT   Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
 
#define MAX_IO_CONCURRENCY   1000
 
#define P_NEW   InvalidBlockNumber /* grow the file to get a new page */
 
#define RelationGetNumberOfBlocks(reln)    RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
 

Typedefs

typedef void * Block
 
typedef enum BufferAccessStrategyType BufferAccessStrategyType
 
typedef struct PrefetchBufferResult PrefetchBufferResult
 
typedef enum ExtendBufferedFlags ExtendBufferedFlags
 
typedef struct SMgrRelationData* SMgrRelation
 
typedef struct BufferManagerRelation BufferManagerRelation
 
typedef struct ReadBuffersOperation ReadBuffersOperation
 
typedef struct WritebackContext WritebackContext
 
typedef enum BufferLockMode BufferLockMode
 

Enumerations

enum  BufferAccessStrategyType { BAS_NORMAL , BAS_BULKREAD , BAS_BULKWRITE , BAS_VACUUM }
 
enum  ReadBufferMode {
  RBM_NORMAL , RBM_ZERO_AND_LOCK , RBM_ZERO_AND_CLEANUP_LOCK , RBM_ZERO_ON_ERROR ,
  RBM_NORMAL_NO_LOG
}
 
enum  ExtendBufferedFlags {
  EB_SKIP_EXTENSION_LOCK = (1 << 0) , EB_PERFORMING_RECOVERY = (1 << 1) , EB_CREATE_FORK_IF_NEEDED = (1 << 2) , EB_LOCK_FIRST = (1 << 3) ,
  EB_CLEAR_SIZE_CACHE = (1 << 4) , EB_LOCK_TARGET = (1 << 5)
}
 
enum  BufferLockMode { BUFFER_LOCK_UNLOCK , BUFFER_LOCK_SHARE , BUFFER_LOCK_EXCLUSIVE }
 

Functions

PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
void InitBufferManagerAccess (void)
 
void AtEOXact_Buffers (bool isCommit)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
void FlushOneBuffer (Buffer buffer)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool BgBufferSync (WritebackContext *wb_context)
 
uint32 GetPinLimit (void)
 
uint32 GetLocalPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
uint32 GetAdditionalLocalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
void LimitAdditionalLocalPins (uint32 *additional_pins)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void BufferManagerShmemInit (void)
 
Size BufferManagerShmemSize (void)
 
void AtProcExit_LocalBuffers (void)
 
BufferAccessStrategy GetAccessStrategy (BufferAccessStrategyType btype)
 
BufferAccessStrategy GetAccessStrategyWithSize (BufferAccessStrategyType btype, int ring_size_kb)
 
int GetAccessStrategyBufferCount (BufferAccessStrategy strategy)
 
int GetAccessStrategyPinLimit (BufferAccessStrategy strategy)
 
void FreeAccessStrategy (BufferAccessStrategy strategy)
 
static bool BufferIsValid (Buffer bufnum)
 
static Block BufferGetBlock (Buffer buffer)
 
static Size BufferGetPageSize (Buffer buffer)
 
static Page BufferGetPage (Buffer buffer)
 

Variables

PGDLLIMPORT int NBuffers
 
PGDLLIMPORT bool zero_damaged_pages
 
PGDLLIMPORT int bgwriter_lru_maxpages
 
PGDLLIMPORT double bgwriter_lru_multiplier
 
PGDLLIMPORT bool track_io_timing
 
PGDLLIMPORT int effective_io_concurrency
 
PGDLLIMPORT int maintenance_io_concurrency
 
PGDLLIMPORT int io_combine_limit
 
PGDLLIMPORT int io_combine_limit_guc
 
PGDLLIMPORT int io_max_combine_limit
 
PGDLLIMPORT int checkpoint_flush_after
 
PGDLLIMPORT int backend_flush_after
 
PGDLLIMPORT int bgwriter_flush_after
 
PGDLLIMPORT const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
PGDLLIMPORT const PgAioHandleCallbacks aio_local_buffer_readv_cb
 
PGDLLIMPORT char * BufferBlocks
 
PGDLLIMPORT int NLocBuffer
 
PGDLLIMPORT Block * LocalBufferBlockPointers
 
PGDLLIMPORT int32 * LocalRefCount
 

Macro Definition Documentation

◆ BMR_GET_SMGR

#define BMR_GET_SMGR (   bmr)     (RelationIsValid((bmr).rel) ? RelationGetSmgr((bmr).rel) : (bmr).smgr)

Definition at line 118 of file bufmgr.h.

◆ BMR_REL

#define BMR_REL (   p_rel)     ((BufferManagerRelation){.rel = p_rel})

Definition at line 114 of file bufmgr.h.

◆ BMR_SMGR

#define BMR_SMGR (   p_smgr,
  p_relpersistence 
)     ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})

Definition at line 116 of file bufmgr.h.

◆ DEFAULT_EFFECTIVE_IO_CONCURRENCY

#define DEFAULT_EFFECTIVE_IO_CONCURRENCY   16

Definition at line 168 of file bufmgr.h.

◆ DEFAULT_IO_COMBINE_LIMIT

#define DEFAULT_IO_COMBINE_LIMIT   Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)

Definition at line 174 of file bufmgr.h.

◆ DEFAULT_MAINTENANCE_IO_CONCURRENCY

#define DEFAULT_MAINTENANCE_IO_CONCURRENCY   16

Definition at line 169 of file bufmgr.h.

◆ MAX_IO_COMBINE_LIMIT

#define MAX_IO_COMBINE_LIMIT   PG_IOV_MAX

Definition at line 173 of file bufmgr.h.

◆ MAX_IO_CONCURRENCY

#define MAX_IO_CONCURRENCY   1000

Definition at line 195 of file bufmgr.h.

◆ P_NEW

#define P_NEW   InvalidBlockNumber /* grow the file to get a new page */

Definition at line 198 of file bufmgr.h.

◆ READ_BUFFERS_IGNORE_CHECKSUM_FAILURES

#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES   (1 << 2)

Definition at line 126 of file bufmgr.h.

◆ READ_BUFFERS_ISSUE_ADVICE

#define READ_BUFFERS_ISSUE_ADVICE   (1 << 1)

Definition at line 124 of file bufmgr.h.

◆ READ_BUFFERS_SYNCHRONOUSLY

#define READ_BUFFERS_SYNCHRONOUSLY   (1 << 3)

Definition at line 128 of file bufmgr.h.

◆ READ_BUFFERS_ZERO_ON_ERROR

#define READ_BUFFERS_ZERO_ON_ERROR   (1 << 0)

Definition at line 122 of file bufmgr.h.

◆ RelationGetNumberOfBlocks

#define RelationGetNumberOfBlocks (   reln)     RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)

Definition at line 294 of file bufmgr.h.

Typedef Documentation

◆ Block

typedef void* Block

Definition at line 26 of file bufmgr.h.

◆ BufferAccessStrategyType

◆ BufferLockMode

◆ BufferManagerRelation

◆ ExtendBufferedFlags

◆ PrefetchBufferResult

◆ ReadBuffersOperation

Definition at line 154 of file bufmgr.h.

◆ SMgrRelation

typedef struct SMgrRelationData* SMgrRelation

Definition at line 97 of file bufmgr.h.

◆ WritebackContext

Definition at line 157 of file bufmgr.h.

Enumeration Type Documentation

◆ BufferAccessStrategyType

Enumerator
BAS_NORMAL 
BAS_BULKREAD 
BAS_BULKWRITE 
BAS_VACUUM 

Definition at line 34 of file bufmgr.h.

35{
36 BAS_NORMAL, /* Normal random access */
37 BAS_BULKREAD, /* Large read-only scan (hint bit updates are
38 * ok) */
39 BAS_BULKWRITE, /* Large multi-block write (e.g. COPY IN) */
40 BAS_VACUUM, /* VACUUM */
BufferAccessStrategyType
Definition: bufmgr.h:35
@ BAS_BULKREAD
Definition: bufmgr.h:37
@ BAS_NORMAL
Definition: bufmgr.h:36
@ BAS_VACUUM
Definition: bufmgr.h:40
@ BAS_BULKWRITE
Definition: bufmgr.h:39

◆ BufferLockMode

Enumerator
BUFFER_LOCK_UNLOCK 
BUFFER_LOCK_SHARE 
BUFFER_LOCK_EXCLUSIVE 

Definition at line 203 of file bufmgr.h.

204{
BufferLockMode
Definition: bufmgr.h:204
@ BUFFER_LOCK_SHARE
Definition: bufmgr.h:206
@ BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:207
@ BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:205

◆ ExtendBufferedFlags

Enumerator
EB_SKIP_EXTENSION_LOCK 
EB_PERFORMING_RECOVERY 
EB_CREATE_FORK_IF_NEEDED 
EB_LOCK_FIRST 
EB_CLEAR_SIZE_CACHE 
EB_LOCK_TARGET 

Definition at line 68 of file bufmgr.h.

69{
70 /*
71 * Don't acquire extension lock. This is safe only if the relation isn't
72 * shared, an access exclusive lock is held or if this is the startup
73 * process.
74 */
75 EB_SKIP_EXTENSION_LOCK = (1 << 0),
76
77 /* Is this extension part of recovery? */
78 EB_PERFORMING_RECOVERY = (1 << 1),
79
80 /*
81 * Should the fork be created if it does not currently exist? This likely
82 * only ever makes sense for relation forks.
83 */
84 EB_CREATE_FORK_IF_NEEDED = (1 << 2),
85
86 /* Should the first (possibly only) return buffer be returned locked? */
87 EB_LOCK_FIRST = (1 << 3),
88
89 /* Should the smgr size cache be cleared? */
90 EB_CLEAR_SIZE_CACHE = (1 << 4),
91
92 /* internal flags follow */
93 EB_LOCK_TARGET = (1 << 5),
ExtendBufferedFlags
Definition: bufmgr.h:69
@ EB_LOCK_TARGET
Definition: bufmgr.h:93
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:90
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:78
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:84
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:75
@ EB_LOCK_FIRST
Definition: bufmgr.h:87

◆ ReadBufferMode

Enumerator
RBM_NORMAL 
RBM_ZERO_AND_LOCK 
RBM_ZERO_AND_CLEANUP_LOCK 
RBM_ZERO_ON_ERROR 
RBM_NORMAL_NO_LOG 

Definition at line 44 of file bufmgr.h.

45{
46 RBM_NORMAL, /* Normal read */
47 RBM_ZERO_AND_LOCK, /* Don't read from disk, caller will
48 * initialize. Also locks the page. */
49 RBM_ZERO_AND_CLEANUP_LOCK, /* Like RBM_ZERO_AND_LOCK, but locks the page
50 * in "cleanup" mode */
51 RBM_ZERO_ON_ERROR, /* Read, but return an all-zeros page on error */
52 RBM_NORMAL_NO_LOG, /* Don't log page as invalid during WAL
53 * replay; otherwise same as RBM_NORMAL */
ReadBufferMode
Definition: bufmgr.h:45
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:51
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:47
@ RBM_NORMAL
Definition: bufmgr.h:46
@ RBM_NORMAL_NO_LOG
Definition: bufmgr.h:52

Function Documentation

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 3990 of file bufmgr.c.

3991{
3992 CheckForBufferLeaks();
3993
3994 AtEOXact_LocalBuffers(isCommit);
3995
3996 Assert(PrivateRefCountOverflowed == 0);
3997}
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:4059
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:217
Assert(PointerIsAligned(start, uint64))
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:1003

References Assert(), AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_LocalBuffers()

void AtProcExit_LocalBuffers ( void  )

Definition at line 1014 of file localbuf.c.

1015{
1016 /*
1017 * We shouldn't be holding any remaining pins; if we are, and assertions
1018 * aren't enabled, we'll fail later in DropRelationBuffers while trying to
1019 * drop the temp rels.
1020 */
1021 CheckForLocalBufferLeaks();
1022}
static void CheckForLocalBufferLeaks(void)
Definition: localbuf.c:970

References CheckForLocalBufferLeaks().

Referenced by AtProcExit_Buffers().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 3622 of file bufmgr.c.

3623{
3624 /* info obtained from freelist.c */
3625 int strategy_buf_id;
3626 uint32 strategy_passes;
3627 uint32 recent_alloc;
3628
3629 /*
3630 * Information saved between calls so we can determine the strategy
3631 * point's advance rate and avoid scanning already-cleaned buffers.
3632 */
3633 static bool saved_info_valid = false;
3634 static int prev_strategy_buf_id;
3635 static uint32 prev_strategy_passes;
3636 static int next_to_clean;
3637 static uint32 next_passes;
3638
3639 /* Moving averages of allocation rate and clean-buffer density */
3640 static float smoothed_alloc = 0;
3641 static float smoothed_density = 10.0;
3642
3643 /* Potentially these could be tunables, but for now, not */
3644 float smoothing_samples = 16;
3645 float scan_whole_pool_milliseconds = 120000.0;
3646
3647 /* Used to compute how far we scan ahead */
3648 long strategy_delta;
3649 int bufs_to_lap;
3650 int bufs_ahead;
3651 float scans_per_alloc;
3652 int reusable_buffers_est;
3653 int upcoming_alloc_est;
3654 int min_scan_buffers;
3655
3656 /* Variables for the scanning loop proper */
3657 int num_to_scan;
3658 int num_written;
3659 int reusable_buffers;
3660
3661 /* Variables for final smoothed_density update */
3662 long new_strategy_delta;
3663 uint32 new_recent_alloc;
3664
3665 /*
3666 * Find out where the clock-sweep currently is, and how many buffer
3667 * allocations have happened since our last call.
3668 */
3669 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3670
3671 /* Report buffer alloc counts to pgstat */
3672 PendingBgWriterStats.buf_alloc += recent_alloc;
3673
3674 /*
3675 * If we're not running the LRU scan, just stop after doing the stats
3676 * stuff. We mark the saved state invalid so that we can recover sanely
3677 * if LRU scan is turned back on later.
3678 */
3679 if (bgwriter_lru_maxpages <= 0)
3680 {
3681 saved_info_valid = false;
3682 return true;
3683 }
3684
3685 /*
3686 * Compute strategy_delta = how many buffers have been scanned by the
3687 * clock-sweep since last time. If first time through, assume none. Then
3688 * see if we are still ahead of the clock-sweep, and if so, how many
3689 * buffers we could scan before we'd catch up with it and "lap" it. Note:
3690 * weird-looking coding of xxx_passes comparisons are to avoid bogus
3691 * behavior when the passes counts wrap around.
3692 */
3693 if (saved_info_valid)
3694 {
3695 int32 passes_delta = strategy_passes - prev_strategy_passes;
3696
3697 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3698 strategy_delta += (long) passes_delta * NBuffers;
3699
3700 Assert(strategy_delta >= 0);
3701
3702 if ((int32) (next_passes - strategy_passes) > 0)
3703 {
3704 /* we're one pass ahead of the strategy point */
3705 bufs_to_lap = strategy_buf_id - next_to_clean;
3706#ifdef BGW_DEBUG
3707 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3708 next_passes, next_to_clean,
3709 strategy_passes, strategy_buf_id,
3710 strategy_delta, bufs_to_lap);
3711#endif
3712 }
3713 else if (next_passes == strategy_passes &&
3714 next_to_clean >= strategy_buf_id)
3715 {
3716 /* on same pass, but ahead or at least not behind */
3717 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3718#ifdef BGW_DEBUG
3719 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3720 next_passes, next_to_clean,
3721 strategy_passes, strategy_buf_id,
3722 strategy_delta, bufs_to_lap);
3723#endif
3724 }
3725 else
3726 {
3727 /*
3728 * We're behind, so skip forward to the strategy point and start
3729 * cleaning from there.
3730 */
3731#ifdef BGW_DEBUG
3732 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3733 next_passes, next_to_clean,
3734 strategy_passes, strategy_buf_id,
3735 strategy_delta);
3736#endif
3737 next_to_clean = strategy_buf_id;
3738 next_passes = strategy_passes;
3739 bufs_to_lap = NBuffers;
3740 }
3741 }
3742 else
3743 {
3744 /*
3745 * Initializing at startup or after LRU scanning had been off. Always
3746 * start at the strategy point.
3747 */
3748#ifdef BGW_DEBUG
3749 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3750 strategy_passes, strategy_buf_id);
3751#endif
3752 strategy_delta = 0;
3753 next_to_clean = strategy_buf_id;
3754 next_passes = strategy_passes;
3755 bufs_to_lap = NBuffers;
3756 }
3757
3758 /* Update saved info for next time */
3759 prev_strategy_buf_id = strategy_buf_id;
3760 prev_strategy_passes = strategy_passes;
3761 saved_info_valid = true;
3762
3763 /*
3764 * Compute how many buffers had to be scanned for each new allocation, ie,
3765 * 1/density of reusable buffers, and track a moving average of that.
3766 *
3767 * If the strategy point didn't move, we don't update the density estimate
3768 */
3769 if (strategy_delta > 0 && recent_alloc > 0)
3770 {
3771 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3772 smoothed_density += (scans_per_alloc - smoothed_density) /
3773 smoothing_samples;
3774 }
3775
3776 /*
3777 * Estimate how many reusable buffers there are between the current
3778 * strategy point and where we've scanned ahead to, based on the smoothed
3779 * density estimate.
3780 */
3781 bufs_ahead = NBuffers - bufs_to_lap;
3782 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3783
3784 /*
3785 * Track a moving average of recent buffer allocations. Here, rather than
3786 * a true average we want a fast-attack, slow-decline behavior: we
3787 * immediately follow any increase.
3788 */
3789 if (smoothed_alloc <= (float) recent_alloc)
3790 smoothed_alloc = recent_alloc;
3791 else
3792 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3793 smoothing_samples;
3794
3795 /* Scale the estimate by a GUC to allow more aggressive tuning. */
3796 upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3797
3798 /*
3799 * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3800 * eventually underflow to zero, and the underflows produce annoying
3801 * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3802 * zero, there's no point in tracking smaller and smaller values of
3803 * smoothed_alloc, so just reset it to exactly zero to avoid this
3804 * syndrome. It will pop back up as soon as recent_alloc increases.
3805 */
3806 if (upcoming_alloc_est == 0)
3807 smoothed_alloc = 0;
3808
3809 /*
3810 * Even in cases where there's been little or no buffer allocation
3811 * activity, we want to make a small amount of progress through the buffer
3812 * cache so that as many reusable buffers as possible are clean after an
3813 * idle period.
3814 *
3815 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3816 * the BGW will be called during the scan_whole_pool time; slice the
3817 * buffer pool into that many sections.
3818 */
3819 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3820
3821 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3822 {
3823#ifdef BGW_DEBUG
3824 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3825 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3826#endif
3827 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3828 }
3829
3830 /*
3831 * Now write out dirty reusable buffers, working forward from the
3832 * next_to_clean point, until we have lapped the strategy scan, or cleaned
3833 * enough buffers to match our estimate of the next cycle's allocation
3834 * requirements, or hit the bgwriter_lru_maxpages limit.
3835 */
3836
3837 num_to_scan = bufs_to_lap;
3838 num_written = 0;
3839 reusable_buffers = reusable_buffers_est;
3840
3841 /* Execute the LRU scan */
3842 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3843 {
3844 int sync_state = SyncOneBuffer(next_to_clean, true,
3845 wb_context);
3846
3847 if (++next_to_clean >= NBuffers)
3848 {
3849 next_to_clean = 0;
3850 next_passes++;
3851 }
3852 num_to_scan--;
3853
3854 if (sync_state & BUF_WRITTEN)
3855 {
3856 reusable_buffers++;
3857 if (++num_written >= bgwriter_lru_maxpages)
 3858 {
 3859 PendingBgWriterStats.maxwritten_clean++;
 3860 break;
3861 }
3862 }
3863 else if (sync_state & BUF_REUSABLE)
3864 reusable_buffers++;
3865 }
 3866
 3867 PendingBgWriterStats.buf_written_clean += num_written;
 3868
3869#ifdef BGW_DEBUG
3870 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3871 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3872 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3873 bufs_to_lap - num_to_scan,
3874 num_written,
3875 reusable_buffers - reusable_buffers_est);
3876#endif
3877
3878 /*
3879 * Consider the above scan as being like a new allocation scan.
3880 * Characterize its density and update the smoothed one based on it. This
3881 * effectively halves the moving average period in cases where both the
3882 * strategy and the background writer are doing some useful scanning,
3883 * which is helpful because a long memory isn't as desirable on the
3884 * density estimates.
3885 */
3886 new_strategy_delta = bufs_to_lap - num_to_scan;
3887 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3888 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3889 {
3890 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3891 smoothed_density += (scans_per_alloc - smoothed_density) /
3892 smoothing_samples;
3893
3894#ifdef BGW_DEBUG
3895 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3896 new_recent_alloc, new_strategy_delta,
3897 scans_per_alloc, smoothed_density);
3898#endif
3899 }
3900
3901 /* Return true if OK to hibernate */
3902 return (bufs_to_lap == 0 && recent_alloc == 0);
3903}
int BgWriterDelay
Definition: bgwriter.c:58
#define BUF_REUSABLE
Definition: bufmgr.c:81
double bgwriter_lru_multiplier
Definition: bufmgr.c:146
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:3920
int bgwriter_lru_maxpages
Definition: bufmgr.c:145
#define BUF_WRITTEN
Definition: bufmgr.c:80
int32_t int32
Definition: c.h:537
uint32_t uint32
Definition: c.h:541
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:226
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:321
int NBuffers
Definition: globals.c:142
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:242
PgStat_Counter maxwritten_clean
Definition: pgstat.h:243
PgStat_Counter buf_alloc
Definition: pgstat.h:244

References Assert(), bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

◆ BufferGetBlock()

static Block BufferGetBlock ( Buffer  buffer)
inlinestatic

Definition at line 403 of file bufmgr.h.

404{
405 Assert(BufferIsValid(buffer));
406
407 if (BufferIsLocal(buffer))
408 return LocalBufferBlockPointers[-buffer - 1];
409 else
410 return (Block) (BufferBlocks + ((Size) (buffer - 1)) * BLCKSZ);
411}
#define BufferIsLocal(buffer)
Definition: buf.h:37
PGDLLIMPORT Block * LocalBufferBlockPointers
Definition: localbuf.c:48
void * Block
Definition: bufmgr.h:26
PGDLLIMPORT char * BufferBlocks
Definition: buf_init.c:22
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
size_t Size
Definition: c.h:613

References Assert(), BufferBlocks, BufferIsLocal, BufferIsValid(), and LocalBufferBlockPointers.

Referenced by AsyncReadBuffers(), buffer_readv_complete_one(), BufferGetPage(), heap_inplace_update_and_unlock(), pg_buffercache_os_pages_internal(), read_rel_block_ll(), and XLogSaveBufferForHint().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4223 of file bufmgr.c.

4224{
4225 BufferDesc *bufHdr;
4226
4227 Assert(BufferIsPinned(buffer));
4228
4229 if (BufferIsLocal(buffer))
4230 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4231 else
4232 bufHdr = GetBufferDescriptor(buffer - 1);
4233
4234 /* pinned, so OK to read tag without spinlock */
4235 return bufHdr->tag.blockNum;
4236}
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
static BufferDesc * GetBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:483
BufferTag tag
BlockNumber blockNum

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), 
pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4499 of file bufmgr.c.

4500{
4501 char *page = BufferGetPage(buffer);
4502 BufferDesc *bufHdr;
4503 XLogRecPtr lsn;
4504
4505 /*
4506 * If we don't need locking for correctness, fastpath out.
4507 */
4508 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4509 return PageGetLSN(page);
4510
4511 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4512 Assert(BufferIsValid(buffer));
4513 Assert(BufferIsPinned(buffer));
4514
4515 bufHdr = GetBufferDescriptor(buffer - 1);
4516 LockBufHdr(bufHdr);
4517 lsn = PageGetLSN(page);
4518 UnlockBufHdr(bufHdr);
4519
4520 return lsn;
4521}
static void UnlockBufHdr(BufferDesc *desc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:6264
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
static XLogRecPtr PageGetLSN(const PageData *page)
Definition: bufpage.h:385
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetPage()

static Page BufferGetPage ( Buffer  buffer)
inlinestatic

Definition at line 436 of file bufmgr.h.

437{
438 return (Page) BufferGetBlock(buffer);
439}
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:403
PageData * Page
Definition: bufpage.h:81

References BufferGetBlock().

Referenced by _bt_allocbuf(), _bt_binsrch(), _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_clear_incomplete_split(), _bt_conditionallockbuf(), _bt_dedup_pass(), _bt_delete_or_dedup_one_page(), _bt_delitems_delete(), _bt_delitems_delete_check(), _bt_delitems_vacuum(), _bt_endpoint(), _bt_findinsertloc(), _bt_finish_split(), _bt_get_endpoint(), _bt_getmeta(), _bt_getroot(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_killitems(), _bt_leftsib_splitflag(), _bt_lock_and_validate_left(), _bt_lock_subtree_parent(), _bt_lockbuf(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_rightsib_halfdeadflag(), _bt_search(), _bt_search_insert(), _bt_set_cleanup_info(), _bt_simpledel_pass(), _bt_split(), _bt_stepright(), _bt_unlink_halfdead_page(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _bt_vacuum_needs_cleanup(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_init(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), _hash_kill_items(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), allocNewBuffer(), BitmapHeapScanNextBlock(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_can_do_samepage_update(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_start_evacuating_page(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), 
brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinSetHeapBlockItemptr(), bt_metap(), bt_page_items_internal(), bt_recheck_sibling_links(), bt_rootdescend(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), btvacuumpage(), BufferGetLSNAtomic(), check_index_page(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), collectMatchesForHeapRow(), count_nondeletable_pages(), createPostingTree(), dataBeginPlaceToPage(), dataBeginPlaceToPageInternal(), dataBeginPlaceToPageLeaf(), dataExecPlaceToPage(), dataExecPlaceToPageInternal(), dataLocateItem(), dataPlaceToPageLeafRecompress(), dataPrepareDownlink(), dataSplitPageInternal(), doPickSplit(), entryExecPlaceToPage(), entryIsEnoughSpace(), entryLoadMoreItems(), entryLocateEntry(), entryLocateLeafEntry(), entryPrepareDownlink(), entrySplitPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), generic_redo(), GenericXLogFinish(), GenericXLogRegisterBuffer(), get_raw_page_internal(), GetBTPageStatistics(), GetRecordedFreeSpace(), GetVisibilityMapPins(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), GinInitBuffer(), GinInitMetabuffer(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertData(), ginRedoInsertEntry(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), 
ginRedoVacuumDataLeafPage(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumEntryPage(), ginVacuumPostingTreeLeaf(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), GISTInitBuffer(), gistkillitems(), gistMemorizeAllDownlinks(), gistNewBuffer(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_bitmap_info(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_freeze_prepared_tuples(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_index_delete_tuples(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_page_prune_execute(), heap_page_prune_opt(), heap_page_would_be_all_visible(), heap_pre_freeze_checks(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_validate_scan(), heapam_scan_analyze_next_tuple(), heapam_scan_bitmap_next_tuple(), 
heapam_scan_sample_next_tuple(), heapgettup_continue_page(), heapgettup_pagemode(), heapgettup_start_page(), index_compute_xid_horizon_for_tuples(), initBloomState(), lazy_scan_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), log_heap_prune_and_freeze(), log_heap_update(), log_newpage_buffer(), log_newpage_range(), log_split_page(), MarkBufferDirtyHint(), modify_rel_block(), moveLeafs(), moveRightIfItNeeded(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_visibility(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), prune_freeze_plan(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistInitBuffer(), SpGistNewBuffer(), SpGistSetLastUsedPage(), SpGistUpdateMetaPage(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), terminate_brin_buildstate(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), vm_readbuf(), writeListPage(), XLogCheckBufferNeedsBackup(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), XLogRegisterBuffer(), XLogSaveBufferForHint(), xlogVacuumPage(), and ZeroAndLockBuffer().

◆ BufferGetPageSize()

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 4244 of file bufmgr.c.

4246{
4247 BufferDesc *bufHdr;
4248
4249 /* Do the same checks as BufferGetBlockNumber. */
4250 Assert(BufferIsPinned(buffer));
4251
4252 if (BufferIsLocal(buffer))
4253 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4254 else
4255 bufHdr = GetBufferDescriptor(buffer - 1);
4256
4257 /* pinned, so OK to read tag without spinlock */
4258 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4259 *forknum = BufTagGetForkNum(&bufHdr->tag);
4260 *blknum = bufHdr->tag.blockNum;
4261}
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 2911 of file bufmgr.c.

2912{
2913 BufferDesc *bufHdr;
2914
2915 Assert(BufferIsPinned(buffer));
2916
2917 if (BufferIsLocal(buffer))
2918 {
2919 int bufid = -buffer - 1;
2920
2921 bufHdr = GetLocalBufferDescriptor(bufid);
2922 /* Content locks are not maintained for local buffers. */
2923 }
2924 else
2925 {
2926 bufHdr = GetBufferDescriptor(buffer - 1);
2927 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
2928 }
2929
2930 return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
2931}
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:237
#define BM_DIRTY
Definition: buf_internals.h:69
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:2869
pg_atomic_uint32 state

References Assert(), BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2843 of file bufmgr.c.

2844{
2845 BufferDesc *bufHdr;
2846
2847 Assert(BufferIsPinned(buffer));
2848
2849 if (BufferIsLocal(buffer))
2850 {
2851 /* Content locks are not maintained for local buffers. */
2852 return true;
2853 }
2854 else
2855 {
2856 bufHdr = GetBufferDescriptor(buffer - 1);
2857 return LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr));
2858 }
2859}
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1977

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), and LWLockHeldByMe().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2869 of file bufmgr.c.

2870{
2871 BufferDesc *bufHdr;
2872
2873 Assert(BufferIsPinned(buffer));
2874
2875 if (BufferIsLocal(buffer))
2876 {
2877 /* Content locks are not maintained for local buffers. */
2878 return true;
2879 }
2880 else
2881 {
2882 LWLockMode lw_mode;
2883
2884 switch (mode)
2885 {
2886 case BUFFER_LOCK_EXCLUSIVE:
2887 lw_mode = LW_EXCLUSIVE;
2888 break;
2889 case BUFFER_LOCK_SHARE:
2890 lw_mode = LW_SHARED;
2891 break;
2892 default:
2893 pg_unreachable();
2894 }
2895
2896 bufHdr = GetBufferDescriptor(buffer - 1);
2897 return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2898 lw_mode);
2899 }
2900}
#define pg_unreachable()
Definition: c.h:336
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2021
LWLockMode
Definition: lwlock.h:111
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
static PgChecksumMode mode
Definition: pg_checksums.c:56

References Assert(), PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockHeldByMeInMode(), mode, and pg_unreachable.

Referenced by BufferIsDirty(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4469 of file bufmgr.c.

4470{
4471 BufferDesc *bufHdr;
4472
4473 /* Local buffers are used only for temp relations. */
4474 if (BufferIsLocal(buffer))
4475 return false;
4476
4477 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4478 Assert(BufferIsValid(buffer));
4479 Assert(BufferIsPinned(buffer));
4480
4481 /*
4482 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
4483 * need not bother with the buffer header spinlock. Even if someone else
4484 * changes the buffer header state while we're doing this, the state is
4485 * changed atomically, so we'll read the old value or the new value, but
4486 * not random garbage.
4487 */
4488 bufHdr = GetBufferDescriptor(buffer - 1);
4489 return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
4490}
#define BM_PERMANENT
Definition: buf_internals.h:77

References Assert(), BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferIsValid()

static bool BufferIsValid ( Buffer  bufnum)
inlinestatic

Definition at line 387 of file bufmgr.h.

388{
389 Assert(bufnum <= NBuffers);
390 Assert(bufnum >= -NLocBuffer);
391
392 return bufnum != InvalidBuffer;
393}
#define InvalidBuffer
Definition: buf.h:25
PGDLLIMPORT int NBuffers
Definition: globals.c:142
PGDLLIMPORT int NLocBuffer
Definition: localbuf.c:45

References Assert(), InvalidBuffer, NBuffers, and NLocBuffer.

Referenced by _bt_clear_incomplete_split(), _bt_endpoint(), _bt_first(), _bt_get_endpoint(), _bt_insertonpg(), _bt_relandgetbuf(), _bt_search(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_dropscanbuf(), _hash_freeovflpage(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_readnext(), _hash_readpage(), _hash_readprev(), BitmapHeapScanNextBlock(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinGetTupleForHeapBlock(), brininsert(), brinsummarize(), bt_recheck_sibling_links(), bt_rootdescend(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), btvacuumscan(), buffer_readv_complete(), BufferGetBlock(), BufferGetLSNAtomic(), BufferGetPageSize(), BufferIsPermanent(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), doPickSplit(), entryGetItem(), entryLoadMoreItems(), EvictUnpinnedBuffer(), ExecStoreBufferHeapTuple(), ExecStorePinnedBufferHeapTuple(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_vacuum_page(), generic_redo(), GetPrivateRefCount(), GetPrivateRefCountEntry(), GetRecordedFreeSpace(), GetVisibilityMapPins(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoDeletePage(), ginRedoInsert(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginScanToDelete(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageUpdateRecord(), gistvacuumscan(), gistXLogSplit(), gistXLogUpdate(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), 
hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_endscan(), heap_fetch_next_buffer(), heap_index_delete_tuples(), heap_inplace_lock(), heap_lock_tuple(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_analyze_next_block(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapam_tuple_satisfies_snapshot(), heapgettup(), heapgettup_continue_page(), heapgettup_pagemode(), heapgettup_start_page(), invalidate_rel_block(), IsBufferCleanupOK(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_heap_visible(), MarkBufferDirty(), MarkBufferDirtyHint(), read_stream_next_buffer(), ReadRecentBuffer(), ReleaseAndReadBuffer(), ReleaseBuffer(), ResOwnerReleaseBufferPin(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgvacuumscan(), statapprox_heap(), tts_buffer_heap_clear(), tts_buffer_heap_copyslot(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), XLogPrefetcherNextBlock(), XLogReadBufferExtended(), and XLogReadBufferForRedoExtended().

◆ BufferManagerShmemInit()

void BufferManagerShmemInit ( void  )

Definition at line 68 of file buf_init.c.

69{
70 bool foundBufs,
71 foundDescs,
72 foundIOCV,
73 foundBufCkpt;
74
75 /* Align descriptors to a cacheline boundary. */
76 BufferDescriptors = (BufferDescPadded *)
77 ShmemInitStruct("Buffer Descriptors",
78 NBuffers * sizeof(BufferDescPadded),
79 &foundDescs);
80
81 /* Align buffer pool on IO page size boundary. */
82 BufferBlocks = (char *)
83 TYPEALIGN(PG_IO_ALIGN_SIZE,
84 ShmemInitStruct("Buffer Blocks",
85 NBuffers * (Size) BLCKSZ + PG_IO_ALIGN_SIZE,
86 &foundBufs));
87
88 /* Align condition variables to cacheline boundary. */
89 BufferIOCVArray = (ConditionVariableMinimallyPadded *)
90 ShmemInitStruct("Buffer IO Condition Variables",
91 NBuffers * sizeof(ConditionVariableMinimallyPadded),
92 &foundIOCV);
93
94 /*
95 * The array used to sort to-be-checkpointed buffer ids is located in
96 * shared memory, to avoid having to allocate significant amounts of
97 * memory at runtime. As that'd be in the middle of a checkpoint, or when
98 * the checkpointer is restarted, memory allocation failures would be
99 * painful.
100 */
101 CkptBufferIds = (CkptSortItem *)
102 ShmemInitStruct("Checkpoint BufferIds",
103 NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
104
105 if (foundDescs || foundBufs || foundIOCV || foundBufCkpt)
106 {
107 /* should find all of these, or none of them */
108 Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt);
109 /* note: this path is only taken in EXEC_BACKEND case */
110 }
111 else
112 {
113 int i;
114
115 /*
116 * Initialize all the buffer headers.
117 */
118 for (i = 0; i < NBuffers; i++)
119 {
120 BufferDesc *buf = GetBufferDescriptor(i);
121
122 ClearBufferTag(&buf->tag);
123
124 pg_atomic_init_u32(&buf->state, 0);
125 buf->wait_backend_pgprocno = INVALID_PROC_NUMBER;
126
127 buf->buf_id = i;
128
129 pgaio_wref_clear(&buf->io_wref);
130
131 LWLockInitialize(BufferDescriptorGetContentLock(buf),
132 LWTRANCHE_BUFFER_CONTENT);
133
134 ConditionVariableInit(BufferDescriptorGetIOCV(buf));
135 }
136 }
137
138 /* Init other shared buffer-management stuff */
139 StrategyInitialize(!foundDescs);
140
141 /* Initialize per-backend file flush context */
142 WritebackContextInit(&BackendWritebackContext,
143 &backend_flush_after);
144}
void pgaio_wref_clear(PgAioWaitRef *iow)
Definition: aio.c:964
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:219
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
char * BufferBlocks
Definition: buf_init.c:22
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
ConditionVariableMinimallyPadded * BufferIOCVArray
Definition: buf_init.c:23
BufferDescPadded * BufferDescriptors
Definition: buf_init.c:21
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void ClearBufferTag(BufferTag *tag)
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:6406
int backend_flush_after
Definition: bufmgr.c:180
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:806
void ConditionVariableInit(ConditionVariable *cv)
void StrategyInitialize(bool init)
Definition: freelist.c:401
int i
Definition: isn.c:77
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:698
#define PG_IO_ALIGN_SIZE
static char * buf
Definition: pg_test_fsync.c:72
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:389

References Assert(), backend_flush_after, BackendWritebackContext, buf, BufferBlocks, BufferDescriptorGetContentLock(), BufferDescriptorGetIOCV(), BufferDescriptors, BufferIOCVArray, CkptBufferIds, ClearBufferTag(), ConditionVariableInit(), GetBufferDescriptor(), i, INVALID_PROC_NUMBER, LWLockInitialize(), NBuffers, pg_atomic_init_u32(), PG_IO_ALIGN_SIZE, pgaio_wref_clear(), ShmemInitStruct(), StrategyInitialize(), TYPEALIGN, and WritebackContextInit().

Referenced by CreateOrAttachShmemStructs().

◆ BufferManagerShmemSize()

Size BufferManagerShmemSize ( void  )

Definition at line 153 of file buf_init.c.

154{
155 Size size = 0;
156
157 /* size of buffer descriptors */
158 size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
159 /* to allow aligning buffer descriptors */
160 size = add_size(size, PG_CACHE_LINE_SIZE);
161
162 /* size of data pages, plus alignment padding */
163 size = add_size(size, PG_IO_ALIGN_SIZE);
164 size = add_size(size, mul_size(NBuffers, BLCKSZ));
165
166 /* size of stuff controlled by freelist.c */
167 size = add_size(size, StrategyShmemSize());
168
169 /* size of I/O condition variables */
170 size = add_size(size, mul_size(NBuffers,
171 sizeof(ConditionVariableMinimallyPadded)));
172 /* to allow aligning the above */
173 size = add_size(size, PG_CACHE_LINE_SIZE);
174
175 /* size of checkpoint sort array in bufmgr.c */
176 size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));
177
178 return size;
179}
Size StrategyShmemSize(void)
Definition: freelist.c:380
#define PG_CACHE_LINE_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:495
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510

References add_size(), mul_size(), NBuffers, PG_CACHE_LINE_SIZE, PG_IO_ALIGN_SIZE, and StrategyShmemSize().

Referenced by CalculateShmemSize().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5651 of file bufmgr.c.

5652{
5653 if (BufferIsLocal(buffer))
5654 {
5655 if (LocalRefCount[-buffer - 1] != 1)
5656 elog(ERROR, "incorrect local pin count: %d",
5657 LocalRefCount[-buffer - 1]);
5658 }
5659 else
5660 {
5661 if (GetPrivateRefCount(buffer) != 1)
5662 elog(ERROR, "incorrect local pin count: %d",
5663 GetPrivateRefCount(buffer));
5664 }
5665}
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:425
#define ERROR
Definition: elog.h:39
int32 * LocalRefCount
Definition: localbuf.c:49

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4209 of file bufmgr.c.

4210{
4211 BufferSync(flags);
4212}
static void BufferSync(int flags)
Definition: bufmgr.c:3343

References BufferSync().

Referenced by CheckPointGuts().

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5857 of file bufmgr.c.

5858{
5859 BufferDesc *bufHdr;
5860 uint32 buf_state,
5861 refcount;
5862
5863 Assert(BufferIsValid(buffer));
5864
5865 /* see AIO related comment in LockBufferForCleanup() */
5866
5867 if (BufferIsLocal(buffer))
5868 {
5869 refcount = LocalRefCount[-buffer - 1];
5870 /* There should be exactly one pin */
5871 Assert(refcount > 0);
5872 if (refcount != 1)
5873 return false;
5874 /* Nobody else to wait for */
5875 return true;
5876 }
5877
5878 /* There should be exactly one local pin */
5879 refcount = GetPrivateRefCount(buffer);
5880 Assert(refcount);
5881 if (refcount != 1)
5882 return false;
5883
5884 /* Try to acquire lock */
5885 if (!ConditionalLockBuffer(buffer))
5886 return false;
5887
5888 bufHdr = GetBufferDescriptor(buffer - 1);
5889 buf_state = LockBufHdr(bufHdr);
5890 refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5891
5892 Assert(refcount > 0);
5893 if (refcount == 1)
5894 {
5895 /* Successfully acquired exclusive lock with pincount 1 */
5896 UnlockBufHdr(bufHdr);
5897 return true;
5898 }
5899
5900 /* Failed, so release the lock */
5901 UnlockBufHdr(bufHdr);
5902 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5903 return false;
5904}
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:59
void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:5604
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5630

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5242 of file bufmgr.c.

5244{
5245 char relpersistence;
5246 SMgrRelation src_rel;
5247 SMgrRelation dst_rel;
5248
5249 /* Set the relpersistence. */
5250 relpersistence = permanent ?
5251 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5252
5253 src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
5254 dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
5255
5256 /*
5257 * Create and copy all forks of the relation. During create database we
5258 * have a separate cleanup mechanism which deletes complete database
5259 * directory. Therefore, each individual relation doesn't need to be
5260 * registered for cleanup.
5261 */
5262 RelationCreateStorage(dst_rlocator, relpersistence, false);
5263
5264 /* copy main fork. */
5265 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
5266 permanent);
5267
5268 /* copy those extra forks that exist */
5269 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5270 forkNum <= MAX_FORKNUM; forkNum++)
5271 {
5272 if (smgrexists(src_rel, forkNum))
5273 {
5274 smgrcreate(dst_rel, forkNum, false);
5275
5276 /*
5277 * WAL log creation if the relation is persistent, or this is the
5278 * init fork of an unlogged relation.
5279 */
5280 if (permanent || forkNum == INIT_FORKNUM)
5281 log_smgrcreate(&dst_rlocator, forkNum);
5282
5283 /* Copy a fork's data, block by block. */
5284 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
5285 permanent);
5286 }
5287 }
5288}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:5128
ForkNumber
Definition: relpath.h:56
@ MAIN_FORKNUM
Definition: relpath.h:58
@ INIT_FORKNUM
Definition: relpath.h:61
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:187

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4166 of file bufmgr.c.

4167{
4168 BufferDesc *buf;
4169 int32 loccount;
4170 char *result;
4171 ProcNumber backend;
4172 uint32 buf_state;
4173
4174 Assert(BufferIsValid(buffer));
4175 if (BufferIsLocal(buffer))
4176 {
4177 buf = GetLocalBufferDescriptor(-buffer - 1);
4178 loccount = LocalRefCount[-buffer - 1];
4179 backend = MyProcNumber;
4180 }
4181 else
4182 {
4183 buf = GetBufferDescriptor(buffer - 1);
4184 loccount = GetPrivateRefCount(buffer);
4185 backend = INVALID_PROC_NUMBER;
4186 }
4187
4188 /* theoretically we should lock the bufhdr here */
4189 buf_state = pg_atomic_read_u32(&buf->state);
4190
4191 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
4192 buffer,
4193 relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
4194 BufTagGetForkNum(&buf->tag)).str,
4195 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4196 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
4197 return result;
4198}
#define BUF_FLAG_MASK
Definition: buf_internals.h:56
ProcNumber MyProcNumber
Definition: globals.c:90
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:141

References Assert(), buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4895 of file bufmgr.c.

4896{
4897 int i;
4898
4899 /*
4900 * We needn't consider local buffers, since by assumption the target
4901 * database isn't our own.
4902 */
4903
4904 for (i = 0; i < NBuffers; i++)
4905 {
4906 BufferDesc *bufHdr = GetBufferDescriptor(i);
4907
4908 /*
4909 * As in DropRelationBuffers, an unlocked precheck should be safe and
4910 * saves some cycles.
4911 */
4912 if (bufHdr->tag.dbOid != dbid)
4913 continue;
4914
4915 LockBufHdr(bufHdr);
4916 if (bufHdr->tag.dbOid == dbid)
4917 InvalidateBuffer(bufHdr); /* releases spinlock */
4918 else
4919 UnlockBufHdr(bufHdr);
4920 }
4921}
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:2154

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 4545 of file bufmgr.c.

/*
 * DropRelationBuffers -- remove from the buffer pool all pages of the
 * given relation forks at or beyond firstDelBlock[i], discarding them
 * without write-out.  Temp relations are delegated to localbuf.c.  When
 * cached fork sizes are available and the total block count to drop is
 * below BUF_DROP_FULL_SCAN_THRESHOLD, candidate buffers are looked up
 * directly in the buffer mapping table; otherwise the entire buffer
 * pool is scanned.
 */
4547{
4548 int i;
4549 int j;
4550 RelFileLocatorBackend rlocator;
4551 BlockNumber nForkBlock[MAX_FORKNUM];
4552 uint64 nBlocksToInvalidate = 0;
4553
4554 rlocator = smgr_reln->smgr_rlocator;
4555
4556 /* If it's a local relation, it's localbuf.c's problem. */
4557 if (RelFileLocatorBackendIsTemp(rlocator))
4558 {
4559 if (rlocator.backend == MyProcNumber)
4560 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4561 firstDelBlock);
4562
4563 return;
4564 }
4565
4566 /*
4567 * To remove all the pages of the specified relation forks from the buffer
4568 * pool, we need to scan the entire buffer pool but we can optimize it by
4569 * finding the buffers from BufMapping table provided we know the exact
4570 * size of each fork of the relation. The exact size is required to ensure
4571 * that we don't leave any buffer for the relation being dropped as
4572 * otherwise the background writer or checkpointer can lead to a PANIC
4573 * error while flushing buffers corresponding to files that don't exist.
4574 *
4575 * To know the exact size, we rely on the size cached for each fork by us
4576 * during recovery which limits the optimization to recovery and on
4577 * standbys but we can easily extend it once we have shared cache for
4578 * relation size.
4579 *
4580 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4581 * and the future writes keep the cached value up-to-date. See
4582 * smgrextend. It is possible that the value of the first lseek is smaller
4583 * than the actual number of existing blocks in the file due to buggy
4584 * Linux kernels that might not have accounted for the recent write. But
4585 * that should be fine because there must not be any buffers after that
4586 * file size.
4587 */
4588 for (i = 0; i < nforks; i++)
4589 {
4590 /* Get the number of blocks for a relation's fork */
4591 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4592
4593 if (nForkBlock[i] == InvalidBlockNumber)
4594 {
4595 nBlocksToInvalidate = InvalidBlockNumber;
4596 break;
4597 }
4598
4599 /* calculate the number of blocks to be invalidated */
4600 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4601 }
4602
4603 /*
4604 * We apply the optimization iff the total number of blocks to invalidate
4605 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4606 */
4607 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4608 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4609 {
4610 for (j = 0; j < nforks; j++)
4611 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4612 nForkBlock[j], firstDelBlock[j]);
4613 return;
4614 }
4615
/* Fallback: full scan of the shared buffer pool. */
4616 for (i = 0; i < NBuffers; i++)
4617 {
4618 BufferDesc *bufHdr = GetBufferDescriptor(i);
4619
4620 /*
4621 * We can make this a tad faster by prechecking the buffer tag before
4622 * we attempt to lock the buffer; this saves a lot of lock
4623 * acquisitions in typical cases. It should be safe because the
4624 * caller must have AccessExclusiveLock on the relation, or some other
4625 * reason to be certain that no one is loading new pages of the rel
4626 * into the buffer pool. (Otherwise we might well miss such pages
4627 * entirely.) Therefore, while the tag might be changing while we
4628 * look at it, it can't be changing *to* a value we care about, only
4629 * *away* from such a value. So false negatives are impossible, and
4630 * false positives are safe because we'll recheck after getting the
4631 * buffer lock.
4632 *
4633 * We could check forkNum and blockNum as well as the rlocator, but
4634 * the incremental win from doing so seems small.
4635 */
4636 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4637 continue;
4638
4639 LockBufHdr(bufHdr);
4640
4641 for (j = 0; j < nforks; j++)
4642 {
4643 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4644 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4645 bufHdr->tag.blockNum >= firstDelBlock[j])
4646 {
4647 InvalidateBuffer(bufHdr); /* releases spinlock */
4648 break;
4649 }
4650 }
/* If no fork matched, the loop ran to completion; release the spinlock. */
4651 if (j >= nforks)
4652 UnlockBufHdr(bufHdr);
4653 }
4654}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:91
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4835
uint64_t uint64
Definition: c.h:542
int j
Definition: isn.c:78
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition: localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:847
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:38

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 4665 of file bufmgr.c.

4666{
4667 int i;
4668 int n = 0;
4669 SMgrRelation *rels;
4670 BlockNumber (*block)[MAX_FORKNUM + 1];
4671 uint64 nBlocksToInvalidate = 0;
4672 RelFileLocator *locators;
4673 bool cached = true;
4674 bool use_bsearch;
4675
4676 if (nlocators == 0)
4677 return;
4678
4679 rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
4680
4681 /* If it's a local relation, it's localbuf.c's problem. */
4682 for (i = 0; i < nlocators; i++)
4683 {
4684 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4685 {
4686 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4687 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4688 }
4689 else
4690 rels[n++] = smgr_reln[i];
4691 }
4692
4693 /*
4694 * If there are no non-local relations, then we're done. Release the
4695 * memory and return.
4696 */
4697 if (n == 0)
4698 {
4699 pfree(rels);
4700 return;
4701 }
4702
4703 /*
4704 * This is used to remember the number of blocks for all the relations
4705 * forks.
4706 */
4707 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4708 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4709
4710 /*
4711 * We can avoid scanning the entire buffer pool if we know the exact size
4712 * of each of the given relation forks. See DropRelationBuffers.
4713 */
4714 for (i = 0; i < n && cached; i++)
4715 {
4716 for (int j = 0; j <= MAX_FORKNUM; j++)
4717 {
4718 /* Get the number of blocks for a relation's fork. */
4719 block[i][j] = smgrnblocks_cached(rels[i], j);
4720
4721 /* We need to only consider the relation forks that exist. */
4722 if (block[i][j] == InvalidBlockNumber)
4723 {
4724 if (!smgrexists(rels[i], j))
4725 continue;
4726 cached = false;
4727 break;
4728 }
4729
4730 /* calculate the total number of blocks to be invalidated */
4731 nBlocksToInvalidate += block[i][j];
4732 }
4733 }
4734
4735 /*
4736 * We apply the optimization iff the total number of blocks to invalidate
4737 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4738 */
4739 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4740 {
4741 for (i = 0; i < n; i++)
4742 {
4743 for (int j = 0; j <= MAX_FORKNUM; j++)
4744 {
4745 /* ignore relation forks that don't exist */
4746 if (!BlockNumberIsValid(block[i][j]))
4747 continue;
4748
4749 /* drop all the buffers for a particular relation fork */
4750 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4751 j, block[i][j], 0);
4752 }
4753 }
4754
4755 pfree(block);
4756 pfree(rels);
4757 return;
4758 }
4759
4760 pfree(block);
4761 locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
4762 for (i = 0; i < n; i++)
4763 locators[i] = rels[i]->smgr_rlocator.locator;
4764
4765 /*
4766 * For low number of relations to drop just use a simple walk through, to
4767 * save the bsearch overhead. The threshold to use is rather a guess than
4768 * an exactly determined value, as it depends on many factors (CPU and RAM
4769 * speeds, amount of shared buffers etc.).
4770 */
4771 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4772
4773 /* sort the list of rlocators if necessary */
4774 if (use_bsearch)
4775 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4776
4777 for (i = 0; i < NBuffers; i++)
4778 {
4779 RelFileLocator *rlocator = NULL;
4780 BufferDesc *bufHdr = GetBufferDescriptor(i);
4781
4782 /*
4783 * As in DropRelationBuffers, an unlocked precheck should be safe and
4784 * saves some cycles.
4785 */
4786
4787 if (!use_bsearch)
4788 {
4789 int j;
4790
4791 for (j = 0; j < n; j++)
4792 {
4793 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4794 {
4795 rlocator = &locators[j];
4796 break;
4797 }
4798 }
4799 }
4800 else
4801 {
4802 RelFileLocator locator;
4803
4804 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4805 rlocator = bsearch(&locator,
4806 locators, n, sizeof(RelFileLocator),
4808 }
4809
4810 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4811 if (rlocator == NULL)
4812 continue;
4813
4814 LockBufHdr(bufHdr);
4815 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4816 InvalidateBuffer(bufHdr); /* releases spinlock */
4817 else
4818 UnlockBufHdr(bufHdr);
4819 }
4820
4821 pfree(locators);
4822 pfree(rels);
4823}
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:6237
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:83
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:702
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
#define qsort(a, b, c, d)
Definition: port.h:500

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 6683 of file bufmgr.c.

6685{
6686 *buffers_evicted = 0;
6687 *buffers_skipped = 0;
6688 *buffers_flushed = 0;
6689
6690 for (int buf = 1; buf <= NBuffers; buf++)
6691 {
6692 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6693 uint32 buf_state;
6694 bool buffer_flushed;
6695
6697
6698 buf_state = pg_atomic_read_u32(&desc->state);
6699 if (!(buf_state & BM_VALID))
6700 continue;
6701
6704
6705 LockBufHdr(desc);
6706
6707 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6708 (*buffers_evicted)++;
6709 else
6710 (*buffers_skipped)++;
6711
6712 if (buffer_flushed)
6713 (*buffers_flushed)++;
6714 }
6715}
#define BM_VALID
Definition: buf_internals.h:70
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition: bufmgr.c:6592
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:259
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:449

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 6733 of file bufmgr.c.

6735{
6737
6738 *buffers_skipped = 0;
6739 *buffers_evicted = 0;
6740 *buffers_flushed = 0;
6741
6742 for (int buf = 1; buf <= NBuffers; buf++)
6743 {
6744 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6745 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
6746 bool buffer_flushed;
6747
6749
6750 /* An unlocked precheck should be safe and saves some cycles. */
6751 if ((buf_state & BM_VALID) == 0 ||
6753 continue;
6754
6755 /* Make sure we can pin the buffer. */
6758
6759 buf_state = LockBufHdr(desc);
6760
6761 /* recheck, could have changed without the lock */
6762 if ((buf_state & BM_VALID) == 0 ||
6764 {
6765 UnlockBufHdr(desc);
6766 continue;
6767 }
6768
6769 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6770 (*buffers_evicted)++;
6771 else
6772 (*buffers_skipped)++;
6773
6774 if (buffer_flushed)
6775 (*buffers_flushed)++;
6776 }
6777}
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:647
RelFileLocator rd_locator
Definition: rel.h:57

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool *  buffer_flushed 
)

Definition at line 6654 of file bufmgr.c.

6655{
6656 BufferDesc *desc;
6657
6659
6660 /* Make sure we can pin the buffer. */
6663
6664 desc = GetBufferDescriptor(buf - 1);
6665 LockBufHdr(desc);
6666
6667 return EvictUnpinnedBufferInternal(desc, buffer_flushed);
6668}

References Assert(), buf, BufferIsLocal, BufferIsValid(), CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by invalidate_rel_block(), modify_rel_block(), and pg_buffercache_evict().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 845 of file bufmgr.c.

/*
 * ExtendBufferedRel -- convenience wrapper around ExtendBufferedRelBy()
 * that extends the given fork by exactly one block and returns the
 * buffer holding the new block.
 */
849{
850 Buffer buf;
851 uint32 extend_by = 1;
852
853 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
854 &buf, &extend_by);
855
856 return buf;
857}
int Buffer
Definition: buf.h:23
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:877

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer buffers,
uint32 extended_by 
)

Definition at line 877 of file bufmgr.c.

/*
 * ExtendBufferedRelBy -- extend the given relation fork by extend_by
 * blocks, returning the buffers in *buffers and the count actually
 * extended in *extended_by.  Exactly one of bmr.rel / bmr.smgr must be
 * set (asserted below); when only bmr.rel is given, relpersistence is
 * filled in from the Relation's pg_class entry before delegating to
 * ExtendBufferedRelCommon().
 */
884{
885 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
886 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
887 Assert(extend_by > 0);
888
889 if (bmr.relpersistence == '\0')
890 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
891
892 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
893 extend_by, InvalidBlockNumber,
894 buffers, extended_by);
895}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2531
SMgrRelation smgr
Definition: bufmgr.h:110
Form_pg_class rd_rel
Definition: rel.h:111

References Assert(), ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 906 of file bufmgr.c.

912{
914 uint32 extended_by = 0;
915 Buffer buffer = InvalidBuffer;
916 Buffer buffers[64];
917
918 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
919 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
920 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
921
922 if (bmr.relpersistence == '\0')
923 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
924
925 /*
926 * If desired, create the file if it doesn't exist. If
927 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
928 * an smgrexists call.
929 */
930 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
931 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
932 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
933 !smgrexists(BMR_GET_SMGR(bmr), fork))
934 {
936
937 /* recheck, fork might have been created concurrently */
938 if (!smgrexists(BMR_GET_SMGR(bmr), fork))
940
942 }
943
944 /*
945 * If requested, invalidate size cache, so that smgrnblocks asks the
946 * kernel.
947 */
948 if (flags & EB_CLEAR_SIZE_CACHE)
949 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
950
951 /*
952 * Estimate how many pages we'll need to extend by. This avoids acquiring
953 * unnecessarily many victim buffers.
954 */
956
957 /*
958 * Since no-one else can be looking at the page contents yet, there is no
959 * difference between an exclusive lock and a cleanup-strength lock. Note
960 * that we pass the original mode to ReadBuffer_common() below, when
961 * falling back to reading the buffer to a concurrent relation extension.
962 */
964 flags |= EB_LOCK_TARGET;
965
966 while (current_size < extend_to)
967 {
968 uint32 num_pages = lengthof(buffers);
969 BlockNumber first_block;
970
971 if ((uint64) current_size + num_pages > extend_to)
972 num_pages = extend_to - current_size;
973
974 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
975 num_pages, extend_to,
976 buffers, &extended_by);
977
978 current_size = first_block + extended_by;
979 Assert(num_pages != 0 || current_size >= extend_to);
980
981 for (uint32 i = 0; i < extended_by; i++)
982 {
983 if (first_block + i != extend_to - 1)
984 ReleaseBuffer(buffers[i]);
985 else
986 buffer = buffers[i];
987 }
988 }
989
990 /*
991 * It's possible that another backend concurrently extended the relation.
992 * In that case read the buffer.
993 *
994 * XXX: Should we control this via a flag?
995 */
996 if (buffer == InvalidBuffer)
997 {
998 Assert(extended_by == 0);
999 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1000 fork, extend_to - 1, mode, strategy);
1001 }
1002
1003 return buffer;
1004}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1174
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5366
#define BMR_GET_SMGR(bmr)
Definition: bufmgr.h:118
#define lengthof(array)
Definition: c.h:790
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:474
#define ExclusiveLock
Definition: lockdefs.h:42
static int64 current_size
Definition: pg_checksums.c:64
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:819

References Assert(), BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5306 of file bufmgr.c.

5307{
5308 int i;
5309 BufferDesc *bufHdr;
5310
5311 for (i = 0; i < NBuffers; i++)
5312 {
5313 uint32 buf_state;
5314
5315 bufHdr = GetBufferDescriptor(i);
5316
5317 /*
5318 * As in DropRelationBuffers, an unlocked precheck should be safe and
5319 * saves some cycles.
5320 */
5321 if (bufHdr->tag.dbOid != dbid)
5322 continue;
5323
5324 /* Make sure we can handle the pin */
5327
5328 buf_state = LockBufHdr(bufHdr);
5329 if (bufHdr->tag.dbOid == dbid &&
5330 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5331 {
5332 PinBuffer_Locked(bufHdr);
5334 UnpinBuffer(bufHdr);
5335 }
5336 else
5337 UnlockBufHdr(bufHdr);
5338 }
5339}
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4420
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:3179
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:3247
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:289

References BM_DIRTY, BM_VALID, CurrentResourceOwner, buftag::dbOid, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5346 of file bufmgr.c.

5347{
5348 BufferDesc *bufHdr;
5349
5350 /* currently not needed, but no fundamental reason not to support */
5351 Assert(!BufferIsLocal(buffer));
5352
5353 Assert(BufferIsPinned(buffer));
5354
5355 bufHdr = GetBufferDescriptor(buffer - 1);
5356
5357 Assert(BufferIsLockedByMe(buffer));
5358
5360}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4283
bool BufferIsLockedByMe(Buffer buffer)
Definition: bufmgr.c:2843

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 4942 of file bufmgr.c.

4943{
4944 int i;
4945 BufferDesc *bufHdr;
4946 SMgrRelation srel = RelationGetSmgr(rel);
4947
4948 if (RelationUsesLocalBuffers(rel))
4949 {
4950 for (i = 0; i < NLocBuffer; i++)
4951 {
4952 uint32 buf_state;
4953
4954 bufHdr = GetLocalBufferDescriptor(i);
4955 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4956 ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
4957 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4958 {
4959 ErrorContextCallback errcallback;
4960
4961 /* Setup error traceback support for ereport() */
4963 errcallback.arg = bufHdr;
4964 errcallback.previous = error_context_stack;
4965 error_context_stack = &errcallback;
4966
4967 /* Make sure we can handle the pin */
4970
4971 /*
4972 * Pin/unpin mostly to make valgrind work, but it also seems
4973 * like the right thing to do.
4974 */
4975 PinLocalBuffer(bufHdr, false);
4976
4977
4978 FlushLocalBuffer(bufHdr, srel);
4979
4981
4982 /* Pop the error context stack */
4983 error_context_stack = errcallback.previous;
4984 }
4985 }
4986
4987 return;
4988 }
4989
4990 for (i = 0; i < NBuffers; i++)
4991 {
4992 uint32 buf_state;
4993
4994 bufHdr = GetBufferDescriptor(i);
4995
4996 /*
4997 * As in DropRelationBuffers, an unlocked precheck should be safe and
4998 * saves some cycles.
4999 */
5000 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
5001 continue;
5002
5003 /* Make sure we can handle the pin */
5006
5007 buf_state = LockBufHdr(bufHdr);
5008 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5009 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5010 {
5011 PinBuffer_Locked(bufHdr);
5013 UnpinBuffer(bufHdr);
5014 }
5015 else
5016 UnlockBufHdr(bufHdr);
5017 }
5018}
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:6221
ErrorContextCallback * error_context_stack
Definition: elog.c:95
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition: localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:805
int NLocBuffer
Definition: localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:577
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u32(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 5030 of file bufmgr.c.

5031{
5032 int i;
5033 SMgrSortArray *srels;
5034 bool use_bsearch;
5035
5036 if (nrels == 0)
5037 return;
5038
5039 /* fill-in array for qsort */
5040 srels = palloc(sizeof(SMgrSortArray) * nrels);
5041
5042 for (i = 0; i < nrels; i++)
5043 {
5044 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5045
5046 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5047 srels[i].srel = smgrs[i];
5048 }
5049
5050 /*
5051 * Save the bsearch overhead for low number of relations to sync. See
5052 * DropRelationsAllBuffers for details.
5053 */
5054 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
5055
5056 /* sort the list of SMgrRelations if necessary */
5057 if (use_bsearch)
5058 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5059
5060 for (i = 0; i < NBuffers; i++)
5061 {
5062 SMgrSortArray *srelent = NULL;
5063 BufferDesc *bufHdr = GetBufferDescriptor(i);
5064 uint32 buf_state;
5065
5066 /*
5067 * As in DropRelationBuffers, an unlocked precheck should be safe and
5068 * saves some cycles.
5069 */
5070
5071 if (!use_bsearch)
5072 {
5073 int j;
5074
5075 for (j = 0; j < nrels; j++)
5076 {
5077 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5078 {
5079 srelent = &srels[j];
5080 break;
5081 }
5082 }
5083 }
5084 else
5085 {
5086 RelFileLocator rlocator;
5087
5088 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5089 srelent = bsearch(&rlocator,
5090 srels, nrels, sizeof(SMgrSortArray),
5092 }
5093
5094 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5095 if (srelent == NULL)
5096 continue;
5097
5098 /* Make sure we can handle the pin */
5101
5102 buf_state = LockBufHdr(bufHdr);
5103 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5104 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5105 {
5106 PinBuffer_Locked(bufHdr);
5108 UnpinBuffer(bufHdr);
5109 }
5110 else
5111 UnlockBufHdr(bufHdr);
5112 }
5113
5114 pfree(srels);
5115}
SMgrRelation srel
Definition: bufmgr.c:140
RelFileLocator rlocator
Definition: bufmgr.c:139

References Assert(), BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), NBuffers, palloc(), pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FreeAccessStrategy()

void FreeAccessStrategy ( BufferAccessStrategy  strategy)

Definition at line 643 of file freelist.c.

/*
 * FreeAccessStrategy -- release a buffer access strategy object.
 * A NULL argument (the "default" strategy) is accepted and is a no-op.
 */
644{
645 /* don't crash if called on a "default" strategy */
646 if (strategy != NULL)
647 pfree(strategy);
648}

References pfree().

Referenced by blgetbitmap(), FreeBulkInsertState(), heap_endscan(), initscan(), parallel_vacuum_main(), and RelationCopyStorageUsingBuffer().

◆ GetAccessStrategy()

BufferAccessStrategy GetAccessStrategy ( BufferAccessStrategyType  btype)

Definition at line 461 of file freelist.c.

462{
463 int ring_size_kb;
464
465 /*
466 * Select ring size to use. See buffer/README for rationales.
467 *
468 * Note: if you change the ring size for BAS_BULKREAD, see also
469 * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
470 */
471 switch (btype)
472 {
473 case BAS_NORMAL:
474 /* if someone asks for NORMAL, just give 'em a "default" object */
475 return NULL;
476
477 case BAS_BULKREAD:
478 {
479 int ring_max_kb;
480
481 /*
482 * The ring always needs to be large enough to allow some
483 * separation in time between providing a buffer to the user
484 * of the strategy and that buffer being reused. Otherwise the
485 * user's pin will prevent reuse of the buffer, even without
486 * concurrent activity.
487 *
488 * We also need to ensure the ring always is large enough for
489 * SYNC_SCAN_REPORT_INTERVAL, as noted above.
490 *
491 * Thus we start out a minimal size and increase the size
492 * further if appropriate.
493 */
494 ring_size_kb = 256;
495
496 /*
497 * There's no point in a larger ring if we won't be allowed to
498 * pin sufficiently many buffers. But we never limit to less
499 * than the minimal size above.
500 */
501 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
502 ring_max_kb = Max(ring_size_kb, ring_max_kb);
503
504 /*
505 * We would like the ring to additionally have space for the
506 * configured degree of IO concurrency. While being read in,
507 * buffers can obviously not yet be reused.
508 *
509 * Each IO can be up to io_combine_limit blocks large, and we
510 * want to start up to effective_io_concurrency IOs.
511 *
512 * Note that effective_io_concurrency may be 0, which disables
513 * AIO.
514 */
515 ring_size_kb += (BLCKSZ / 1024) *
517
518 if (ring_size_kb > ring_max_kb)
519 ring_size_kb = ring_max_kb;
520 break;
521 }
522 case BAS_BULKWRITE:
523 ring_size_kb = 16 * 1024;
524 break;
525 case BAS_VACUUM:
526 ring_size_kb = 2048;
527 break;
528
529 default:
530 elog(ERROR, "unrecognized buffer access strategy: %d",
531 (int) btype);
532 return NULL; /* keep compiler quiet */
533 }
534
535 return GetAccessStrategyWithSize(btype, ring_size_kb);
536}
int effective_io_concurrency
Definition: bufmgr.c:155
int io_combine_limit
Definition: bufmgr.c:170
uint32 GetPinLimit(void)
Definition: bufmgr.c:2475
#define Max(x, y)
Definition: c.h:1000
BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
Definition: freelist.c:546

References BAS_BULKREAD, BAS_BULKWRITE, BAS_NORMAL, BAS_VACUUM, effective_io_concurrency, elog, ERROR, GetAccessStrategyWithSize(), GetPinLimit(), io_combine_limit, and Max.

Referenced by blgetbitmap(), bt_check_every_level(), collect_corrupt_items(), collect_visibility_data(), GetBulkInsertState(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), initscan(), pgstat_index(), pgstathashindex(), pgstatindex_impl(), RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), statapprox_heap(), and verify_heapam().

◆ GetAccessStrategyBufferCount()

int GetAccessStrategyBufferCount ( BufferAccessStrategy  strategy)

Definition at line 586 of file freelist.c.

587{
588 if (strategy == NULL)
589 return 0;
590
591 return strategy->nbuffers;
592}

References BufferAccessStrategyData::nbuffers.

Referenced by parallel_vacuum_init().

◆ GetAccessStrategyPinLimit()

int GetAccessStrategyPinLimit ( BufferAccessStrategy  strategy)

Definition at line 609 of file freelist.c.

610{
611 if (strategy == NULL)
612 return NBuffers;
613
614 switch (strategy->btype)
615 {
616 case BAS_BULKREAD:
617
618 /*
619 * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
620 * shouldn't be a problem and the caller is free to pin up to the
621 * entire ring at once.
622 */
623 return strategy->nbuffers;
624
625 default:
626
627 /*
628 * Tell caller not to pin more than half the buffers in the ring.
629 * This is a trade-off between look ahead distance and deferring
630 * writeback and associated WAL traffic.
631 */
632 return strategy->nbuffers / 2;
633 }
634}
BufferAccessStrategyType btype
Definition: freelist.c:67

References BAS_BULKREAD, BufferAccessStrategyData::btype, BufferAccessStrategyData::nbuffers, and NBuffers.

Referenced by read_stream_begin_impl().

◆ GetAccessStrategyWithSize()

BufferAccessStrategy GetAccessStrategyWithSize ( BufferAccessStrategyType  btype,
int  ring_size_kb 
)

Definition at line 546 of file freelist.c.

547{
548 int ring_buffers;
549 BufferAccessStrategy strategy;
550
551 Assert(ring_size_kb >= 0);
552
553 /* Figure out how many buffers ring_size_kb is */
554 ring_buffers = ring_size_kb / (BLCKSZ / 1024);
555
556 /* 0 means unlimited, so no BufferAccessStrategy required */
557 if (ring_buffers == 0)
558 return NULL;
559
560 /* Cap to 1/8th of shared_buffers */
561 ring_buffers = Min(NBuffers / 8, ring_buffers);
562
563 /* NBuffers should never be less than 16, so this shouldn't happen */
564 Assert(ring_buffers > 0);
565
566 /* Allocate the object and initialize all elements to zeroes */
567 strategy = (BufferAccessStrategy)
568 palloc0(offsetof(BufferAccessStrategyData, buffers) +
569 ring_buffers * sizeof(Buffer));
570
571 /* Set fields that don't start out zero */
572 strategy->btype = btype;
573 strategy->nbuffers = ring_buffers;
574
575 return strategy;
576}
struct BufferAccessStrategyData * BufferAccessStrategy
Definition: buf.h:44
#define Min(x, y)
Definition: c.h:1006
void * palloc0(Size size)
Definition: mcxt.c:1395

References Assert(), BufferAccessStrategyData::btype, Min, BufferAccessStrategyData::nbuffers, NBuffers, and palloc0().

Referenced by do_autovacuum(), ExecVacuum(), GetAccessStrategy(), and parallel_vacuum_main().

◆ GetAdditionalLocalPinLimit()

uint32 GetAdditionalLocalPinLimit ( void  )

Definition at line 315 of file localbuf.c.

316{
	/*
	 * NOTE(review): original source lines 317-318 are missing from this
	 * extract -- the documentation generator dropped lines containing only
	 * hyperlinked identifiers.  Per the reference list below, the full body
	 * in localbuf.c uses NLocalPinnedBuffers and num_temp_buffers; consult
	 * localbuf.c line 315 for the complete definition.
	 */
319}
int num_temp_buffers
Definition: guc_tables.c:553
static int NLocalPinnedBuffers
Definition: localbuf.c:56

References Assert(), NLocalPinnedBuffers, and num_temp_buffers.

Referenced by read_stream_start_pending_read().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2487 of file bufmgr.c.

2488{
2489 uint32 estimated_pins_held;
2490
2491 /*
2492 * We get the number of "overflowed" pins for free, but don't know the
2493 * number of pins in PrivateRefCountArray. The cost of calculating that
2494 * exactly doesn't seem worth it, so just assume the max.
2495 */
2496 estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2497
2498 /* Is this backend already holding more than its fair share? */
2499 if (estimated_pins_held > MaxProportionalPins)
2500 return 0;
2501
2502 return MaxProportionalPins - estimated_pins_held;
2503}
static uint32 MaxProportionalPins
Definition: bufmgr.c:221
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:100

References MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().

◆ GetLocalPinLimit()

uint32 GetLocalPinLimit ( void  )

Definition at line 307 of file localbuf.c.

308{
309 /* Every backend has its own temporary buffers, and can pin them all. */
310 return num_temp_buffers;
311}

References num_temp_buffers.

Referenced by read_stream_begin_impl().

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2475 of file bufmgr.c.

2476{
2477 return MaxProportionalPins;
2478}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5831 of file bufmgr.c.

5832{
5833 int bufid = GetStartupBufferPinWaitBufId();
5834
5835 /*
5836 * If we get woken slowly then it's possible that the Startup process was
5837 * already woken by other backends before we got here. Also possible that
5838 * we get here by multiple interrupts or interrupts at inappropriate
5839 * times, so make sure we do nothing if the bufid is not set.
5840 */
5841 if (bufid < 0)
5842 return false;
5843
5844 if (GetPrivateRefCount(bufid + 1) > 0)
5845 return true;
5846
5847 return false;
5848}
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:771

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 5398 of file bufmgr.c.

5399{
	/* Caller must already hold at least one pin on the buffer. */
5400 Assert(BufferIsPinned(buffer));
	/*
	 * NOTE(review): original source lines 5401, 5406 and 5412 are missing
	 * from this extract (hyperlinked lines dropped by the doc generator).
	 * Per the reference list below they cover resource-owner bookkeeping
	 * (ResourceOwnerEnlarge/ResourceOwnerRememberBuffer) and the
	 * PrivateRefCountEntry declaration; see bufmgr.c for the full body.
	 */
5402 if (BufferIsLocal(buffer))
5403 LocalRefCount[-buffer - 1]++;
5404 else
5405 {
5407
5408 ref = GetPrivateRefCountEntry(buffer, true);
5409 Assert(ref != NULL);
5410 ref->refcount++;
5411 }
5413}
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:351

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4007 of file bufmgr.c.

4008{
4009 HASHCTL hash_ctl;
4010
	/*
	 * NOTE(review): original source lines 4018, 4026 and 4033 are missing
	 * from this extract (hyperlinked lines dropped by the doc generator).
	 * Per the reference list below they involve MaxProportionalPins, the
	 * HASH_ELEM | HASH_BLOBS flags for hash_create(), and an
	 * on_shmem_exit() registration; see bufmgr.c for the full body.
	 */
4011 /*
4012 * An advisory limit on the number of pins each backend should hold, based
4013 * on shared_buffers and the maximum number of connections possible.
4014 * That's very pessimistic, but outside toy-sized shared_buffers it should
4015 * allow plenty of pins. LimitAdditionalPins() and
4016 * GetAdditionalPinLimit() can be used to check the remaining balance.
4017 */
4019
4020 memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
4021
4022 hash_ctl.keysize = sizeof(Buffer);
4023 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4024
4025 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
4027
4028 /*
4029 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
4030 * the corresponding phase of backend shutdown.
4031 */
4032 Assert(MyProc != NULL);
4034}
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:4041
struct PrivateRefCountEntry PrivateRefCountEntry
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:215
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:216
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:358
int MaxBackends
Definition: globals.c:146
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
#define NUM_AUXILIARY_PROCS
Definition: proc.h:463
PGPROC * MyProc
Definition: proc.c:67
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert(), AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 5915 of file bufmgr.c.

5916{
5917 BufferDesc *bufHdr;
5918 uint32 buf_state;
5919
5920 Assert(BufferIsValid(buffer));
5921
5922 /* see AIO related comment in LockBufferForCleanup() */
5923
5924 if (BufferIsLocal(buffer))
5925 {
5926 /* There should be exactly one pin */
5927 if (LocalRefCount[-buffer - 1] != 1)
5928 return false;
5929 /* Nobody else to wait for */
5930 return true;
5931 }
5932
5933 /* There should be exactly one local pin */
5934 if (GetPrivateRefCount(buffer) != 1)
5935 return false;
5936
5937 bufHdr = GetBufferDescriptor(buffer - 1);
5938
5939 /* caller must hold exclusive lock on buffer */
	/*
	 * NOTE(review): original source line 5940 is missing from this extract
	 * (hyperlinked line dropped by the doc generator).  Per the reference
	 * list below it is an assertion involving BufferIsLockedByMeInMode()
	 * and BUFFER_LOCK_EXCLUSIVE; see bufmgr.c for the full body.
	 */
5941
5942 buf_state = LockBufHdr(bufHdr);
5943
5944 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5945 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5946 {
5947 /* pincount is OK. */
5948 UnlockBufHdr(bufHdr);
5949 return true;
5950 }
5951
5952 UnlockBufHdr(bufHdr);
5953 return false;
5954}

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().

◆ LimitAdditionalLocalPins()

void LimitAdditionalLocalPins ( uint32 additional_pins)

Definition at line 323 of file localbuf.c.

324{
325 uint32 max_pins;
326
327 if (*additional_pins <= 1)
328 return;
329
330 /*
331 * In contrast to LimitAdditionalPins() other backends don't play a role
332 * here. We can allow up to NLocBuffer pins in total, but it might not be
333 * initialized yet so read num_temp_buffers.
334 */
	/*
	 * NOTE(review): original source line 335 is missing from this extract
	 * (hyperlinked line dropped by the doc generator).  Per the reference
	 * list below it computes max_pins from num_temp_buffers and
	 * NLocalPinnedBuffers; see localbuf.c for the full body.
	 */
336
337 if (*additional_pins >= max_pins)
338 *additional_pins = max_pins;
339}

References NLocalPinnedBuffers, and num_temp_buffers.

Referenced by ExtendBufferedRelLocal().

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2513 of file bufmgr.c.

2514{
2515 uint32 limit;
2516
2517 if (*additional_pins <= 1)
2518 return;
2519
2520 limit = GetAdditionalPinLimit();
2521 limit = Max(limit, 1);
2522 if (limit < *additional_pins)
2523 *additional_pins = limit;
2524}
uint32 GetAdditionalPinLimit(void)
Definition: bufmgr.c:2487

References GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 5604 of file bufmgr.c.

5605{
5606 BufferDesc *buf;
5607
5608 Assert(BufferIsPinned(buffer));
5609 if (BufferIsLocal(buffer))
5610 return; /* local buffers need no lock */
5611
5612 buf = GetBufferDescriptor(buffer - 1);
5613
	/*
	 * NOTE(review): original source lines 5615, 5617 and 5619 -- the bodies
	 * of the three branches below -- are missing from this extract
	 * (hyperlinked lines dropped by the doc generator).  Per the reference
	 * list below they call LWLockRelease()/LWLockAcquire() on the buffer's
	 * content lock; see bufmgr.c for the full body.
	 */
5614 if (mode == BUFFER_LOCK_UNLOCK)
5616 else if (mode == BUFFER_LOCK_SHARE)
5618 else if (mode == BUFFER_LOCK_EXCLUSIVE)
5620 else
5621 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5622}
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894

References Assert(), buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), BitmapHeapScanNextBlock(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), create_toy_buffer(), entryLoadMoreItems(), ExtendBufferedRelShared(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), 
gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_lock(), heap_inplace_unlock(), heap_inplace_update_and_unlock(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), invalidate_rel_block(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), update_most_recent_deletion_info(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroAndLockBuffer().

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5684 of file bufmgr.c.

5685{
	/*
	 * NOTE(review): this extract is incomplete -- the doc generator dropped
	 * every source line consisting only of hyperlinked identifiers (e.g.
	 * 5694, 5715, 5730, 5737, 5746, 5749, 5752, 5754, 5776, 5779, 5781,
	 * 5797, 5799, 5814), visible as gaps in the embedded line numbering.
	 * Per the reference list below these involve CheckBufferIsPinnedOnce(),
	 * LockBuffer()/BUFFER_LOCK_* calls, LogRecoveryConflict(),
	 * set_ps_display_remove_suffix(), BM_PIN_COUNT_WAITER manipulation and
	 * ResolveRecoveryConflictWithBufferPin(); see bufmgr.c for the full
	 * body.
	 */
5686 BufferDesc *bufHdr;
5687 TimestampTz waitStart = 0;
5688 bool waiting = false;
5689 bool logged_recovery_conflict = false;
5690
5691 Assert(BufferIsPinned(buffer));
5692 Assert(PinCountWaitBuf == NULL);
5693
5695
5696 /*
5697 * We do not yet need to be worried about in-progress AIOs holding a pin,
5698 * as we, so far, only support doing reads via AIO and this function can
5699 * only be called once the buffer is valid (i.e. no read can be in
5700 * flight).
5701 */
5702
5703 /* Nobody else to wait for */
5704 if (BufferIsLocal(buffer))
5705 return;
5706
5707 bufHdr = GetBufferDescriptor(buffer - 1);
5708
5709 for (;;)
5710 {
5711 uint32 buf_state;
5712 uint32 unset_bits = 0;
5713
5714 /* Try to acquire lock */
5716 buf_state = LockBufHdr(bufHdr);
5717
5718 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5719 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5720 {
5721 /* Successfully acquired exclusive lock with pincount 1 */
5722 UnlockBufHdr(bufHdr);
5723
5724 /*
5725 * Emit the log message if recovery conflict on buffer pin was
5726 * resolved but the startup process waited longer than
5727 * deadlock_timeout for it.
5728 */
5729 if (logged_recovery_conflict)
5731 waitStart, GetCurrentTimestamp(),
5732 NULL, false);
5733
5734 if (waiting)
5735 {
5736 /* reset ps display to remove the suffix if we added one */
5738 waiting = false;
5739 }
5740 return;
5741 }
5742 /* Failed, so mark myself as waiting for pincount 1 */
5743 if (buf_state & BM_PIN_COUNT_WAITER)
5744 {
5745 UnlockBufHdr(bufHdr);
5747 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5748 }
5750 PinCountWaitBuf = bufHdr;
5751 UnlockBufHdrExt(bufHdr, buf_state,
5753 0);
5755
5756 /* Wait to be signaled by UnpinBuffer() */
5757 if (InHotStandby)
5758 {
5759 if (!waiting)
5760 {
5761 /* adjust the process title to indicate that it's waiting */
5762 set_ps_display_suffix("waiting");
5763 waiting = true;
5764 }
5765
5766 /*
5767 * Emit the log message if the startup process is waiting longer
5768 * than deadlock_timeout for recovery conflict on buffer pin.
5769 *
5770 * Skip this if first time through because the startup process has
5771 * not started waiting yet in this case. So, the wait start
5772 * timestamp is set after this logic.
5773 */
5774 if (waitStart != 0 && !logged_recovery_conflict)
5775 {
5777
5778 if (TimestampDifferenceExceeds(waitStart, now,
5780 {
5782 waitStart, now, NULL, true);
5783 logged_recovery_conflict = true;
5784 }
5785 }
5786
5787 /*
5788 * Set the wait start timestamp if logging is enabled and first
5789 * time through.
5790 */
5791 if (log_recovery_conflict_waits && waitStart == 0)
5792 waitStart = GetCurrentTimestamp();
5793
5794 /* Publish the bufid that Startup process waits on */
5795 SetStartupBufferPinWaitBufId(buffer - 1);
5796 /* Set alarm and then wait to be signaled by UnpinBuffer() */
5798 /* Reset the published bufid */
5800 }
5801 else
5802 ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
5803
5804 /*
5805 * Remove flag marking us as waiter. Normally this will not be set
5806 * anymore, but ProcWaitForSignal() can return for other signals as
5807 * well. We take care to only reset the flag if we're the waiter, as
5808 * theoretically another backend could have started waiting. That's
5809 * impossible with the current usages due to table level locking, but
5810 * better be safe.
5811 */
5812 buf_state = LockBufHdr(bufHdr);
5813 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5815 unset_bits |= BM_PIN_COUNT_WAITER;
5816
5817 UnlockBufHdrExt(bufHdr, buf_state,
5818 0, unset_bits,
5819 0);
5820
5821 PinCountWaitBuf = NULL;
5822 /* Loop back and try again */
5823 }
5824}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:75
static uint32 UnlockBufHdrExt(BufferDesc *desc, uint32 old_buf_state, uint32 set_bits, uint32 unset_bits, int refcount_change)
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5651
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:183
int64 TimestampTz
Definition: timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:387
int DeadlockTimeout
Definition: proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:759
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1984
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:793
bool log_recovery_conflict_waits
Definition: standby.c:42
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
int wait_backend_pgprocno
static volatile sig_atomic_t waiting
Definition: waiteventset.c:171
#define InHotStandby
Definition: xlogutils.h:60

References Assert(), BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 2943 of file bufmgr.c.

2944{
	/*
	 * NOTE(review): original source lines 2961, 2988 and 2990 are missing
	 * from this extract (hyperlinked lines dropped by the doc generator).
	 * Per the reference list below they involve a BufferIsLockedByMeInMode
	 * assertion and the pgBufferUsage.shared_blks_dirtied /
	 * VacuumCostBalance accounting; see bufmgr.c for the full body.
	 */
2945 BufferDesc *bufHdr;
2946 uint32 buf_state;
2947 uint32 old_buf_state;
2948
2949 if (!BufferIsValid(buffer))
2950 elog(ERROR, "bad buffer ID: %d", buffer);
2951
2952 if (BufferIsLocal(buffer))
2953 {
2954 MarkLocalBufferDirty(buffer);
2955 return;
2956 }
2957
2958 bufHdr = GetBufferDescriptor(buffer - 1);
2959
2960 Assert(BufferIsPinned(buffer));
2962
2963 /*
2964 * NB: We have to wait for the buffer header spinlock to be not held, as
2965 * TerminateBufferIO() relies on the spinlock.
2966 */
2967 old_buf_state = pg_atomic_read_u32(&bufHdr->state);
2968 for (;;)
2969 {
2970 if (old_buf_state & BM_LOCKED)
2971 old_buf_state = WaitBufHdrUnlocked(bufHdr);
2972
2973 buf_state = old_buf_state;
2974
2975 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2976 buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
2977
2978 if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
2979 buf_state))
2980 break;
2981 }
2982
2983 /*
2984 * If the buffer was not dirty already, do vacuum accounting.
2985 */
2986 if (!(old_buf_state & BM_DIRTY))
2987 {
2989 if (VacuumCostActive)
2991 }
2992}
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:347
#define BM_LOCKED
Definition: buf_internals.h:68
#define BM_JUST_DIRTIED
Definition: buf_internals.h:74
pg_noinline uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:6294
bool VacuumCostActive
Definition: globals.c:158
int VacuumCostBalance
Definition: globals.c:157
int VacuumCostPageDirty
Definition: globals.c:153
BufferUsage pgBufferUsage
Definition: instrument.c:20
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:491
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), 
hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5430 of file bufmgr.c.

5431{
	/*
	 * NOTE(review): this extract is incomplete -- the doc generator dropped
	 * source lines consisting only of hyperlinked identifiers (e.g. 5464,
	 * 5479, 5490, 5516-5517, 5548, 5556, 5558), visible as gaps in the
	 * embedded line numbering.  Per the reference list below these involve
	 * the lsn declaration (InvalidXLogRecPtr), the BM_PERMANENT /
	 * BufTagGetRelFileLocator checks, DELAY_CHKPT_START setup, the
	 * UnlockBufHdrExt set-bits argument and the shared_blks_dirtied /
	 * VacuumCostBalance accounting; see bufmgr.c for the full body.
	 */
5432 BufferDesc *bufHdr;
5433 Page page = BufferGetPage(buffer);
5434
5435 if (!BufferIsValid(buffer))
5436 elog(ERROR, "bad buffer ID: %d", buffer);
5437
5438 if (BufferIsLocal(buffer))
5439 {
5440 MarkLocalBufferDirty(buffer);
5441 return;
5442 }
5443
5444 bufHdr = GetBufferDescriptor(buffer - 1);
5445
5446 Assert(GetPrivateRefCount(buffer) > 0);
5447 /* here, either share or exclusive lock is OK */
5448 Assert(BufferIsLockedByMe(buffer));
5449
5450 /*
5451 * This routine might get called many times on the same page, if we are
5452 * making the first scan after commit of an xact that added/deleted many
5453 * tuples. So, be as quick as we can if the buffer is already dirty. We
5454 * do this by not acquiring spinlock if it looks like the status bits are
5455 * already set. Since we make this test unlocked, there's a chance we
5456 * might fail to notice that the flags have just been cleared, and failed
5457 * to reset them, due to memory-ordering issues. But since this function
5458 * is only intended to be used in cases where failing to write out the
5459 * data would be harmless anyway, it doesn't really matter.
5460 */
5461 if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5463 {
5465 bool dirtied = false;
5466 bool delayChkptFlags = false;
5467 uint32 buf_state;
5468
5469 /*
5470 * If we need to protect hint bit updates from torn writes, WAL-log a
5471 * full page image of the page. This full page image is only necessary
5472 * if the hint bit update is the first change to the page since the
5473 * last checkpoint.
5474 *
5475 * We don't check full_page_writes here because that logic is included
5476 * when we call XLogInsert() since the value changes dynamically.
5477 */
5478 if (XLogHintBitIsNeeded() &&
5480 {
5481 /*
5482 * If we must not write WAL, due to a relfilelocator-specific
5483 * condition or being in recovery, don't dirty the page. We can
5484 * set the hint, just not dirty the page as a result so the hint
5485 * is lost when we evict the page or shutdown.
5486 *
5487 * See src/backend/storage/page/README for longer discussion.
5488 */
5489 if (RecoveryInProgress() ||
5491 return;
5492
5493 /*
5494 * If the block is already dirty because we either made a change
5495 * or set a hint already, then we don't need to write a full page
5496 * image. Note that aggressive cleaning of blocks dirtied by hint
5497 * bit setting would increase the call rate. Bulk setting of hint
5498 * bits would reduce the call rate...
5499 *
5500 * We must issue the WAL record before we mark the buffer dirty.
5501 * Otherwise we might write the page before we write the WAL. That
5502 * causes a race condition, since a checkpoint might occur between
5503 * writing the WAL record and marking the buffer dirty. We solve
5504 * that with a kluge, but one that is already in use during
5505 * transaction commit to prevent race conditions. Basically, we
5506 * simply prevent the checkpoint WAL record from being written
5507 * until we have marked the buffer dirty. We don't start the
5508 * checkpoint flush until we have marked dirty, so our checkpoint
5509 * must flush the change to disk successfully or the checkpoint
5510 * never gets written, so crash recovery will fix.
5511 *
5512 * It's possible we may enter here without an xid, so it is
5513 * essential that CreateCheckPoint waits for virtual transactions
5514 * rather than full transactionids.
5515 */
5518 delayChkptFlags = true;
5519 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5520 }
5521
5522 buf_state = LockBufHdr(bufHdr);
5523
5524 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5525
5526 if (!(buf_state & BM_DIRTY))
5527 {
5528 dirtied = true; /* Means "will be dirtied by this action" */
5529
5530 /*
5531 * Set the page LSN if we wrote a backup block. We aren't supposed
5532 * to set this when only holding a share lock but as long as we
5533 * serialise it somehow we're OK. We choose to set LSN while
5534 * holding the buffer header lock, which causes any reader of an
5535 * LSN who holds only a share lock to also obtain a buffer header
5536 * lock before using PageGetLSN(), which is enforced in
5537 * BufferGetLSNAtomic().
5538 *
5539 * If checksums are enabled, you might think we should reset the
5540 * checksum here. That will happen when the page is written
5541 * sometime later in this checkpoint cycle.
5542 */
5543 if (XLogRecPtrIsValid(lsn))
5544 PageSetLSN(page, lsn);
5545 }
5546
5547 UnlockBufHdrExt(bufHdr, buf_state,
5549 0, 0);
5550
5551 if (delayChkptFlags)
5552 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5553
5554 if (dirtied)
5555 {
5557 if (VacuumCostActive)
5559 }
5560 }
5561}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
#define DELAY_CHKPT_START
Definition: proc.h:135
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:573
int delayChkptFlags
Definition: proc.h:257
bool RecoveryInProgress(void)
Definition: xlog.c:6406
#define XLogRecPtrIsValid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1087

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers ( int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 6933 of file bufmgr.c.

6936{
6937 *buffers_dirtied = 0;
6938 *buffers_already_dirty = 0;
6939 *buffers_skipped = 0;
6940
6941 for (int buf = 1; buf <= NBuffers; buf++)
6942 {
6943 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6944 uint32 buf_state;
6945 bool buffer_already_dirty;
6946
6948
6949 buf_state = pg_atomic_read_u32(&desc->state);
6950 if (!(buf_state & BM_VALID))
6951 continue;
6952
6955
6956 LockBufHdr(desc);
6957
6958 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
6959 (*buffers_dirtied)++;
6960 else if (buffer_already_dirty)
6961 (*buffers_already_dirty)++;
6962 else
6963 (*buffers_skipped)++;
6964 }
6965}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition: bufmgr.c:6784

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers ( Relation  rel,
int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 6876 of file bufmgr.c.

6880{
6882
6883 *buffers_dirtied = 0;
6884 *buffers_already_dirty = 0;
6885 *buffers_skipped = 0;
6886
6887 for (int buf = 1; buf <= NBuffers; buf++)
6888 {
6889 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6890 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
6891 bool buffer_already_dirty;
6892
6894
6895 /* An unlocked precheck should be safe and saves some cycles. */
6896 if ((buf_state & BM_VALID) == 0 ||
6898 continue;
6899
6900 /* Make sure we can pin the buffer. */
6903
6904 buf_state = LockBufHdr(desc);
6905
6906 /* recheck, could have changed without the lock */
6907 if ((buf_state & BM_VALID) == 0 ||
6909 {
6910 UnlockBufHdr(desc);
6911 continue;
6912 }
6913
6914 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
6915 (*buffers_dirtied)++;
6916 else if (buffer_already_dirty)
6917 (*buffers_already_dirty)++;
6918 else
6919 (*buffers_skipped)++;
6920 }
6921}

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer ( Buffer  buf,
bool *  buffer_already_dirty 
)

Definition at line 6840 of file bufmgr.c.

6841{
6842 BufferDesc *desc;
6843 bool buffer_dirtied = false;
6844
6846
6847 /* Make sure we can pin the buffer. */
6850
6851 desc = GetBufferDescriptor(buf - 1);
6852 LockBufHdr(desc);
6853
6854 buffer_dirtied = MarkDirtyUnpinnedBufferInternal(buf, desc, buffer_already_dirty);
 6855 /* Both cannot be true at the same time */
6856 Assert(!(buffer_dirtied && *buffer_already_dirty));
6857
6858 return buffer_dirtied;
6859}

References Assert(), buf, BufferIsLocal, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 653 of file bufmgr.c.

654{
655 Assert(RelationIsValid(reln));
656 Assert(BlockNumberIsValid(blockNum));
657
658 if (RelationUsesLocalBuffers(reln))
659 {
660 /* see comments in ReadBufferExtended */
661 if (RELATION_IS_OTHER_TEMP(reln))
663 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
664 errmsg("cannot access temporary tables of other sessions")));
665
666 /* pass it off to localbuf.c */
667 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
668 }
669 else
670 {
671 /* pass it to the shared buffer version */
672 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
673 }
674}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:563
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereport(elevel,...)
Definition: elog.h:150
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:668
#define RelationIsValid(relation)
Definition: rel.h:490

References Assert(), BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 563 of file bufmgr.c.

566{
567 PrefetchBufferResult result = {InvalidBuffer, false};
568 BufferTag newTag; /* identity of requested block */
569 uint32 newHash; /* hash value for newTag */
570 LWLock *newPartitionLock; /* buffer partition lock for it */
571 int buf_id;
572
573 Assert(BlockNumberIsValid(blockNum));
574
575 /* create a tag so we can lookup the buffer */
576 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
577 forkNum, blockNum);
578
579 /* determine its hash code and partition lock ID */
580 newHash = BufTableHashCode(&newTag);
581 newPartitionLock = BufMappingPartitionLock(newHash);
582
583 /* see if the block is in the buffer pool already */
584 LWLockAcquire(newPartitionLock, LW_SHARED);
585 buf_id = BufTableLookup(&newTag, newHash);
586 LWLockRelease(newPartitionLock);
587
588 /* If not in buffers, initiate prefetch */
589 if (buf_id < 0)
590 {
591#ifdef USE_PREFETCH
592 /*
593 * Try to initiate an asynchronous read. This returns false in
594 * recovery if the relation file doesn't exist.
595 */
596 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
597 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
598 {
599 result.initiated_io = true;
600 }
601#endif /* USE_PREFETCH */
602 }
603 else
604 {
605 /*
606 * Report the buffer it was in at that time. The caller may be able
607 * to avoid a buffer table lookup, but it's not pinned and it must be
608 * rechecked!
609 */
610 result.recent_buffer = buf_id + 1;
611 }
612
613 /*
614 * If the block *is* in buffers, we do nothing. This is not really ideal:
615 * the block might be just about to be evicted, which would be stupid
616 * since we know we are going to need it soon. But the only easy answer
617 * is to bump the usage_count, which does not seem like a great solution:
618 * when the caller does ultimately touch the block, usage_count would get
619 * bumped again, resulting in too much favoritism for blocks that are
620 * involved in a prefetch sequence. A real fix would involve some
621 * additional per-buffer state, and it's not clear that there's enough of
622 * a problem to justify that.
623 */
624
625 return result;
626}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:678
Definition: lwlock.h:42
Buffer recent_buffer
Definition: bufmgr.h:61

References Assert(), BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 745 of file bufmgr.c.

746{
747 return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
748}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 792 of file bufmgr.c.

794{
795 Buffer buf;
796
797 /*
798 * Reject attempts to read non-local temporary relations; we would be
799 * likely to get wrong data since we have no visibility into the owning
800 * session's local buffers.
801 */
802 if (RELATION_IS_OTHER_TEMP(reln))
804 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
805 errmsg("cannot access temporary tables of other sessions")));
806
807 /*
808 * Read the buffer, and update pgstat counters to reflect a cache hit or
809 * miss.
810 */
811 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
812 forkNum, blockNum, mode, strategy);
813
814 return buf;
815}

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 829 of file bufmgr.c.

832{
833 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
834
835 return ReadBuffer_common(NULL, smgr,
836 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
837 forkNum, blockNum,
838 mode, strategy);
839}

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 684 of file bufmgr.c.

686{
687 BufferDesc *bufHdr;
688 BufferTag tag;
689 uint32 buf_state;
690
691 Assert(BufferIsValid(recent_buffer));
692
695 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
696
697 if (BufferIsLocal(recent_buffer))
698 {
699 int b = -recent_buffer - 1;
700
701 bufHdr = GetLocalBufferDescriptor(b);
702 buf_state = pg_atomic_read_u32(&bufHdr->state);
703
704 /* Is it still valid and holding the right tag? */
705 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
706 {
707 PinLocalBuffer(bufHdr, true);
708
710
711 return true;
712 }
713 }
714 else
715 {
716 bufHdr = GetBufferDescriptor(recent_buffer - 1);
717
718 /*
719 * Is it still valid and holding the right tag? We do an unlocked tag
720 * comparison first, to make it unlikely that we'll increment the
721 * usage counter of the wrong buffer, if someone calls us with a very
722 * out of date recent_buffer. Then we'll check it again if we get the
723 * pin.
724 */
725 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
726 PinBuffer(bufHdr, NULL, true))
727 {
728 if (BufferTagsEqual(&tag, &bufHdr->tag))
729 {
731 return true;
732 }
733 UnpinBuffer(bufHdr);
734 }
735 }
736
737 return false;
738}
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition: bufmgr.c:3068
int b
Definition: isn.c:74
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_hit
Definition: instrument.h:26

References Assert(), b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4437 of file bufmgr.c.

4438{
4439 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4440 {
4441 /*
4442 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4443 * tableam returns the size in bytes - but for the purpose of this
4444 * routine, we want the number of blocks. Therefore divide, rounding
4445 * up.
4446 */
4447 uint64 szbytes;
4448
4449 szbytes = table_relation_size(relation, forkNum);
4450
4451 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4452 }
4453 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4454 {
4455 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4456 }
4457 else
4458 Assert(false);
4459
4460 return 0; /* keep compiler quiet */
4461}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1847

References Assert(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3008 of file bufmgr.c.

3011{
3012 ForkNumber forkNum = MAIN_FORKNUM;
3013 BufferDesc *bufHdr;
3014
3015 if (BufferIsValid(buffer))
3016 {
3017 Assert(BufferIsPinned(buffer));
3018 if (BufferIsLocal(buffer))
3019 {
3020 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3021 if (bufHdr->tag.blockNum == blockNum &&
3022 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3023 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3024 return buffer;
3025 UnpinLocalBuffer(buffer);
3026 }
3027 else
3028 {
3029 bufHdr = GetBufferDescriptor(buffer - 1);
3030 /* we have pin, so it's ok to examine tag without spinlock */
3031 if (bufHdr->tag.blockNum == blockNum &&
3032 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3033 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3034 return buffer;
3035 UnpinBuffer(bufHdr);
3036 }
3037 }
3038
3039 return ReadBuffer(relation, blockNum);
3040}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5366 of file bufmgr.c.

5367{
5368 if (!BufferIsValid(buffer))
5369 elog(ERROR, "bad buffer ID: %d", buffer);
5370
5371 if (BufferIsLocal(buffer))
5372 UnpinLocalBuffer(buffer);
5373 else
5374 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5375}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), 
summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1489 of file bufmgr.c.

1493{
1494 int nblocks = 1;
1495 bool result;
1496
1497 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1498 false /* single block, no forwarding */ );
1499 Assert(nblocks == 1); /* single block can't be short */
1500
1501 return result;
1502}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition: bufmgr.c:1243

References Assert(), PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)

Definition at line 1470 of file bufmgr.c.

1475{
1476 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1477 true /* expect forwarded buffers */ );
1478}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5573 of file bufmgr.c.

5574{
5576
5577 if (buf)
5578 {
5579 uint32 buf_state;
5580 uint32 unset_bits = 0;
5581
5582 buf_state = LockBufHdr(buf);
5583
5584 /*
5585 * Don't complain if flag bit not set; it could have been reset but we
5586 * got a cancel/die interrupt before getting the signal.
5587 */
5588 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5589 buf->wait_backend_pgprocno == MyProcNumber)
5590 unset_bits = BM_PIN_COUNT_WAITER;
5591
5592 UnlockBufHdrExt(buf, buf_state,
5593 0, unset_bits,
5594 0);
5595
5596 PinCountWaitBuf = NULL;
5597 }
5598}

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5383 of file bufmgr.c.

5384{
5386 ReleaseBuffer(buffer);
5387}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), 
hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation operation)

Definition at line 1613 of file bufmgr.c.

1614{
1615 PgAioReturn *aio_ret = &operation->io_return;
1616 IOContext io_context;
1617 IOObject io_object;
1618
1619 if (operation->persistence == RELPERSISTENCE_TEMP)
1620 {
1621 io_context = IOCONTEXT_NORMAL;
1622 io_object = IOOBJECT_TEMP_RELATION;
1623 }
1624 else
1625 {
1626 io_context = IOContextForStrategy(operation->strategy);
1627 io_object = IOOBJECT_RELATION;
1628 }
1629
1630 /*
1631 * If we get here without an IO operation having been issued, the
1632 * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
1633 * caller should not have called WaitReadBuffers().
1634 *
 1635 * In the case of IOMETHOD_SYNC, we start - as we used to before the
 1636 * introduction of AIO - the IO in WaitReadBuffers(). This is done as part
1637 * of the retry logic below, no extra code is required.
1638 *
1639 * This path is expected to eventually go away.
1640 */
1641 if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
1642 elog(ERROR, "waiting for read operation that didn't read");
1643
1644 /*
1645 * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
1646 * done. We may need multiple retries, not just because we could get
1647 * multiple partial reads, but also because some of the remaining
1648 * to-be-read buffers may have been read in by other backends, limiting
1649 * the IO size.
1650 */
1651 while (true)
1652 {
1653 int ignored_nblocks_progress;
1654
1655 CheckReadBuffersOperation(operation, false);
1656
1657 /*
1658 * If there is an IO associated with the operation, we may need to
1659 * wait for it.
1660 */
1661 if (pgaio_wref_valid(&operation->io_wref))
1662 {
1663 /*
1664 * Track the time spent waiting for the IO to complete. As
1665 * tracking a wait even if we don't actually need to wait
1666 *
1667 * a) is not cheap, due to the timestamping overhead
1668 *
1669 * b) reports some time as waiting, even if we never waited
1670 *
1671 * we first check if we already know the IO is complete.
1672 */
1673 if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
1674 !pgaio_wref_check_done(&operation->io_wref))
1675 {
1677
1678 pgaio_wref_wait(&operation->io_wref);
1679
1680 /*
1681 * The IO operation itself was already counted earlier, in
1682 * AsyncReadBuffers(), this just accounts for the wait time.
1683 */
1684 pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
1685 io_start, 0, 0);
1686 }
1687 else
1688 {
1689 Assert(pgaio_wref_check_done(&operation->io_wref));
1690 }
1691
1692 /*
1693 * We now are sure the IO completed. Check the results. This
1694 * includes reporting on errors if there were any.
1695 */
1696 ProcessReadBuffersResult(operation);
1697 }
1698
1699 /*
1700 * Most of the time, the one IO we already started, will read in
1701 * everything. But we need to deal with partial reads and buffers not
1702 * needing IO anymore.
1703 */
1704 if (operation->nblocks_done == operation->nblocks)
1705 break;
1706
1708
1709 /*
1710 * This may only complete the IO partially, either because some
1711 * buffers were already valid, or because of a partial read.
1712 *
1713 * NB: In contrast to after the AsyncReadBuffers() call in
1714 * StartReadBuffers(), we do *not* reduce
1715 * ReadBuffersOperation->nblocks here, callers expect the full
1716 * operation to be completed at this point (as more operations may
1717 * have been queued).
1718 */
1719 AsyncReadBuffers(operation, &ignored_nblocks_progress);
1720 }
1721
1722 CheckReadBuffersOperation(operation, true);
1723
1724 /* NB: READ_DONE tracepoint was already executed in completion callback */
1725}
int io_method
Definition: aio.c:74
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition: aio.c:971
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition: aio.c:1005
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition: aio.c:991
@ IOMETHOD_SYNC
Definition: aio.h:34
@ PGAIO_RS_UNKNOWN
Definition: aio_types.h:80
bool track_io_timing
Definition: bufmgr.c:147
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition: bufmgr.c:1508
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
Definition: bufmgr.c:1574
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition: bufmgr.c:1745
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:747
IOObject
Definition: pgstat.h:276
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:278
IOContext
Definition: pgstat.h:285
@ IOOP_READ
Definition: pgstat.h:315
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
uint32 status
Definition: aio_types.h:108
PgAioResult result
Definition: aio_types.h:132
PgAioWaitRef io_wref
Definition: bufmgr.h:150
BufferAccessStrategy strategy
Definition: bufmgr.h:138
PgAioReturn io_return
Definition: bufmgr.h:151

References Assert(), AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), PgAioReturn::result, PgAioResult::status, ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

Variable Documentation

◆ aio_local_buffer_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_local_buffer_readv_cb
extern

Definition at line 7645 of file bufmgr.c.

◆ aio_shared_buffer_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_shared_buffer_readv_cb
extern

Definition at line 7636 of file bufmgr.c.

◆ backend_flush_after

PGDLLIMPORT int backend_flush_after
extern

Definition at line 180 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

PGDLLIMPORT int bgwriter_flush_after
extern

Definition at line 179 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

PGDLLIMPORT int bgwriter_lru_maxpages
extern

Definition at line 145 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

PGDLLIMPORT double bgwriter_lru_multiplier
extern

Definition at line 146 of file bufmgr.c.

Referenced by BgBufferSync().

◆ BufferBlocks

PGDLLIMPORT char* BufferBlocks
extern

Definition at line 22 of file buf_init.c.

Referenced by BufferGetBlock(), and BufferManagerShmemInit().

◆ checkpoint_flush_after

PGDLLIMPORT int checkpoint_flush_after
extern

Definition at line 178 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

PGDLLIMPORT int effective_io_concurrency
extern

◆ io_combine_limit

◆ io_combine_limit_guc

PGDLLIMPORT int io_combine_limit_guc
extern

Definition at line 171 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ LocalBufferBlockPointers

PGDLLIMPORT Block* LocalBufferBlockPointers
extern

Definition at line 48 of file localbuf.c.

Referenced by BufferGetBlock(), and InitLocalBuffers().

◆ LocalRefCount

◆ maintenance_io_concurrency

◆ NBuffers

◆ NLocBuffer

◆ track_io_timing

◆ zero_damaged_pages

PGDLLIMPORT bool zero_damaged_pages
extern

Definition at line 144 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().