PostgreSQL Source Code (git master)
bufmgr.h File Reference
#include "port/pg_iovec.h"
#include "storage/aio_types.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilelocator.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
Include dependency graph for bufmgr.h (graph not reproduced here): it shows which files directly or indirectly include this file.

Data Structures

struct  PrefetchBufferResult
 
struct  BufferManagerRelation
 
struct  ReadBuffersOperation
 

Macros

#define BMR_REL(p_rel)    ((BufferManagerRelation){.rel = p_rel})
 
#define BMR_SMGR(p_smgr, p_relpersistence)    ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})
 
#define BMR_GET_SMGR(bmr)    (RelationIsValid((bmr).rel) ? RelationGetSmgr((bmr).rel) : (bmr).smgr)
 
#define READ_BUFFERS_ZERO_ON_ERROR   (1 << 0)
 
#define READ_BUFFERS_ISSUE_ADVICE   (1 << 1)
 
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES   (1 << 2)
 
#define READ_BUFFERS_SYNCHRONOUSLY   (1 << 3)
 
#define DEFAULT_EFFECTIVE_IO_CONCURRENCY   16
 
#define DEFAULT_MAINTENANCE_IO_CONCURRENCY   16
 
#define MAX_IO_COMBINE_LIMIT   PG_IOV_MAX
 
#define DEFAULT_IO_COMBINE_LIMIT   Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)
 
#define MAX_IO_CONCURRENCY   1000
 
#define P_NEW   InvalidBlockNumber /* grow the file to get a new page */
 
#define RelationGetNumberOfBlocks(reln)    RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
 

Typedefs

typedef void * Block
 
typedef enum BufferAccessStrategyType BufferAccessStrategyType
 
typedef struct PrefetchBufferResult PrefetchBufferResult
 
typedef enum ExtendBufferedFlags ExtendBufferedFlags
 
typedef struct SMgrRelationData * SMgrRelation
 
typedef struct BufferManagerRelation BufferManagerRelation
 
typedef struct ReadBuffersOperation ReadBuffersOperation
 
typedef struct WritebackContext WritebackContext
 
typedef enum BufferLockMode BufferLockMode
 

Enumerations

enum  BufferAccessStrategyType { BAS_NORMAL , BAS_BULKREAD , BAS_BULKWRITE , BAS_VACUUM }
 
enum  ReadBufferMode {
  RBM_NORMAL , RBM_ZERO_AND_LOCK , RBM_ZERO_AND_CLEANUP_LOCK , RBM_ZERO_ON_ERROR ,
  RBM_NORMAL_NO_LOG
}
 
enum  ExtendBufferedFlags {
  EB_SKIP_EXTENSION_LOCK = (1 << 0) , EB_PERFORMING_RECOVERY = (1 << 1) , EB_CREATE_FORK_IF_NEEDED = (1 << 2) , EB_LOCK_FIRST = (1 << 3) ,
  EB_CLEAR_SIZE_CACHE = (1 << 4) , EB_LOCK_TARGET = (1 << 5)
}
 
enum  BufferLockMode { BUFFER_LOCK_UNLOCK , BUFFER_LOCK_SHARE , BUFFER_LOCK_EXCLUSIVE }
 

Functions

PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
void InitBufferManagerAccess (void)
 
void AtEOXact_Buffers (bool isCommit)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
void FlushOneBuffer (Buffer buffer)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool BgBufferSync (WritebackContext *wb_context)
 
uint32 GetPinLimit (void)
 
uint32 GetLocalPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
uint32 GetAdditionalLocalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
void LimitAdditionalLocalPins (uint32 *additional_pins)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void BufferManagerShmemInit (void)
 
Size BufferManagerShmemSize (void)
 
void AtProcExit_LocalBuffers (void)
 
BufferAccessStrategy GetAccessStrategy (BufferAccessStrategyType btype)
 
BufferAccessStrategy GetAccessStrategyWithSize (BufferAccessStrategyType btype, int ring_size_kb)
 
int GetAccessStrategyBufferCount (BufferAccessStrategy strategy)
 
int GetAccessStrategyPinLimit (BufferAccessStrategy strategy)
 
void FreeAccessStrategy (BufferAccessStrategy strategy)
 
static bool BufferIsValid (Buffer bufnum)
 
static Block BufferGetBlock (Buffer buffer)
 
static Size BufferGetPageSize (Buffer buffer)
 
static Page BufferGetPage (Buffer buffer)
 

Variables

PGDLLIMPORT int NBuffers
 
PGDLLIMPORT bool zero_damaged_pages
 
PGDLLIMPORT int bgwriter_lru_maxpages
 
PGDLLIMPORT double bgwriter_lru_multiplier
 
PGDLLIMPORT bool track_io_timing
 
PGDLLIMPORT int effective_io_concurrency
 
PGDLLIMPORT int maintenance_io_concurrency
 
PGDLLIMPORT int io_combine_limit
 
PGDLLIMPORT int io_combine_limit_guc
 
PGDLLIMPORT int io_max_combine_limit
 
PGDLLIMPORT int checkpoint_flush_after
 
PGDLLIMPORT int backend_flush_after
 
PGDLLIMPORT int bgwriter_flush_after
 
PGDLLIMPORT const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
PGDLLIMPORT const PgAioHandleCallbacks aio_local_buffer_readv_cb
 
PGDLLIMPORT char * BufferBlocks
 
PGDLLIMPORT int NLocBuffer
 
PGDLLIMPORT Block * LocalBufferBlockPointers
 
PGDLLIMPORT int32 * LocalRefCount
 

Macro Definition Documentation

◆ BMR_GET_SMGR

#define BMR_GET_SMGR (   bmr)     (RelationIsValid((bmr).rel) ? RelationGetSmgr((bmr).rel) : (bmr).smgr)

Definition at line 118 of file bufmgr.h.

◆ BMR_REL

#define BMR_REL (   p_rel)     ((BufferManagerRelation){.rel = p_rel})

Definition at line 114 of file bufmgr.h.

◆ BMR_SMGR

#define BMR_SMGR (   p_smgr,
  p_relpersistence 
)     ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence})

Definition at line 116 of file bufmgr.h.
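The BMR_* macros build a BufferManagerRelation from whichever handle the caller has. A minimal sketch, assuming a backend/extension context (the helper name bmr_main_fork_size is hypothetical; smgrnblocks() is declared in storage/smgr.h):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/rel.h"

/* Hypothetical helper: size of the main fork, via whichever handle
 * the BufferManagerRelation carries. */
static BlockNumber
bmr_main_fork_size(Relation rel)
{
    BufferManagerRelation bmr = BMR_REL(rel);

    /* BMR_GET_SMGR uses bmr.rel when valid, else falls back to bmr.smgr */
    return smgrnblocks(BMR_GET_SMGR(bmr), MAIN_FORKNUM);
}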

◆ DEFAULT_EFFECTIVE_IO_CONCURRENCY

#define DEFAULT_EFFECTIVE_IO_CONCURRENCY   16

Definition at line 168 of file bufmgr.h.

◆ DEFAULT_IO_COMBINE_LIMIT

#define DEFAULT_IO_COMBINE_LIMIT   Min(MAX_IO_COMBINE_LIMIT, (128 * 1024) / BLCKSZ)

Definition at line 174 of file bufmgr.h.

◆ DEFAULT_MAINTENANCE_IO_CONCURRENCY

#define DEFAULT_MAINTENANCE_IO_CONCURRENCY   16

Definition at line 169 of file bufmgr.h.

◆ MAX_IO_COMBINE_LIMIT

#define MAX_IO_COMBINE_LIMIT   PG_IOV_MAX

Definition at line 173 of file bufmgr.h.

◆ MAX_IO_CONCURRENCY

#define MAX_IO_CONCURRENCY   1000

Definition at line 195 of file bufmgr.h.

◆ P_NEW

#define P_NEW   InvalidBlockNumber /* grow the file to get a new page */

Definition at line 198 of file bufmgr.h.
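As the comment says, passing P_NEW instead of a real block number asks the buffer manager to grow the relation. A minimal sketch, assuming the historical ReadBuffer(rel, P_NEW) convention still applies (ExtendBufferedRel() is the more explicit interface; append_new_page is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: append one new page and return it pinned. */
static Buffer
append_new_page(Relation rel)
{
    /* P_NEW == InvalidBlockNumber signals "grow the file" */
    return ReadBuffer(rel, P_NEW);
}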

◆ READ_BUFFERS_IGNORE_CHECKSUM_FAILURES

#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES   (1 << 2)

Definition at line 126 of file bufmgr.h.

◆ READ_BUFFERS_ISSUE_ADVICE

#define READ_BUFFERS_ISSUE_ADVICE   (1 << 1)

Definition at line 124 of file bufmgr.h.

◆ READ_BUFFERS_SYNCHRONOUSLY

#define READ_BUFFERS_SYNCHRONOUSLY   (1 << 3)

Definition at line 128 of file bufmgr.h.

◆ READ_BUFFERS_ZERO_ON_ERROR

#define READ_BUFFERS_ZERO_ON_ERROR   (1 << 0)

Definition at line 122 of file bufmgr.h.
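These flags modify StartReadBuffers()/WaitReadBuffers(), the vectored read path declared in this header. A minimal sketch of a multi-block read, assuming the caller-initialized ReadBuffersOperation fields (rel, smgr, persistence, forknum, strategy) described in bufmgr.h; the function name read_block_range is hypothetical:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static void
read_block_range(Relation rel, BlockNumber start)
{
    ReadBuffersOperation op = {0};
    Buffer      buffers[4];
    int         nblocks = 4;    /* may be reduced by StartReadBuffers() */

    op.rel = rel;
    op.smgr = RelationGetSmgr(rel);
    op.persistence = rel->rd_rel->relpersistence;
    op.forknum = MAIN_FORKNUM;
    op.strategy = NULL;

    /* Returns true when an I/O was started and must be waited for;
     * ZERO_ON_ERROR substitutes all-zeros pages for invalid ones. */
    if (StartReadBuffers(&op, buffers, start, &nblocks,
                         READ_BUFFERS_ZERO_ON_ERROR))
        WaitReadBuffers(&op);

    /* buffers[0 .. nblocks - 1] are now pinned and valid */
    for (int i = 0; i < nblocks; i++)
        ReleaseBuffer(buffers[i]);
}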

◆ RelationGetNumberOfBlocks

#define RelationGetNumberOfBlocks (   reln)     RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)

Definition at line 294 of file bufmgr.h.

Typedef Documentation

◆ Block

typedef void* Block

Definition at line 26 of file bufmgr.h.

◆ BufferAccessStrategyType

◆ BufferLockMode

◆ BufferManagerRelation

◆ ExtendBufferedFlags

◆ PrefetchBufferResult

◆ ReadBuffersOperation

Definition at line 154 of file bufmgr.h.

◆ SMgrRelation

typedef struct SMgrRelationData* SMgrRelation

Definition at line 97 of file bufmgr.h.

◆ WritebackContext

Definition at line 157 of file bufmgr.h.

Enumeration Type Documentation

◆ BufferAccessStrategyType

Enumerator
BAS_NORMAL 
BAS_BULKREAD 
BAS_BULKWRITE 
BAS_VACUUM 

Definition at line 34 of file bufmgr.h.

typedef enum BufferAccessStrategyType
{
    BAS_NORMAL,                 /* Normal random access */
    BAS_BULKREAD,               /* Large read-only scan (hint bit updates are
                                 * ok) */
    BAS_BULKWRITE,              /* Large multi-block write (e.g. COPY IN) */
    BAS_VACUUM,                 /* VACUUM */
} BufferAccessStrategyType;
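Callers obtain a buffer ring matching one of these types via GetAccessStrategy(). A minimal sketch (scan_with_ring is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
scan_with_ring(void)
{
    /* BAS_BULKREAD confines the scan to a small ring of buffers, so it
     * does not evict the whole shared buffer pool */
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);

    /* ... pass strategy to ReadBufferExtended() for each block ... */

    FreeAccessStrategy(strategy);
}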

◆ BufferLockMode

Enumerator
BUFFER_LOCK_UNLOCK 
BUFFER_LOCK_SHARE 
BUFFER_LOCK_EXCLUSIVE 

Definition at line 203 of file bufmgr.h.

typedef enum BufferLockMode
{
    BUFFER_LOCK_UNLOCK,
    BUFFER_LOCK_SHARE,
    BUFFER_LOCK_EXCLUSIVE,
} BufferLockMode;
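These modes are passed to LockBuffer(). The usual pin-lock-inspect-unlock pattern, as a minimal sketch (inspect_block is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static void
inspect_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* pin */

    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* content lock */
    /* ... examine BufferGetPage(buf) while the lock is held ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);                         /* drop pin */
}

UnlockReleaseBuffer() combines the final unlock and release into one call.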

◆ ExtendBufferedFlags

Enumerator
EB_SKIP_EXTENSION_LOCK 
EB_PERFORMING_RECOVERY 
EB_CREATE_FORK_IF_NEEDED 
EB_LOCK_FIRST 
EB_CLEAR_SIZE_CACHE 
EB_LOCK_TARGET 

Definition at line 68 of file bufmgr.h.

typedef enum ExtendBufferedFlags
{
    /*
     * Don't acquire extension lock. This is safe only if the relation isn't
     * shared, an access exclusive lock is held or if this is the startup
     * process.
     */
    EB_SKIP_EXTENSION_LOCK = (1 << 0),

    /* Is this extension part of recovery? */
    EB_PERFORMING_RECOVERY = (1 << 1),

    /*
     * Should the fork be created if it does not currently exist? This likely
     * only ever makes sense for relation forks.
     */
    EB_CREATE_FORK_IF_NEEDED = (1 << 2),

    /* Should the first (possibly only) return buffer be returned locked? */
    EB_LOCK_FIRST = (1 << 3),

    /* Should the smgr size cache be cleared? */
    EB_CLEAR_SIZE_CACHE = (1 << 4),

    /* internal flags follow */
    EB_LOCK_TARGET = (1 << 5),
} ExtendBufferedFlags;
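A minimal sketch of passing these flags to ExtendBufferedRel() (extend_one_page is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static Buffer
extend_one_page(Relation rel)
{
    /* EB_LOCK_FIRST returns the new, zero-filled page exclusively locked */
    return ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL,
                             EB_LOCK_FIRST);
}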

◆ ReadBufferMode

Enumerator
RBM_NORMAL 
RBM_ZERO_AND_LOCK 
RBM_ZERO_AND_CLEANUP_LOCK 
RBM_ZERO_ON_ERROR 
RBM_NORMAL_NO_LOG 

Definition at line 44 of file bufmgr.h.

typedef enum ReadBufferMode
{
    RBM_NORMAL,                 /* Normal read */
    RBM_ZERO_AND_LOCK,          /* Don't read from disk, caller will
                                 * initialize. Also locks the page. */
    RBM_ZERO_AND_CLEANUP_LOCK,  /* Like RBM_ZERO_AND_LOCK, but locks the page
                                 * in "cleanup" mode */
    RBM_ZERO_ON_ERROR,          /* Read, but return an all-zeros page on error */
    RBM_NORMAL_NO_LOG,          /* Don't log page as invalid during WAL
                                 * replay; otherwise same as RBM_NORMAL */
} ReadBufferMode;
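A minimal sketch of selecting a non-default mode through ReadBufferExtended() (get_zeroed_locked_page is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static Buffer
get_zeroed_locked_page(Relation rel, BlockNumber blkno)
{
    /* No physical read: the buffer comes back zero-filled and already
     * exclusively locked, for pages the caller will fully initialize. */
    return ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                              RBM_ZERO_AND_LOCK, NULL);
}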

Function Documentation

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 4084 of file bufmgr.c.

{
    CheckForBufferLeaks();

    AtEOXact_LocalBuffers(isCommit);

    Assert(PrivateRefCountOverflowed == 0);
}

References Assert(), AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_LocalBuffers()

void AtProcExit_LocalBuffers ( void  )

Definition at line 1014 of file localbuf.c.

{
    /*
     * We shouldn't be holding any remaining pins; if we are, and assertions
     * aren't enabled, we'll fail later in DropRelationBuffers while trying to
     * drop the temp rels.
     */
    CheckForLocalBufferLeaks();
}

References CheckForLocalBufferLeaks().

Referenced by AtProcExit_Buffers().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 3716 of file bufmgr.c.

{
    /* info obtained from freelist.c */
    int         strategy_buf_id;
    uint32      strategy_passes;
    uint32      recent_alloc;

    /*
     * Information saved between calls so we can determine the strategy
     * point's advance rate and avoid scanning already-cleaned buffers.
     */
    static bool saved_info_valid = false;
    static int  prev_strategy_buf_id;
    static uint32 prev_strategy_passes;
    static int  next_to_clean;
    static uint32 next_passes;

    /* Moving averages of allocation rate and clean-buffer density */
    static float smoothed_alloc = 0;
    static float smoothed_density = 10.0;

    /* Potentially these could be tunables, but for now, not */
    float       smoothing_samples = 16;
    float       scan_whole_pool_milliseconds = 120000.0;

    /* Used to compute how far we scan ahead */
    long        strategy_delta;
    int         bufs_to_lap;
    int         bufs_ahead;
    float       scans_per_alloc;
    int         reusable_buffers_est;
    int         upcoming_alloc_est;
    int         min_scan_buffers;

    /* Variables for the scanning loop proper */
    int         num_to_scan;
    int         num_written;
    int         reusable_buffers;

    /* Variables for final smoothed_density update */
    long        new_strategy_delta;
    uint32      new_recent_alloc;

    /*
     * Find out where the clock-sweep currently is, and how many buffer
     * allocations have happened since our last call.
     */
    strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

    /* Report buffer alloc counts to pgstat */
    PendingBgWriterStats.buf_alloc += recent_alloc;

    /*
     * If we're not running the LRU scan, just stop after doing the stats
     * stuff.  We mark the saved state invalid so that we can recover sanely
     * if LRU scan is turned back on later.
     */
    if (bgwriter_lru_maxpages <= 0)
    {
        saved_info_valid = false;
        return true;
    }

    /*
     * Compute strategy_delta = how many buffers have been scanned by the
     * clock-sweep since last time.  If first time through, assume none. Then
     * see if we are still ahead of the clock-sweep, and if so, how many
     * buffers we could scan before we'd catch up with it and "lap" it. Note:
     * weird-looking coding of xxx_passes comparisons are to avoid bogus
     * behavior when the passes counts wrap around.
     */
    if (saved_info_valid)
    {
        int32       passes_delta = strategy_passes - prev_strategy_passes;

        strategy_delta = strategy_buf_id - prev_strategy_buf_id;
        strategy_delta += (long) passes_delta * NBuffers;

        Assert(strategy_delta >= 0);

        if ((int32) (next_passes - strategy_passes) > 0)
        {
            /* we're one pass ahead of the strategy point */
            bufs_to_lap = strategy_buf_id - next_to_clean;
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else if (next_passes == strategy_passes &&
                 next_to_clean >= strategy_buf_id)
        {
            /* on same pass, but ahead or at least not behind */
            bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else
        {
            /*
             * We're behind, so skip forward to the strategy point and start
             * cleaning from there.
             */
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta);
#endif
            next_to_clean = strategy_buf_id;
            next_passes = strategy_passes;
            bufs_to_lap = NBuffers;
        }
    }
    else
    {
        /*
         * Initializing at startup or after LRU scanning had been off. Always
         * start at the strategy point.
         */
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
             strategy_passes, strategy_buf_id);
#endif
        strategy_delta = 0;
        next_to_clean = strategy_buf_id;
        next_passes = strategy_passes;
        bufs_to_lap = NBuffers;
    }

    /* Update saved info for next time */
    prev_strategy_buf_id = strategy_buf_id;
    prev_strategy_passes = strategy_passes;
    saved_info_valid = true;

    /*
     * Compute how many buffers had to be scanned for each new allocation, ie,
     * 1/density of reusable buffers, and track a moving average of that.
     *
     * If the strategy point didn't move, we don't update the density estimate
     */
    if (strategy_delta > 0 && recent_alloc > 0)
    {
        scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;
    }

    /*
     * Estimate how many reusable buffers there are between the current
     * strategy point and where we've scanned ahead to, based on the smoothed
     * density estimate.
     */
    bufs_ahead = NBuffers - bufs_to_lap;
    reusable_buffers_est = (float) bufs_ahead / smoothed_density;

    /*
     * Track a moving average of recent buffer allocations.  Here, rather than
     * a true average we want a fast-attack, slow-decline behavior: we
     * immediately follow any increase.
     */
    if (smoothed_alloc <= (float) recent_alloc)
        smoothed_alloc = recent_alloc;
    else
        smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
            smoothing_samples;

    /* Scale the estimate by a GUC to allow more aggressive tuning. */
    upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);

    /*
     * If recent_alloc remains at zero for many cycles, smoothed_alloc will
     * eventually underflow to zero, and the underflows produce annoying
     * kernel warnings on some platforms.  Once upcoming_alloc_est has gone to
     * zero, there's no point in tracking smaller and smaller values of
     * smoothed_alloc, so just reset it to exactly zero to avoid this
     * syndrome.  It will pop back up as soon as recent_alloc increases.
     */
    if (upcoming_alloc_est == 0)
        smoothed_alloc = 0;

    /*
     * Even in cases where there's been little or no buffer allocation
     * activity, we want to make a small amount of progress through the buffer
     * cache so that as many reusable buffers as possible are clean after an
     * idle period.
     *
     * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
     * the BGW will be called during the scan_whole_pool time; slice the
     * buffer pool into that many sections.
     */
    min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

    if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
    {
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
             upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
#endif
        upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
    }

    /*
     * Now write out dirty reusable buffers, working forward from the
     * next_to_clean point, until we have lapped the strategy scan, or cleaned
     * enough buffers to match our estimate of the next cycle's allocation
     * requirements, or hit the bgwriter_lru_maxpages limit.
     */

    num_to_scan = bufs_to_lap;
    num_written = 0;
    reusable_buffers = reusable_buffers_est;

    /* Execute the LRU scan */
    while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
    {
        int         sync_state = SyncOneBuffer(next_to_clean, true,
                                               wb_context);

        if (++next_to_clean >= NBuffers)
        {
            next_to_clean = 0;
            next_passes++;
        }
        num_to_scan--;

        if (sync_state & BUF_WRITTEN)
        {
            reusable_buffers++;
            if (++num_written >= bgwriter_lru_maxpages)
            {
                PendingBgWriterStats.maxwritten_clean++;
                break;
            }
        }
        else if (sync_state & BUF_REUSABLE)
            reusable_buffers++;
    }

    PendingBgWriterStats.buf_written_clean += num_written;

#ifdef BGW_DEBUG
    elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
         recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
         smoothed_density, reusable_buffers_est, upcoming_alloc_est,
         bufs_to_lap - num_to_scan,
         num_written,
         reusable_buffers - reusable_buffers_est);
#endif

    /*
     * Consider the above scan as being like a new allocation scan.
     * Characterize its density and update the smoothed one based on it. This
     * effectively halves the moving average period in cases where both the
     * strategy and the background writer are doing some useful scanning,
     * which is helpful because a long memory isn't as desirable on the
     * density estimates.
     */
    new_strategy_delta = bufs_to_lap - num_to_scan;
    new_recent_alloc = reusable_buffers - reusable_buffers_est;
    if (new_strategy_delta > 0 && new_recent_alloc > 0)
    {
        scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;

#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
             new_recent_alloc, new_strategy_delta,
             scans_per_alloc, smoothed_density);
#endif
    }

    /* Return true if OK to hibernate */
    return (bufs_to_lap == 0 && recent_alloc == 0);
}

References Assert(), bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
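The smoothed_density and smoothed_alloc updates above are exponential moving averages; writing N for smoothing_samples and x_t for the newest sample, each call nudges the estimate by 1/N of the error:

    s_t = s_{t-1} + \frac{x_t - s_{t-1}}{N} = \left(1 - \frac{1}{N}\right) s_{t-1} + \frac{1}{N} x_t

With N = 16, roughly the last sixteen samples dominate the estimate; smoothed_alloc additionally snaps upward immediately on any increase, per the fast-attack comment in the code.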

◆ BufferGetBlock()

static Block BufferGetBlock ( Buffer  buffer)
inline, static

Definition at line 403 of file bufmgr.h.

{
    Assert(BufferIsValid(buffer));

    if (BufferIsLocal(buffer))
        return LocalBufferBlockPointers[-buffer - 1];
    else
        return (Block) (BufferBlocks + ((Size) (buffer - 1)) * BLCKSZ);
}

References Assert(), BufferBlocks, BufferIsLocal, BufferIsValid(), and LocalBufferBlockPointers.

Referenced by AsyncReadBuffers(), buffer_readv_complete_one(), BufferGetPage(), heap_inplace_update_and_unlock(), pg_buffercache_os_pages_internal(), read_rel_block_ll(), and XLogSaveBufferForHint().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4318 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
        bufHdr = GetLocalBufferDescriptor(-buffer - 1);
    else
        bufHdr = GetBufferDescriptor(buffer - 1);

    /* pinned, so OK to read tag without spinlock */
    return bufHdr->tag.blockNum;
}

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4594 of file bufmgr.c.

{
    char       *page = BufferGetPage(buffer);
    BufferDesc *bufHdr;
    XLogRecPtr  lsn;

    /*
     * If we don't need locking for correctness, fastpath out.
     */
    if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
        return PageGetLSN(page);

    /* Make sure we've got a real buffer, and that we hold a pin on it. */
    Assert(BufferIsValid(buffer));
    Assert(BufferIsPinned(buffer));

    bufHdr = GetBufferDescriptor(buffer - 1);
    LockBufHdr(bufHdr);
    lsn = PageGetLSN(page);
    UnlockBufHdr(bufHdr);

    return lsn;
}

References Assert(), PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetPage()

static Page BufferGetPage ( Buffer  buffer)
inline, static

Definition at line 436 of file bufmgr.h.

{
    return (Page) BufferGetBlock(buffer);
}

References BufferGetBlock().

Referenced by _bt_allocbuf(), _bt_binsrch(), _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_clear_incomplete_split(), _bt_conditionallockbuf(), _bt_dedup_pass(), _bt_delete_or_dedup_one_page(), _bt_delitems_delete(), _bt_delitems_delete_check(), _bt_delitems_vacuum(), _bt_endpoint(), _bt_findinsertloc(), _bt_finish_split(), _bt_get_endpoint(), _bt_getmeta(), _bt_getroot(), _bt_getstackbuf(), _bt_gettrueroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_killitems(), _bt_leftsib_splitflag(), _bt_lock_and_validate_left(), _bt_lock_subtree_parent(), _bt_lockbuf(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_rightsib_halfdeadflag(), _bt_search(), _bt_search_insert(), _bt_set_cleanup_info(), _bt_simpledel_pass(), _bt_split(), _bt_stepright(), _bt_unlink_halfdead_page(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _bt_vacuum_needs_cleanup(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_get_newblock_from_oldbucket(), _hash_get_oldblock_from_newbucket(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_getinitbuf(), _hash_getnewbuf(), _hash_init(), _hash_init_metabuffer(), _hash_initbitmapbuffer(), _hash_initbuf(), _hash_kill_items(), _hash_pgaddmultitup(), _hash_pgaddtup(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), allocNewBuffer(), BitmapHeapScanNextBlock(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_can_do_samepage_update(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_start_evacuating_page(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinSetHeapBlockItemptr(), bt_metap(), bt_page_items_internal(), bt_recheck_sibling_links(), bt_rootdescend(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), btvacuumpage(), BufferGetLSNAtomic(), check_index_page(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), collectMatchesForHeapRow(), count_nondeletable_pages(), createPostingTree(), dataBeginPlaceToPage(), dataBeginPlaceToPageInternal(), dataBeginPlaceToPageLeaf(), dataExecPlaceToPage(), dataExecPlaceToPageInternal(), dataLocateItem(), dataPlaceToPageLeafRecompress(), dataPrepareDownlink(), dataSplitPageInternal(), doPickSplit(), entryExecPlaceToPage(), entryIsEnoughSpace(), entryLoadMoreItems(), entryLocateEntry(), entryLocateLeafEntry(), entryPrepareDownlink(), entrySplitPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), generic_redo(), GenericXLogFinish(), GenericXLogRegisterBuffer(), get_raw_page_internal(), GetBTPageStatistics(), GetRecordedFreeSpace(), GetVisibilityMapPins(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginEntryInsert(), 
ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), GinInitBuffer(), GinInitMetabuffer(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertData(), ginRedoInsertEntry(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumEntryPage(), ginVacuumPostingTreeLeaf(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), GISTInitBuffer(), gistkillitems(), gistMemorizeAllDownlinks(), gistNewBuffer(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_bitmap_info(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_freeze_prepared_tuples(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_index_delete_tuples(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_page_prune_execute(), heap_page_prune_opt(), heap_page_would_be_all_visible(), heap_pre_freeze_checks(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_validate_scan(), heapam_scan_analyze_next_tuple(), heapam_scan_bitmap_next_tuple(), heapam_scan_sample_next_tuple(), heapgettup_continue_page(), heapgettup_pagemode(), heapgettup_start_page(), index_compute_xid_horizon_for_tuples(), initBloomState(), lazy_scan_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), log_heap_prune_and_freeze(), log_heap_update(), log_newpage_buffer(), log_newpage_range(), log_split_page(), MarkBufferDirtyHint(), modify_rel_block(), moveLeafs(), moveRightIfItNeeded(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_visibility(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), prune_freeze_plan(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistInitBuffer(), SpGistNewBuffer(), SpGistSetLastUsedPage(), SpGistUpdateMetaPage(), 
spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), terminate_brin_buildstate(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), vm_readbuf(), writeListPage(), XLogCheckBufferNeedsBackup(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), XLogRegisterBuffer(), XLogSaveBufferForHint(), xlogVacuumPage(), and ZeroAndLockBuffer().

◆ BufferGetPageSize()

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 4339 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    /* Do the same checks as BufferGetBlockNumber. */
    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
        bufHdr = GetLocalBufferDescriptor(-buffer - 1);
    else
        bufHdr = GetBufferDescriptor(buffer - 1);

    /* pinned, so OK to read tag without spinlock */
    *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
    *forknum = BufTagGetForkNum(&bufHdr->tag);
    *blknum = bufHdr->tag.blockNum;
}

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
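A minimal sketch (report_buffer_identity is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
report_buffer_identity(Buffer buf)
{
    RelFileLocator rlocator;
    ForkNumber  forknum;
    BlockNumber blknum;

    /* caller must hold a pin, per the Assert in BufferGetTag() */
    BufferGetTag(buf, &rlocator, &forknum, &blknum);
    elog(DEBUG1, "buffer %d holds rel %u fork %d block %u",
         buf, rlocator.relNumber, forknum, blknum);
}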

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 3005 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
    {
        int         bufid = -buffer - 1;

        bufHdr = GetLocalBufferDescriptor(bufid);
        /* Content locks are not maintained for local buffers. */
    }
    else
    {
        bufHdr = GetBufferDescriptor(buffer - 1);
        Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
    }

    return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
}

References Assert(), BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2937 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
    {
        /* Content locks are not maintained for local buffers. */
        return true;
    }
    else
    {
        bufHdr = GetBufferDescriptor(buffer - 1);
        return LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr));
    }
}

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), and LWLockHeldByMe().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2963 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
    {
        /* Content locks are not maintained for local buffers. */
        return true;
    }
    else
    {
        LWLockMode  lw_mode;

        switch (mode)
        {
            case BUFFER_LOCK_EXCLUSIVE:
                lw_mode = LW_EXCLUSIVE;
                break;
            case BUFFER_LOCK_SHARE:
                lw_mode = LW_SHARED;
                break;
            default:
                pg_unreachable();
        }

        bufHdr = GetBufferDescriptor(buffer - 1);
        return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
                                    lw_mode);
    }
}

References Assert(), PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockHeldByMeInMode(), mode, and pg_unreachable.

Referenced by BufferIsDirty(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4564 of file bufmgr.c.

{
    BufferDesc *bufHdr;

    /* Local buffers are used only for temp relations. */
    if (BufferIsLocal(buffer))
        return false;

    /* Make sure we've got a real buffer, and that we hold a pin on it. */
    Assert(BufferIsValid(buffer));
    Assert(BufferIsPinned(buffer));

    /*
     * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
     * need not bother with the buffer header spinlock.  Even if someone else
     * changes the buffer header state while we're doing this, the state is
     * changed atomically, so we'll read the old value or the new value, but
     * not random garbage.
     */
    bufHdr = GetBufferDescriptor(buffer - 1);
    return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
}

References Assert(), BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferIsValid()

static bool BufferIsValid ( Buffer  bufnum)
inline, static

Definition at line 387 of file bufmgr.h.

{
    Assert(bufnum <= NBuffers);
    Assert(bufnum >= -NLocBuffer);

    return bufnum != InvalidBuffer;
}

References Assert(), InvalidBuffer, NBuffers, and NLocBuffer.

Referenced by _bt_clear_incomplete_split(), _bt_endpoint(), _bt_first(), _bt_get_endpoint(), _bt_insertonpg(), _bt_relandgetbuf(), _bt_search(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_dropscanbuf(), _hash_freeovflpage(), _hash_getbucketbuf_from_hashkey(), _hash_getcachedmetap(), _hash_readnext(), _hash_readpage(), _hash_readprev(), BitmapHeapScanNextBlock(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinGetTupleForHeapBlock(), brininsert(), brinsummarize(), bt_recheck_sibling_links(), bt_rootdescend(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), btvacuumscan(), buffer_readv_complete(), BufferGetBlock(), BufferGetLSNAtomic(), BufferGetPageSize(), BufferIsPermanent(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), doPickSplit(), entryGetItem(), entryLoadMoreItems(), EvictUnpinnedBuffer(), ExecStoreBufferHeapTuple(), ExecStorePinnedBufferHeapTuple(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_vacuum_page(), generic_redo(), GetPrivateRefCount(), GetPrivateRefCountEntry(), GetRecordedFreeSpace(), GetVisibilityMapPins(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoDeletePage(), ginRedoInsert(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginScanToDelete(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageUpdateRecord(), gistvacuumscan(), gistXLogSplit(), gistXLogUpdate(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_endscan(), heap_fetch_next_buffer(), heap_index_delete_tuples(), heap_inplace_lock(), heap_lock_tuple(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_analyze_next_block(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapam_tuple_satisfies_snapshot(), heapgettup(), heapgettup_continue_page(), heapgettup_pagemode(), heapgettup_start_page(), invalidate_rel_block(), IsBufferCleanupOK(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_heap_visible(), MarkBufferDirty(), MarkBufferDirtyHint(), read_stream_next_buffer(), ReadRecentBuffer(), ReleaseAndReadBuffer(), ReleaseBuffer(), ResOwnerReleaseBufferPin(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgvacuumscan(), statapprox_heap(), tts_buffer_heap_clear(), tts_buffer_heap_copyslot(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), XLogPrefetcherNextBlock(), 
XLogReadBufferExtended(), and XLogReadBufferForRedoExtended().

◆ BufferManagerShmemInit()

void BufferManagerShmemInit ( void  )

Definition at line 68 of file buf_init.c.

{
    bool        foundBufs,
                foundDescs,
                foundIOCV,
                foundBufCkpt;

    /* Align descriptors to a cacheline boundary. */
    BufferDescriptors = (BufferDescPadded *)
        ShmemInitStruct("Buffer Descriptors",
                        NBuffers * sizeof(BufferDescPadded),
                        &foundDescs);

    /* Align buffer pool on IO page size boundary. */
    BufferBlocks = (char *)
        TYPEALIGN(PG_IO_ALIGN_SIZE,
                  ShmemInitStruct("Buffer Blocks",
                                  NBuffers * (Size) BLCKSZ + PG_IO_ALIGN_SIZE,
                                  &foundBufs));

    /* Align condition variables to cacheline boundary. */
    BufferIOCVArray = (ConditionVariableMinimallyPadded *)
        ShmemInitStruct("Buffer IO Condition Variables",
                        NBuffers * sizeof(ConditionVariableMinimallyPadded),
                        &foundIOCV);

    /*
     * The array used to sort to-be-checkpointed buffer ids is located in
     * shared memory, to avoid having to allocate significant amounts of
     * memory at runtime.  As that'd be in the middle of a checkpoint, or when
     * the checkpointer is restarted, memory allocation failures would be
     * painful.
     */
    CkptBufferIds = (CkptSortItem *)
        ShmemInitStruct("Checkpoint BufferIds",
                        NBuffers * sizeof(CkptSortItem), &foundBufCkpt);

    if (foundDescs || foundBufs || foundIOCV || foundBufCkpt)
    {
        /* should find all of these, or none of them */
        Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt);
        /* note: this path is only taken in EXEC_BACKEND case */
    }
    else
    {
        int         i;

        /*
         * Initialize all the buffer headers.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *buf = GetBufferDescriptor(i);

            ClearBufferTag(&buf->tag);

            pg_atomic_init_u32(&buf->state, 0);
            buf->wait_backend_pgprocno = INVALID_PROC_NUMBER;

            buf->buf_id = i;

            pgaio_wref_clear(&buf->io_wref);

            LWLockInitialize(BufferDescriptorGetContentLock(buf),
                             LWTRANCHE_BUFFER_CONTENT);

            ConditionVariableInit(BufferDescriptorGetIOCV(buf));
        }
    }

    /* Init other shared buffer-management stuff */
    StrategyInitialize(!foundDescs);

    /* Initialize per-backend file flush context */
    WritebackContextInit(&BackendWritebackContext,
                         &backend_flush_after);
}

References Assert(), backend_flush_after, BackendWritebackContext, buf, BufferBlocks, BufferDescriptorGetContentLock(), BufferDescriptorGetIOCV(), BufferDescriptors, BufferIOCVArray, CkptBufferIds, ClearBufferTag(), ConditionVariableInit(), GetBufferDescriptor(), i, INVALID_PROC_NUMBER, LWLockInitialize(), NBuffers, pg_atomic_init_u32(), PG_IO_ALIGN_SIZE, pgaio_wref_clear(), ShmemInitStruct(), StrategyInitialize(), TYPEALIGN, and WritebackContextInit().

Referenced by CreateOrAttachShmemStructs().

◆ BufferManagerShmemSize()

Size BufferManagerShmemSize ( void  )

Definition at line 153 of file buf_init.c.

{
    Size        size = 0;

    /* size of buffer descriptors */
    size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
    /* to allow aligning buffer descriptors */
    size = add_size(size, PG_CACHE_LINE_SIZE);

    /* size of data pages, plus alignment padding */
    size = add_size(size, PG_IO_ALIGN_SIZE);
    size = add_size(size, mul_size(NBuffers, BLCKSZ));

    /* size of stuff controlled by freelist.c */
    size = add_size(size, StrategyShmemSize());

    /* size of I/O condition variables */
    size = add_size(size, mul_size(NBuffers,
                                   sizeof(ConditionVariableMinimallyPadded)));
    /* to allow aligning the above */
    size = add_size(size, PG_CACHE_LINE_SIZE);

    /* size of checkpoint sort array in bufmgr.c */
    size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem)));

    return size;
}

References add_size(), mul_size(), NBuffers, PG_CACHE_LINE_SIZE, PG_IO_ALIGN_SIZE, and StrategyShmemSize().

Referenced by CalculateShmemSize().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5746 of file bufmgr.c.

{
    if (BufferIsLocal(buffer))
    {
        if (LocalRefCount[-buffer - 1] != 1)
            elog(ERROR, "incorrect local pin count: %d",
                 LocalRefCount[-buffer - 1]);
    }
    else
    {
        if (GetPrivateRefCount(buffer) != 1)
            elog(ERROR, "incorrect local pin count: %d",
                 GetPrivateRefCount(buffer));
    }
}

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4304 of file bufmgr.c.

{
    BufferSync(flags);
}

References BufferSync().

Referenced by CheckPointGuts().

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5952 of file bufmgr.c.

{
    BufferDesc *bufHdr;
    uint32      buf_state,
                refcount;

    Assert(BufferIsValid(buffer));

    /* see AIO related comment in LockBufferForCleanup() */

    if (BufferIsLocal(buffer))
    {
        refcount = LocalRefCount[-buffer - 1];
        /* There should be exactly one pin */
        Assert(refcount > 0);
        if (refcount != 1)
            return false;
        /* Nobody else to wait for */
        return true;
    }

    /* There should be exactly one local pin */
    refcount = GetPrivateRefCount(buffer);
    Assert(refcount);
    if (refcount != 1)
        return false;

    /* Try to acquire lock */
    if (!ConditionalLockBuffer(buffer))
        return false;

    bufHdr = GetBufferDescriptor(buffer - 1);
    buf_state = LockBufHdr(bufHdr);
    refcount = BUF_STATE_GET_REFCOUNT(buf_state);

    Assert(refcount > 0);
    if (refcount == 1)
    {
        /* Successfully acquired exclusive lock with pincount 1 */
        UnlockBufHdr(bufHdr);
        return true;
    }

    /* Failed, so release the lock */
    UnlockBufHdr(bufHdr);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    return false;
}

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
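
A sketch of the opportunistic-cleanup pattern used by callers such as heap_page_prune_opt(): take the cleanup lock only if it is immediately available, and otherwise just drop the pin and move on (rel and blkno are assumed to be set up by the caller):

Buffer      buf = ReadBuffer(rel, blkno);

if (ConditionalLockBufferForCleanup(buf))
{
    /* exclusive lock and sole pin: safe to rearrange the page */
    /* ... cleanup work would go here ... */
    UnlockReleaseBuffer(buf);
}
else
    ReleaseBuffer(buf);         /* page is busy; skip it this time */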

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5337 of file bufmgr.c.

5339{
5340 char relpersistence;
5341 SMgrRelation src_rel;
5342 SMgrRelation dst_rel;
5343
5344 /* Set the relpersistence. */
5345 relpersistence = permanent ?
5346 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5347
5348 src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
5349 dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
5350
5351 /*
5352 * Create and copy all forks of the relation. During create database we
5353 * have a separate cleanup mechanism which deletes the complete database
5354 * directory. Therefore, each individual relation doesn't need to be
5355 * registered for cleanup.
5356 */
5357 RelationCreateStorage(dst_rlocator, relpersistence, false);
5358
5359 /* copy main fork. */
5360 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
5361 permanent);
5362
5363 /* copy those extra forks that exist */
5364 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5365 forkNum <= MAX_FORKNUM; forkNum++)
5366 {
5367 if (smgrexists(src_rel, forkNum))
5368 {
5369 smgrcreate(dst_rel, forkNum, false);
5370
5371 /*
5372 * WAL log creation if the relation is persistent, or this is the
5373 * init fork of an unlogged relation.
5374 */
5375 if (permanent || forkNum == INIT_FORKNUM)
5376 log_smgrcreate(&dst_rlocator, forkNum);
5377
5378 /* Copy a fork's data, block by block. */
5379 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
5380 permanent);
5381 }
5382 }
5383}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:5223
ForkNumber
Definition: relpath.h:56
@ MAIN_FORKNUM
Definition: relpath.h:58
@ INIT_FORKNUM
Definition: relpath.h:61
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:187

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4261 of file bufmgr.c.

4262{
4263 BufferDesc *buf;
4264 int32 loccount;
4265 char *result;
4266 ProcNumber backend;
4267 uint32 buf_state;
4268
4269 Assert(BufferIsValid(buffer));
4270 if (BufferIsLocal(buffer))
4271 {
4272 buf = GetLocalBufferDescriptor(-buffer - 1);
4273 loccount = LocalRefCount[-buffer - 1];
4274 backend = MyProcNumber;
4275 }
4276 else
4277 {
4278 buf = GetBufferDescriptor(buffer - 1);
4279 loccount = GetPrivateRefCount(buffer);
4280 backend = INVALID_PROC_NUMBER;
4281 }
4282
4283 /* theoretically we should lock the bufHdr here */
4284 buf_state = pg_atomic_read_u32(&buf->state);
4285
4286 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
4287 buffer,
4288 relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
4289 BufTagGetForkNum(&buf->tag)).str,
4290 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4291 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
4292 return result;
4293}
#define BUF_FLAG_MASK
Definition: buf_internals.h:56
ProcNumber MyProcNumber
Definition: globals.c:90
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:141

References Assert(), buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().
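
Since the result comes from psprintf(), it is allocated in the current memory context and can be embedded directly in a log message; a minimal sketch:

elog(WARNING, "buffer state: %s", DebugPrintBufferRefcount(buffer));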

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4990 of file bufmgr.c.

4991{
4992 int i;
4993
4994 /*
4995 * We needn't consider local buffers, since by assumption the target
4996 * database isn't our own.
4997 */
4998
4999 for (i = 0; i < NBuffers; i++)
5000 {
5001 BufferDesc *bufHdr = GetBufferDescriptor(i);
5002
5003 /*
5004 * As in DropRelationBuffers, an unlocked precheck should be safe and
5005 * saves some cycles.
5006 */
5007 if (bufHdr->tag.dbOid != dbid)
5008 continue;
5009
5010 LockBufHdr(bufHdr);
5011 if (bufHdr->tag.dbOid == dbid)
5012 InvalidateBuffer(bufHdr); /* releases spinlock */
5013 else
5014 UnlockBufHdr(bufHdr);
5015 }
5016}
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:2248

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 4640 of file bufmgr.c.

4642{
4643 int i;
4644 int j;
4645 RelFileLocatorBackend rlocator;
4646 BlockNumber nForkBlock[MAX_FORKNUM];
4647 uint64 nBlocksToInvalidate = 0;
4648
4649 rlocator = smgr_reln->smgr_rlocator;
4650
4651 /* If it's a local relation, it's localbuf.c's problem. */
4652 if (RelFileLocatorBackendIsTemp(rlocator))
4653 {
4654 if (rlocator.backend == MyProcNumber)
4655 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4656 firstDelBlock);
4657
4658 return;
4659 }
4660
4661 /*
4662 * To remove all the pages of the specified relation forks from the buffer
4663 * pool, we need to scan the entire buffer pool but we can optimize it by
4664 * finding the buffers from the BufMapping table provided we know the exact
4665 * size of each fork of the relation. The exact size is required to ensure
4666 * that we don't leave any buffer for the relation being dropped as
4667 * otherwise the background writer or checkpointer can lead to a PANIC
4668 * error while flushing buffers corresponding to files that don't exist.
4669 *
4670 * To know the exact size, we rely on the size cached for each fork by us
4671 * during recovery, which limits the optimization to recovery and to
4672 * standbys, but we can easily extend it once we have a shared cache for
4673 * relation size.
4674 *
4675 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4676 * and future writes keep the cached value up-to-date. See
4677 * smgrextend. It is possible that the value of the first lseek is smaller
4678 * than the actual number of existing blocks in the file due to buggy
4679 * Linux kernels that might not have accounted for the recent write. But
4680 * that should be fine because there must not be any buffers after that
4681 * file size.
4682 */
4683 for (i = 0; i < nforks; i++)
4684 {
4685 /* Get the number of blocks for a relation's fork */
4686 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4687
4688 if (nForkBlock[i] == InvalidBlockNumber)
4689 {
4690 nBlocksToInvalidate = InvalidBlockNumber;
4691 break;
4692 }
4693
4694 /* calculate the number of blocks to be invalidated */
4695 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4696 }
4697
4698 /*
4699 * We apply the optimization iff the total number of blocks to invalidate
4700 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4701 */
4702 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4703 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4704 {
4705 for (j = 0; j < nforks; j++)
4706 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4707 nForkBlock[j], firstDelBlock[j]);
4708 return;
4709 }
4710
4711 for (i = 0; i < NBuffers; i++)
4712 {
4713 BufferDesc *bufHdr = GetBufferDescriptor(i);
4714
4715 /*
4716 * We can make this a tad faster by prechecking the buffer tag before
4717 * we attempt to lock the buffer; this saves a lot of lock
4718 * acquisitions in typical cases. It should be safe because the
4719 * caller must have AccessExclusiveLock on the relation, or some other
4720 * reason to be certain that no one is loading new pages of the rel
4721 * into the buffer pool. (Otherwise we might well miss such pages
4722 * entirely.) Therefore, while the tag might be changing while we
4723 * look at it, it can't be changing *to* a value we care about, only
4724 * *away* from such a value. So false negatives are impossible, and
4725 * false positives are safe because we'll recheck after getting the
4726 * buffer lock.
4727 *
4728 * We could check forkNum and blockNum as well as the rlocator, but
4729 * the incremental win from doing so seems small.
4730 */
4731 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4732 continue;
4733
4734 LockBufHdr(bufHdr);
4735
4736 for (j = 0; j < nforks; j++)
4737 {
4738 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4739 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4740 bufHdr->tag.blockNum >= firstDelBlock[j])
4741 {
4742 InvalidateBuffer(bufHdr); /* releases spinlock */
4743 break;
4744 }
4745 }
4746 if (j >= nforks)
4747 UnlockBufHdr(bufHdr);
4748 }
4749}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:91
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4930
uint64_t uint64
Definition: c.h:553
int j
Definition: isn.c:78
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition: localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:847
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:38

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 4760 of file bufmgr.c.

4761{
4762 int i;
4763 int n = 0;
4764 SMgrRelation *rels;
4765 BlockNumber (*block)[MAX_FORKNUM + 1];
4766 uint64 nBlocksToInvalidate = 0;
4767 RelFileLocator *locators;
4768 bool cached = true;
4769 bool use_bsearch;
4770
4771 if (nlocators == 0)
4772 return;
4773
4774 rels = palloc_array(SMgrRelation, nlocators); /* non-local relations */
4775
4776 /* If it's a local relation, it's localbuf.c's problem. */
4777 for (i = 0; i < nlocators; i++)
4778 {
4779 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4780 {
4781 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4782 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4783 }
4784 else
4785 rels[n++] = smgr_reln[i];
4786 }
4787
4788 /*
4789 * If there are no non-local relations, then we're done. Release the
4790 * memory and return.
4791 */
4792 if (n == 0)
4793 {
4794 pfree(rels);
4795 return;
4796 }
4797
4798 /*
4799 * This is used to remember the number of blocks for all the relations
4800 * forks.
4801 */
4802 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4803 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4804
4805 /*
4806 * We can avoid scanning the entire buffer pool if we know the exact size
4807 * of each of the given relation forks. See DropRelationBuffers.
4808 */
4809 for (i = 0; i < n && cached; i++)
4810 {
4811 for (int j = 0; j <= MAX_FORKNUM; j++)
4812 {
4813 /* Get the number of blocks for a relation's fork. */
4814 block[i][j] = smgrnblocks_cached(rels[i], j);
4815
4816 /* We need to only consider the relation forks that exist. */
4817 if (block[i][j] == InvalidBlockNumber)
4818 {
4819 if (!smgrexists(rels[i], j))
4820 continue;
4821 cached = false;
4822 break;
4823 }
4824
4825 /* calculate the total number of blocks to be invalidated */
4826 nBlocksToInvalidate += block[i][j];
4827 }
4828 }
4829
4830 /*
4831 * We apply the optimization iff the total number of blocks to invalidate
4832 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4833 */
4834 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4835 {
4836 for (i = 0; i < n; i++)
4837 {
4838 for (int j = 0; j <= MAX_FORKNUM; j++)
4839 {
4840 /* ignore relation forks that don't exist */
4841 if (!BlockNumberIsValid(block[i][j]))
4842 continue;
4843
4844 /* drop all the buffers for a particular relation fork */
4845 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4846 j, block[i][j], 0);
4847 }
4848 }
4849
4850 pfree(block);
4851 pfree(rels);
4852 return;
4853 }
4854
4855 pfree(block);
4856 locators = palloc_array(RelFileLocator, n); /* non-local relations */
4857 for (i = 0; i < n; i++)
4858 locators[i] = rels[i]->smgr_rlocator.locator;
4859
4860 /*
4861 * For a low number of relations to drop, just use a simple walk-through to
4862 * save the bsearch overhead. The threshold to use is more a guess than
4863 * an exactly determined value, as it depends on many factors (CPU and RAM
4864 * speeds, amount of shared buffers etc.).
4865 */
4866 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4867
4868 /* sort the list of rlocators if necessary */
4869 if (use_bsearch)
4870 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4871
4872 for (i = 0; i < NBuffers; i++)
4873 {
4874 RelFileLocator *rlocator = NULL;
4875 BufferDesc *bufHdr = GetBufferDescriptor(i);
4876
4877 /*
4878 * As in DropRelationBuffers, an unlocked precheck should be safe and
4879 * saves some cycles.
4880 */
4881
4882 if (!use_bsearch)
4883 {
4884 int j;
4885
4886 for (j = 0; j < n; j++)
4887 {
4888 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4889 {
4890 rlocator = &locators[j];
4891 break;
4892 }
4893 }
4894 }
4895 else
4896 {
4897 RelFileLocator locator;
4898
4899 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4900 rlocator = bsearch(&locator,
4901 locators, n, sizeof(RelFileLocator),
4902 rlocator_comparator);
4903 }
4904
4905 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4906 if (rlocator == NULL)
4907 continue;
4908
4909 LockBufHdr(bufHdr);
4910 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4911 InvalidateBuffer(bufHdr); /* releases spinlock */
4912 else
4913 UnlockBufHdr(bufHdr);
4914 }
4915
4916 pfree(locators);
4917 pfree(rels);
4918}
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:6332
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:83
#define palloc_array(type, count)
Definition: fe_memutils.h:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:702
void pfree(void *pointer)
Definition: mcxt.c:1616
void * palloc(Size size)
Definition: mcxt.c:1387
#define qsort(a, b, c, d)
Definition: port.h:499

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), palloc_array, pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 6796 of file bufmgr.c.

6798{
6799 *buffers_evicted = 0;
6800 *buffers_skipped = 0;
6801 *buffers_flushed = 0;
6802
6803 for (int buf = 1; buf <= NBuffers; buf++)
6804 {
6805 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6806 uint32 buf_state;
6807 bool buffer_flushed;
6808
6809 CHECK_FOR_INTERRUPTS();
6810
6811 buf_state = pg_atomic_read_u32(&desc->state);
6812 if (!(buf_state & BM_VALID))
6813 continue;
6814
6815 ResourceOwnerEnlarge(CurrentResourceOwner);
6816 ReservePrivateRefCountEntry();
6817
6818 LockBufHdr(desc);
6819
6820 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6821 (*buffers_evicted)++;
6822 else
6823 (*buffers_skipped)++;
6824
6825 if (buffer_flushed)
6826 (*buffers_flushed)++;
6827 }
6828}
#define BM_VALID
Definition: buf_internals.h:70
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition: bufmgr.c:6705
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:284
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:449

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().
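
A sketch of a caller in the style of pg_buffercache_evict_all(), using the three out-parameters (variable names are illustrative):

int32       buffers_evicted,
            buffers_flushed,
            buffers_skipped;

EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed, &buffers_skipped);
elog(INFO, "evicted %d buffers (%d flushed first), skipped %d",
     buffers_evicted, buffers_flushed, buffers_skipped);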

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 6846 of file bufmgr.c.

6848{
6849 Assert(!RelationUsesLocalBuffers(rel));
6850
6851 *buffers_skipped = 0;
6852 *buffers_evicted = 0;
6853 *buffers_flushed = 0;
6854
6855 for (int buf = 1; buf <= NBuffers; buf++)
6856 {
6857 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6858 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
6859 bool buffer_flushed;
6860
6861 CHECK_FOR_INTERRUPTS();
6862
6863 /* An unlocked precheck should be safe and saves some cycles. */
6864 if ((buf_state & BM_VALID) == 0 ||
6865 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
6866 continue;
6867
6868 /* Make sure we can pin the buffer. */
6869 ResourceOwnerEnlarge(CurrentResourceOwner);
6870 ReservePrivateRefCountEntry();
6871
6872 buf_state = LockBufHdr(desc);
6873
6874 /* recheck, could have changed without the lock */
6875 if ((buf_state & BM_VALID) == 0 ||
6876 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
6877 {
6878 UnlockBufHdr(desc);
6879 continue;
6880 }
6881
6882 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6883 (*buffers_evicted)++;
6884 else
6885 (*buffers_skipped)++;
6886
6887 if (buffer_flushed)
6888 (*buffers_flushed)++;
6889 }
6890}
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:647
RelFileLocator rd_locator
Definition: rel.h:57

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool *  buffer_flushed 
)

Definition at line 6767 of file bufmgr.c.

6768{
6769 BufferDesc *desc;
6770
6771 Assert(BufferIsValid(buf) && !BufferIsLocal(buf));
6772
6773 /* Make sure we can pin the buffer. */
6774 ResourceOwnerEnlarge(CurrentResourceOwner);
6775 ReservePrivateRefCountEntry();
6776
6777 desc = GetBufferDescriptor(buf - 1);
6778 LockBufHdr(desc);
6779
6780 return EvictUnpinnedBufferInternal(desc, buffer_flushed);
6781}

References Assert(), buf, BufferIsLocal, BufferIsValid(), CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by invalidate_rel_block(), modify_rel_block(), and pg_buffercache_evict().
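
The caller passes a shared-buffer number and learns whether the page had to be written out before eviction; a minimal sketch, assuming buf identifies a valid, unpinned shared buffer:

bool        buffer_flushed;

if (EvictUnpinnedBuffer(buf, &buffer_flushed))
    elog(INFO, "evicted buffer %d%s", buf,
         buffer_flushed ? " (flushed first)" : "");
else
    elog(INFO, "buffer %d was pinned; not evicted", buf);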

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 939 of file bufmgr.c.

943{
944 Buffer buf;
945 uint32 extend_by = 1;
946
947 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
948 &buf, &extend_by);
949
950 return buf;
951}
int Buffer
Definition: buf.h:23
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:971

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
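
A sketch of the common single-block extension, as index AMs use it in place of the historical ReadBuffer(rel, P_NEW) idiom; the returned buffer is pinned, and EB_LOCK_FIRST additionally takes an exclusive content lock before the buffer becomes visible to others:

Buffer      buf;

buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL, EB_LOCK_FIRST);
/* ... initialize the new page while it is locked ... */
UnlockReleaseBuffer(buf);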

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer buffers,
uint32 extended_by 
)

Definition at line 971 of file bufmgr.c.

978{
979 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
980 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
981 Assert(extend_by > 0);
982
983 if (bmr.relpersistence == '\0')
984 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
985
986 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
987 extend_by, InvalidBlockNumber,
988 buffers, extended_by);
989}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2625
SMgrRelation smgr
Definition: bufmgr.h:110
Form_pg_class rd_rel
Definition: rel.h:111

References Assert(), ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().
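
A sketch of bulk extension: request several blocks at once and let extended_by report how many were actually added, since the callee may extend by less than requested (the batch size of 8 is illustrative):

Buffer      buffers[8];
uint32      extended_by = 0;
BlockNumber first_block;

first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                  NULL, 0,  /* no strategy, no flags */
                                  8, buffers, &extended_by);
for (uint32 i = 0; i < extended_by; i++)
    ReleaseBuffer(buffers[i]);  /* block first_block + i now exists */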

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 1000 of file bufmgr.c.

1006{
1007 BlockNumber current_size;
1008 uint32 extended_by = 0;
1009 Buffer buffer = InvalidBuffer;
1010 Buffer buffers[64];
1011
1012 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1013 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1014 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
1015
1016 if (bmr.relpersistence == '\0')
1017 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1018
1019 /*
1020 * If desired, create the file if it doesn't exist. If
1021 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
1022 * an smgrexists call.
1023 */
1024 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
1025 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
1026 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
1027 !smgrexists(BMR_GET_SMGR(bmr), fork))
1028 {
1029 LockRelationForExtension(bmr.rel, ExclusiveLock);
1030
1031 /* recheck, fork might have been created concurrently */
1032 if (!smgrexists(BMR_GET_SMGR(bmr), fork))
1033 smgrcreate(BMR_GET_SMGR(bmr), fork, flags & EB_PERFORMING_RECOVERY);
1034
1035 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
1036 }
1037
1038 /*
1039 * If requested, invalidate size cache, so that smgrnblocks asks the
1040 * kernel.
1041 */
1042 if (flags & EB_CLEAR_SIZE_CACHE)
1043 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1044
1045 /*
1046 * Estimate how many pages we'll need to extend by. This avoids acquiring
1047 * unnecessarily many victim buffers.
1048 */
1049 current_size = smgrnblocks(BMR_GET_SMGR(bmr), fork);
1050
1051 /*
1052 * Since no-one else can be looking at the page contents yet, there is no
1053 * difference between an exclusive lock and a cleanup-strength lock. Note
1054 * that we pass the original mode to ReadBuffer_common() below, when
1055 * falling back to reading the buffer due to a concurrent relation extension.
1056 */
1057 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1058 flags |= EB_LOCK_TARGET;
1059
1060 while (current_size < extend_to)
1061 {
1062 uint32 num_pages = lengthof(buffers);
1063 BlockNumber first_block;
1064
1065 if ((uint64) current_size + num_pages > extend_to)
1066 num_pages = extend_to - current_size;
1067
1068 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1069 num_pages, extend_to,
1070 buffers, &extended_by);
1071
1072 current_size = first_block + extended_by;
1073 Assert(num_pages != 0 || current_size >= extend_to);
1074
1075 for (uint32 i = 0; i < extended_by; i++)
1076 {
1077 if (first_block + i != extend_to - 1)
1078 ReleaseBuffer(buffers[i]);
1079 else
1080 buffer = buffers[i];
1081 }
1082 }
1083
1084 /*
1085 * It's possible that another backend concurrently extended the relation.
1086 * In that case read the buffer.
1087 *
1088 * XXX: Should we control this via a flag?
1089 */
1090 if (buffer == InvalidBuffer)
1091 {
1092 Assert(extended_by == 0);
1093 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1094 fork, extend_to - 1, mode, strategy);
1095 }
1096
1097 return buffer;
1098}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1268
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5461
#define BMR_GET_SMGR(bmr)
Definition: bufmgr.h:118
#define lengthof(array)
Definition: c.h:801
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:474
#define ExclusiveLock
Definition: lockdefs.h:42
static int64 current_size
Definition: pg_checksums.c:64
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:819

References Assert(), BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
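
A sketch in the style of the vm_extend()/fsm_extend() callers: grow a fork until a target block exists, creating the fork on demand and discarding any stale cached size first (vm_nblocks is an assumed caller-supplied target):

Buffer      buf;

buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL,
                          EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE,
                          vm_nblocks, RBM_ZERO_ON_ERROR);
ReleaseBuffer(buf);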

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5401 of file bufmgr.c.

5402{
5403 int i;
5404 BufferDesc *bufHdr;
5405
5406 for (i = 0; i < NBuffers; i++)
5407 {
5408 uint32 buf_state;
5409
5410 bufHdr = GetBufferDescriptor(i);
5411
5412 /*
5413 * As in DropRelationBuffers, an unlocked precheck should be safe and
5414 * saves some cycles.
5415 */
5416 if (bufHdr->tag.dbOid != dbid)
5417 continue;
5418
5419 /* Make sure we can handle the pin */
5420 ReservePrivateRefCountEntry();
5421 ResourceOwnerEnlarge(CurrentResourceOwner);
5422
5423 buf_state = LockBufHdr(bufHdr);
5424 if (bufHdr->tag.dbOid == dbid &&
5425 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5426 {
5427 PinBuffer_Locked(bufHdr);
5428 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5429 UnpinBuffer(bufHdr);
5430 }
5431 else
5432 UnlockBufHdr(bufHdr);
5433 }
5434}
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4515
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:3273
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:3341
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOCONTEXT_NORMAL
Definition: pgstat.h:289

References BM_DIRTY, BM_VALID, CurrentResourceOwner, buftag::dbOid, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5441 of file bufmgr.c.

5442{
5443 BufferDesc *bufHdr;
5444
5445 /* currently not needed, but no fundamental reason not to support */
5446 Assert(!BufferIsLocal(buffer));
5447
5448 Assert(BufferIsPinned(buffer));
5449
5450 bufHdr = GetBufferDescriptor(buffer - 1);
5451
5452 Assert(BufferIsLockedByMe(buffer));
5453
5454 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5455}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4378
bool BufferIsLockedByMe(Buffer buffer)
Definition: bufmgr.c:2937

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 5037 of file bufmgr.c.

5038{
5039 int i;
5040 BufferDesc *bufHdr;
5041 SMgrRelation srel = RelationGetSmgr(rel);
5042
5043 if (RelationUsesLocalBuffers(rel))
5044 {
5045 for (i = 0; i < NLocBuffer; i++)
5046 {
5047 uint32 buf_state;
5048
5049 bufHdr = GetLocalBufferDescriptor(i);
5050 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5051 ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
5052 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5053 {
5054 ErrorContextCallback errcallback;
5055
5056 /* Setup error traceback support for ereport() */
5057 errcallback.callback = local_buffer_write_error_callback;
5058 errcallback.arg = bufHdr;
5059 errcallback.previous = error_context_stack;
5060 error_context_stack = &errcallback;
5061
5062 /* Make sure we can handle the pin */
5063 ReservePrivateRefCountEntry();
5064 ResourceOwnerEnlarge(CurrentResourceOwner);
5065
5066 /*
5067 * Pin/unpin mostly to make valgrind work, but it also seems
5068 * like the right thing to do.
5069 */
5070 PinLocalBuffer(bufHdr, false);
5071
5072
5073 FlushLocalBuffer(bufHdr, srel);
5074
5075 UnpinLocalBuffer(BufferDescriptorGetBuffer(bufHdr));
5076
5077 /* Pop the error context stack */
5078 error_context_stack = errcallback.previous;
5079 }
5080 }
5081
5082 return;
5083 }
5084
5085 for (i = 0; i < NBuffers; i++)
5086 {
5087 uint32 buf_state;
5088
5089 bufHdr = GetBufferDescriptor(i);
5090
5091 /*
5092 * As in DropRelationBuffers, an unlocked precheck should be safe and
5093 * saves some cycles.
5094 */
5095 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
5096 continue;
5097
5098 /* Make sure we can handle the pin */
5099 ReservePrivateRefCountEntry();
5100 ResourceOwnerEnlarge(CurrentResourceOwner);
5101
5102 buf_state = LockBufHdr(bufHdr);
5103 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5104 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5105 {
5106 PinBuffer_Locked(bufHdr);
5107 FlushUnlockedBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5108 UnpinBuffer(bufHdr);
5109 }
5110 else
5111 UnlockBufHdr(bufHdr);
5112 }
5113}
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:6316
ErrorContextCallback * error_context_stack
Definition: elog.c:95
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition: localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:805
int NLocBuffer
Definition: localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:577
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u32(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 5125 of file bufmgr.c.

5126{
5127 int i;
5128 SMgrSortArray *srels;
5129 bool use_bsearch;
5130
5131 if (nrels == 0)
5132 return;
5133
5134 /* fill-in array for qsort */
5135 srels = palloc_array(SMgrSortArray, nrels);
5136
5137 for (i = 0; i < nrels; i++)
5138 {
5139 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5140
5141 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5142 srels[i].srel = smgrs[i];
5143 }
5144
5145 /*
5146 * Save the bsearch overhead for a low number of relations to sync. See
5147 * DropRelationsAllBuffers for details.
5148 */
5149 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
5150
5151 /* sort the list of SMgrRelations if necessary */
5152 if (use_bsearch)
5153 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5154
5155 for (i = 0; i < NBuffers; i++)
5156 {
5157 SMgrSortArray *srelent = NULL;
5158 BufferDesc *bufHdr = GetBufferDescriptor(i);
5159 uint32 buf_state;
5160
5161 /*
5162 * As in DropRelationBuffers, an unlocked precheck should be safe and
5163 * saves some cycles.
5164 */
5165
5166 if (!use_bsearch)
5167 {
5168 int j;
5169
5170 for (j = 0; j < nrels; j++)
5171 {
5172 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5173 {
5174 srelent = &srels[j];
5175 break;
5176 }
5177 }
5178 }
5179 else
5180 {
5181 RelFileLocator rlocator;
5182
5183 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5184 srelent = bsearch(&rlocator,
5185 srels, nrels, sizeof(SMgrSortArray),
5186 rlocator_comparator);
5187
5188
5189 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5190 if (srelent == NULL)
5191 continue;
5192
5193 /* Make sure we can handle the pin */
5194 ReservePrivateRefCountEntry();
5195 ResourceOwnerEnlarge(CurrentResourceOwner);
5196
5197 buf_state = LockBufHdr(bufHdr);
5198 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5199 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5200 {
5201 PinBuffer_Locked(bufHdr);
5202 FlushUnlockedBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5203 UnpinBuffer(bufHdr);
5204 }
5205 else
5206 UnlockBufHdr(bufHdr);
5207 }
5208
5209 pfree(srels);
5210}
SMgrRelation srel
Definition: bufmgr.c:162
RelFileLocator rlocator
Definition: bufmgr.c:161

References Assert(), BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), NBuffers, palloc_array, pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FreeAccessStrategy()

void FreeAccessStrategy ( BufferAccessStrategy  strategy)

Definition at line 643 of file freelist.c.

644{
645 /* don't crash if called on a "default" strategy */
646 if (strategy != NULL)
647 pfree(strategy);
648}

References pfree().

Referenced by blgetbitmap(), FreeBulkInsertState(), heap_endscan(), initscan(), parallel_vacuum_main(), and RelationCopyStorageUsingBuffer().

◆ GetAccessStrategy()

BufferAccessStrategy GetAccessStrategy ( BufferAccessStrategyType  btype)

Definition at line 461 of file freelist.c.

462{
463 int ring_size_kb;
464
465 /*
466 * Select ring size to use. See buffer/README for rationales.
467 *
468 * Note: if you change the ring size for BAS_BULKREAD, see also
469 * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
470 */
471 switch (btype)
472 {
473 case BAS_NORMAL:
474 /* if someone asks for NORMAL, just give 'em a "default" object */
475 return NULL;
476
477 case BAS_BULKREAD:
478 {
479 int ring_max_kb;
480
481 /*
482 * The ring always needs to be large enough to allow some
483 * separation in time between providing a buffer to the user
484 * of the strategy and that buffer being reused. Otherwise the
485 * user's pin will prevent reuse of the buffer, even without
486 * concurrent activity.
487 *
488 * We also need to ensure the ring always is large enough for
489 * SYNC_SCAN_REPORT_INTERVAL, as noted above.
490 *
491 * Thus we start out at a minimal size and increase the size
492 * further if appropriate.
493 */
494 ring_size_kb = 256;
495
496 /*
497 * There's no point in a larger ring if we won't be allowed to
498 * pin sufficiently many buffers. But we never limit to less
499 * than the minimal size above.
500 */
501 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
502 ring_max_kb = Max(ring_size_kb, ring_max_kb);
503
504 /*
505 * We would like the ring to additionally have space for the
506 * configured degree of IO concurrency. While being read in,
507 * buffers can obviously not yet be reused.
508 *
509 * Each IO can be up to io_combine_limit blocks large, and we
510 * want to start up to effective_io_concurrency IOs.
511 *
512 * Note that effective_io_concurrency may be 0, which disables
513 * AIO.
514 */
515 ring_size_kb += (BLCKSZ / 1024) *
516 io_combine_limit * effective_io_concurrency;
517
518 if (ring_size_kb > ring_max_kb)
519 ring_size_kb = ring_max_kb;
520 break;
521 }
522 case BAS_BULKWRITE:
523 ring_size_kb = 16 * 1024;
524 break;
525 case BAS_VACUUM:
526 ring_size_kb = 2048;
527 break;
528
529 default:
530 elog(ERROR, "unrecognized buffer access strategy: %d",
531 (int) btype);
532 return NULL; /* keep compiler quiet */
533 }
534
535 return GetAccessStrategyWithSize(btype, ring_size_kb);
536}
int effective_io_concurrency
Definition: bufmgr.c:177
int io_combine_limit
Definition: bufmgr.c:192
uint32 GetPinLimit(void)
Definition: bufmgr.c:2569
#define Max(x, y)
Definition: c.h:989
BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
Definition: freelist.c:546

References BAS_BULKREAD, BAS_BULKWRITE, BAS_NORMAL, BAS_VACUUM, effective_io_concurrency, elog, ERROR, GetAccessStrategyWithSize(), GetPinLimit(), io_combine_limit, and Max.

Referenced by blgetbitmap(), bt_check_every_level(), collect_corrupt_items(), collect_visibility_data(), GetBulkInsertState(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), initscan(), pgstat_index(), pgstathashindex(), pgstatindex_impl(), RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), statapprox_heap(), and verify_heapam().
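
A sketch of the usual strategy lifecycle: allocate a ring for a bulk scan, pass it to every read in that scan, and free it at the end:

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
{
    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, strategy);

    /* ... examine the page ... */
    ReleaseBuffer(buf);
}
FreeAccessStrategy(strategy);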

◆ GetAccessStrategyBufferCount()

int GetAccessStrategyBufferCount ( BufferAccessStrategy  strategy)

Definition at line 586 of file freelist.c.

587{
588 if (strategy == NULL)
589 return 0;
590
591 return strategy->nbuffers;
592}

References BufferAccessStrategyData::nbuffers.

Referenced by parallel_vacuum_init().

◆ GetAccessStrategyPinLimit()

int GetAccessStrategyPinLimit ( BufferAccessStrategy  strategy)

Definition at line 609 of file freelist.c.

610{
611 if (strategy == NULL)
612 return NBuffers;
613
614 switch (strategy->btype)
615 {
616 case BAS_BULKREAD:
617
618 /*
619 * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
620 * shouldn't be a problem and the caller is free to pin up to the
621 * entire ring at once.
622 */
623 return strategy->nbuffers;
624
625 default:
626
627 /*
628 * Tell caller not to pin more than half the buffers in the ring.
629 * This is a trade-off between look ahead distance and deferring
630 * writeback and associated WAL traffic.
631 */
632 return strategy->nbuffers / 2;
633 }
634}
BufferAccessStrategyType btype
Definition: freelist.c:67

References BAS_BULKREAD, BufferAccessStrategyData::btype, BufferAccessStrategyData::nbuffers, and NBuffers.

Referenced by read_stream_begin_impl().

◆ GetAccessStrategyWithSize()

BufferAccessStrategy GetAccessStrategyWithSize ( BufferAccessStrategyType  btype,
int  ring_size_kb 
)

Definition at line 546 of file freelist.c.

547{
548 int ring_buffers;
549 BufferAccessStrategy strategy;
550
551 Assert(ring_size_kb >= 0);
552
553 /* Figure out how many buffers ring_size_kb is */
554 ring_buffers = ring_size_kb / (BLCKSZ / 1024);
555
556 /* 0 means unlimited, so no BufferAccessStrategy required */
557 if (ring_buffers == 0)
558 return NULL;
559
560 /* Cap to 1/8th of shared_buffers */
561 ring_buffers = Min(NBuffers / 8, ring_buffers);
562
563 /* NBuffers should never be less than 16, so this shouldn't happen */
564 Assert(ring_buffers > 0);
565
566 /* Allocate the object and initialize all elements to zeroes */
567 strategy = (BufferAccessStrategy)
568 palloc0(offsetof(BufferAccessStrategyData, buffers) +
569 ring_buffers * sizeof(Buffer));
570
571 /* Set fields that don't start out zero */
572 strategy->btype = btype;
573 strategy->nbuffers = ring_buffers;
574
575 return strategy;
576}
struct BufferAccessStrategyData * BufferAccessStrategy
Definition: buf.h:44
#define Min(x, y)
Definition: c.h:995
void * palloc0(Size size)
Definition: mcxt.c:1417

References Assert(), BufferAccessStrategyData::btype, Min, BufferAccessStrategyData::nbuffers, NBuffers, and palloc0().

Referenced by do_autovacuum(), ExecVacuum(), GetAccessStrategy(), and parallel_vacuum_main().
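
A sketch of a caller-chosen ring size, along the lines of what ExecVacuum() does for vacuum_buffer_usage_limit (the 1024 kB figure is illustrative; a size of 0 means "no ring" and yields NULL):

BufferAccessStrategy strategy;

strategy = GetAccessStrategyWithSize(BAS_VACUUM, 1024);
/* ... run the scan with this strategy, then ... */
FreeAccessStrategy(strategy);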

◆ GetAdditionalLocalPinLimit()

uint32 GetAdditionalLocalPinLimit ( void  )

Definition at line 315 of file localbuf.c.

316{
317 Assert(NLocalPinnedBuffers <= num_temp_buffers);
318 return num_temp_buffers - NLocalPinnedBuffers;
319}
int num_temp_buffers
Definition: guc_tables.c:553
static int NLocalPinnedBuffers
Definition: localbuf.c:56

References Assert(), NLocalPinnedBuffers, and num_temp_buffers.

Referenced by read_stream_start_pending_read().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2581 of file bufmgr.c.

2582{
2583 uint32 estimated_pins_held;
2584
2585 /*
2586 * We get the number of "overflowed" pins for free, but don't know the
2587 * number of pins in PrivateRefCountArray. The cost of calculating that
2588 * exactly doesn't seem worth it, so just assume the max.
2589 */
2590 estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2591
2592 /* Is this backend already holding more than its fair share? */
2593 if (estimated_pins_held > MaxProportionalPins)
2594 return 0;
2595
2596 return MaxProportionalPins - estimated_pins_held;
2597}
static uint32 MaxProportionalPins
Definition: bufmgr.c:246
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:122

References MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().
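
A sketch of how a reader can clamp planned look-ahead to the remaining fair-share balance, roughly as read_stream_start_pending_read() does (the initial value of 16 is illustrative):

uint32      nblocks_planned = 16;
uint32      additional = GetAdditionalPinLimit();

/*
 * Never plan more new pins than our proportional share allows, but always
 * permit at least one so progress is possible.
 */
additional = Max(additional, 1);
if (nblocks_planned > additional)
    nblocks_planned = additional;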

◆ GetLocalPinLimit()

uint32 GetLocalPinLimit ( void  )

Definition at line 307 of file localbuf.c.

308{
309 /* Every backend has its own temporary buffers, and can pin them all. */
310 return num_temp_buffers;
311}

References num_temp_buffers.

Referenced by read_stream_begin_impl().

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2569 of file bufmgr.c.

2570{
2571 return MaxProportionalPins;
2572}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5926 of file bufmgr.c.

5927{
5928 int bufid = GetStartupBufferPinWaitBufId();
5929
5930 /*
5931 * If we get woken slowly then it's possible that the Startup process was
5932 * already woken by other backends before we got here. Also possible that
5933 * we get here by multiple interrupts or interrupts at inappropriate
5934 * times, so make sure we do nothing if the bufid is not set.
5935 */
5936 if (bufid < 0)
5937 return false;
5938
5939 if (GetPrivateRefCount(bufid + 1) > 0)
5940 return true;
5941
5942 return false;
5943}
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:771

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 5493 of file bufmgr.c.

5494{
5495 Assert(BufferIsPinned(buffer));
5496 ResourceOwnerEnlarge(CurrentResourceOwner);
5497 if (BufferIsLocal(buffer))
5498 LocalRefCount[-buffer - 1]++;
5499 else
5500 {
5501 PrivateRefCountEntry *ref;
5502
5503 ref = GetPrivateRefCountEntry(buffer, true);
5504 Assert(ref != NULL);
5505 ref->data.refcount++;
5506 }
5507 ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
5508}
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:482
PrivateRefCountData data
Definition: bufmgr.c:118

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, PrivateRefCountEntry::data, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountData::refcount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4101 of file bufmgr.c.

4102{
4103 HASHCTL hash_ctl;
4104
4105 /*
4106 * An advisory limit on the number of pins each backend should hold, based
4107 * on shared_buffers and the maximum number of connections possible.
4108 * That's very pessimistic, but outside toy-sized shared_buffers it should
4109 * allow plenty of pins. LimitAdditionalPins() and
4110 * GetAdditionalPinLimit() can be used to check the remaining balance.
4111 */
4112 MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);
4113
4114 memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
4115 memset(&PrivateRefCountArrayKeys, 0, sizeof(PrivateRefCountArrayKeys));
4116
4117 hash_ctl.keysize = sizeof(Buffer);
4118 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4119
4120 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
4121 HASH_ELEM | HASH_BLOBS);
4122
4123 /*
4124 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
4125 * the corresponding phase of backend shutdown.
4126 */
4127 Assert(MyProc != NULL);
4128 on_shmem_exit(AtProcExit_Buffers, 0);
4129}
static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:238
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:4136
struct PrivateRefCountEntry PrivateRefCountEntry
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:239
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:240
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:358
int MaxBackends
Definition: globals.c:146
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
#define NUM_AUXILIARY_PROCS
Definition: proc.h:463
PGPROC * MyProc
Definition: proc.c:67
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert(), AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, PrivateRefCountArrayKeys, and PrivateRefCountHash.

Referenced by BaseInit().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 6010 of file bufmgr.c.

6011{
6012 BufferDesc *bufHdr;
6013 uint32 buf_state;
6014
6015 Assert(BufferIsValid(buffer));
6016
6017 /* see AIO related comment in LockBufferForCleanup() */
6018
6019 if (BufferIsLocal(buffer))
6020 {
6021 /* There should be exactly one pin */
6022 if (LocalRefCount[-buffer - 1] != 1)
6023 return false;
6024 /* Nobody else to wait for */
6025 return true;
6026 }
6027
6028 /* There should be exactly one local pin */
6029 if (GetPrivateRefCount(buffer) != 1)
6030 return false;
6031
6032 bufHdr = GetBufferDescriptor(buffer - 1);
6033
6034 /* caller must hold exclusive lock on buffer */
6035 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
6036
6037 buf_state = LockBufHdr(bufHdr);
6038
6039 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
6040 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
6041 {
6042 /* pincount is OK. */
6043 UnlockBufHdr(bufHdr);
6044 return true;
6045 }
6046
6047 UnlockBufHdr(bufHdr);
6048 return false;
6049}

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
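
A sketch of the hash-index pattern: take the exclusive lock normally, then upgrade it logically to a cleanup lock only if no other backend holds a pin:

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (IsBufferCleanupOK(buf))
{
    /*
     * Pin count is exactly 1: safe to delete tuples that other scans
     * might otherwise revisit.
     */
}
UnlockReleaseBuffer(buf);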

◆ LimitAdditionalLocalPins()

void LimitAdditionalLocalPins ( uint32 additional_pins)

Definition at line 323 of file localbuf.c.

324{
325 uint32 max_pins;
326
327 if (*additional_pins <= 1)
328 return;
329
330 /*
331 * In contrast to LimitAdditionalPins() other backends don't play a role
332 * here. We can allow up to NLocBuffer pins in total, but it might not be
333 * initialized yet so read num_temp_buffers.
334 */
335 max_pins = num_temp_buffers - NLocalPinnedBuffers;
336
337 if (*additional_pins >= max_pins)
338 *additional_pins = max_pins;
339}

References NLocalPinnedBuffers, and num_temp_buffers.

Referenced by ExtendBufferedRelLocal().

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2607 of file bufmgr.c.

2608{
2609 uint32 limit;
2610
2611 if (*additional_pins <= 1)
2612 return;
2613
2614 limit = GetAdditionalPinLimit();
2615 limit = Max(limit, 1);
2616 if (limit < *additional_pins)
2617 *additional_pins = limit;
2618}
uint32 GetAdditionalPinLimit(void)
Definition: bufmgr.c:2581

References GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 5699 of file bufmgr.c.

5700{
5701 BufferDesc *buf;
5702
5703 Assert(BufferIsPinned(buffer));
5704 if (BufferIsLocal(buffer))
5705 return; /* local buffers need no lock */
5706
5707 buf = GetBufferDescriptor(buffer - 1);
5708
5709 if (mode == BUFFER_LOCK_UNLOCK)
5710 LWLockRelease(BufferDescriptorGetContentLock(buf));
5711 else if (mode == BUFFER_LOCK_SHARE)
5712 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
5713 else if (mode == BUFFER_LOCK_EXCLUSIVE)
5714 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
5715 else
5716 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5717}
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1178
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1898

References Assert(), buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), BitmapHeapScanNextBlock(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), create_toy_buffer(), entryLoadMoreItems(), ExtendBufferedRelShared(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_lock(), heap_inplace_unlock(), heap_inplace_update_and_unlock(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), invalidate_rel_block(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), 
update_most_recent_deletion_info(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroAndLockBuffer().
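
The canonical pin-then-lock sequence; the pin (taken by ReadBuffer()) keeps the buffer mapped, while LockBuffer() only protects the page contents:

Buffer      buf = ReadBuffer(rel, blkno);

LockBuffer(buf, BUFFER_LOCK_SHARE);
/* ... read the page, e.g. via BufferGetPage(buf) ... */
UnlockReleaseBuffer(buf);       /* releases the lock, then the pin */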

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5779 of file bufmgr.c.

5780{
5781 BufferDesc *bufHdr;
5782 TimestampTz waitStart = 0;
5783 bool waiting = false;
5784 bool logged_recovery_conflict = false;
5785
5786 Assert(BufferIsPinned(buffer));
5787 Assert(PinCountWaitBuf == NULL);
5788
5789 CheckBufferIsPinnedOnce(buffer);
5790
5791 /*
5792 * We do not yet need to be worried about in-progress AIOs holding a pin,
5793 * as we, so far, only support doing reads via AIO and this function can
5794 * only be called once the buffer is valid (i.e. no read can be in
5795 * flight).
5796 */
5797
5798 /* Nobody else to wait for */
5799 if (BufferIsLocal(buffer))
5800 return;
5801
5802 bufHdr = GetBufferDescriptor(buffer - 1);
5803
5804 for (;;)
5805 {
5806 uint32 buf_state;
5807 uint32 unset_bits = 0;
5808
5809 /* Try to acquire lock */
5810 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5811 buf_state = LockBufHdr(bufHdr);
5812
5813 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5814 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5815 {
5816 /* Successfully acquired exclusive lock with pincount 1 */
5817 UnlockBufHdr(bufHdr);
5818
5819 /*
5820 * Emit the log message if recovery conflict on buffer pin was
5821 * resolved but the startup process waited longer than
5822 * deadlock_timeout for it.
5823 */
5824 if (logged_recovery_conflict)
5825 LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5826 waitStart, GetCurrentTimestamp(),
5827 NULL, false);
5828
5829 if (waiting)
5830 {
5831 /* reset ps display to remove the suffix if we added one */
5832 set_ps_display_remove_suffix();
5833 waiting = false;
5834 }
5835 return;
5836 }
5837 /* Failed, so mark myself as waiting for pincount 1 */
5838 if (buf_state & BM_PIN_COUNT_WAITER)
5839 {
5840 UnlockBufHdr(bufHdr);
5841 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5842 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5843 }
5844 bufHdr->wait_backend_pgprocno = MyProcNumber;
5845 PinCountWaitBuf = bufHdr;
5846 UnlockBufHdrExt(bufHdr, buf_state,
5847 BM_PIN_COUNT_WAITER, 0,
5848 0);
5849 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5850
5851 /* Wait to be signaled by UnpinBuffer() */
5852 if (InHotStandby)
5853 {
5854 if (!waiting)
5855 {
5856 /* adjust the process title to indicate that it's waiting */
5857 set_ps_display_suffix("waiting");
5858 waiting = true;
5859 }
5860
5861 /*
5862 * Emit the log message if the startup process is waiting longer
5863 * than deadlock_timeout for recovery conflict on buffer pin.
5864 *
5865 * Skip this if first time through because the startup process has
5866 * not started waiting yet in this case. So, the wait start
5867 * timestamp is set after this logic.
5868 */
5869 if (waitStart != 0 && !logged_recovery_conflict)
5870 {
5871 TimestampTz now = GetCurrentTimestamp();
5872
5873 if (TimestampDifferenceExceeds(waitStart, now,
5874 DeadlockTimeout))
5875 {
5876 LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5877 waitStart, now, NULL, true);
5878 logged_recovery_conflict = true;
5879 }
5880 }
5881
5882 /*
5883 * Set the wait start timestamp if logging is enabled and first
5884 * time through.
5885 */
5886 if (log_recovery_conflict_waits && waitStart == 0)
5887 waitStart = GetCurrentTimestamp();
5888
5889 /* Publish the bufid that Startup process waits on */
5890 SetStartupBufferPinWaitBufId(buffer - 1);
5891 /* Set alarm and then wait to be signaled by UnpinBuffer() */
5892 ResolveRecoveryConflictWithBufferPin();
5893 /* Reset the published bufid */
5894 SetStartupBufferPinWaitBufId(-1);
5895 }
5896 else
5897 ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
5898
5899 /*
5900 * Remove flag marking us as waiter. Normally this will not be set
5901 * anymore, but ProcWaitForSignal() can return for other signals as
5902 * well. We take care to only reset the flag if we're the waiter, as
5903 * theoretically another backend could have started waiting. That's
5904 * impossible with the current usages due to table level locking, but
5905 * better be safe.
5906 */
5907 buf_state = LockBufHdr(bufHdr);
5908 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5909 bufHdr->wait_backend_pgprocno == MyProcNumber)
5910 unset_bits |= BM_PIN_COUNT_WAITER;
5911
5912 UnlockBufHdrExt(bufHdr, buf_state,
5913 0, unset_bits,
5914 0);
5915
5916 PinCountWaitBuf = NULL;
5917 /* Loop back and try again */
5918 }
5919}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:75
static uint32 UnlockBufHdrExt(BufferDesc *desc, uint32 old_buf_state, uint32 set_bits, uint32 unset_bits, int refcount_change)
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5746
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:205
int64 TimestampTz
Definition: timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:387
int DeadlockTimeout
Definition: proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:759
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1984
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:793
bool log_recovery_conflict_waits
Definition: standby.c:42
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
int wait_backend_pgprocno
static volatile sig_atomic_t waiting
Definition: waiteventset.c:171
#define InHotStandby
Definition: xlogutils.h:60

References Assert(), BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
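
A hedged usage sketch (not taken from the source; rel and blkno are placeholders): callers pin the target page first and then request the cleanup lock, which returns once theirs is the only pin.

Buffer buf = ReadBuffer(rel, blkno);   /* pin the page first */

LockBufferForCleanup(buf);             /* waits until ours is the only pin */
/* ... prune or defragment the page here ... */
UnlockReleaseBuffer(buf);              /* drop the lock and the pin */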

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 3037 of file bufmgr.c.

3038{
3039 BufferDesc *bufHdr;
3040 uint32 buf_state;
3041 uint32 old_buf_state;
3042
3043 if (!BufferIsValid(buffer))
3044 elog(ERROR, "bad buffer ID: %d", buffer);
3045
3046 if (BufferIsLocal(buffer))
3047 {
3048 MarkLocalBufferDirty(buffer);
3049 return;
3050 }
3051
3052 bufHdr = GetBufferDescriptor(buffer - 1);
3053
3054 Assert(BufferIsPinned(buffer));
3055 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
3056
3057 /*
3058 * NB: We have to wait for the buffer header spinlock to be not held, as
3059 * TerminateBufferIO() relies on the spinlock.
3060 */
3061 old_buf_state = pg_atomic_read_u32(&bufHdr->state);
3062 for (;;)
3063 {
3064 if (old_buf_state & BM_LOCKED)
3065 old_buf_state = WaitBufHdrUnlocked(bufHdr);
3066
3067 buf_state = old_buf_state;
3068
3069 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3070 buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3071
3072 if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
3073 buf_state))
3074 break;
3075 }
3076
3077 /*
3078 * If the buffer was not dirty already, do vacuum accounting.
3079 */
3080 if (!(old_buf_state & BM_DIRTY))
3081 {
3082 pgBufferUsage.shared_blks_dirtied++;
3083 if (VacuumCostActive)
3084 VacuumCostBalance += VacuumCostPageDirty;
3085 }
3086}
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:349
#define BM_LOCKED
Definition: buf_internals.h:68
#define BM_JUST_DIRTIED
Definition: buf_internals.h:74
pg_noinline uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:6407
bool VacuumCostActive
Definition: globals.c:158
int VacuumCostBalance
Definition: globals.c:157
int VacuumCostPageDirty
Definition: globals.c:153
BufferUsage pgBufferUsage
Definition: instrument.c:20
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:491
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().
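
A hedged sketch of the usual modify-a-page pattern (buf is an already pinned buffer; WAL logging is elided): the exclusive content lock must be held, the change is made inside a critical section, and MarkBufferDirty() is called before the lock is released.

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
START_CRIT_SECTION();
/* ... modify the page obtained via BufferGetPage(buf) ... */
MarkBufferDirty(buf);
/* ... XLogInsert() and PageSetLSN() would follow in WAL-logged code ... */
END_CRIT_SECTION();
UnlockReleaseBuffer(buf);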

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5525 of file bufmgr.c.

5526{
5527 BufferDesc *bufHdr;
5528 Page page = BufferGetPage(buffer);
5529
5530 if (!BufferIsValid(buffer))
5531 elog(ERROR, "bad buffer ID: %d", buffer);
5532
5533 if (BufferIsLocal(buffer))
5534 {
5535 MarkLocalBufferDirty(buffer);
5536 return;
5537 }
5538
5539 bufHdr = GetBufferDescriptor(buffer - 1);
5540
5541 Assert(GetPrivateRefCount(buffer) > 0);
5542 /* here, either share or exclusive lock is OK */
5543 Assert(BufferIsLockedByMe(buffer));
5544
5545 /*
5546 * This routine might get called many times on the same page, if we are
5547 * making the first scan after commit of an xact that added/deleted many
5548 * tuples. So, be as quick as we can if the buffer is already dirty. We
5549 * do this by not acquiring spinlock if it looks like the status bits are
5550 * already set. Since we make this test unlocked, there's a chance we
5551 * might fail to notice that the flags have just been cleared, and failed
5552 * to reset them, due to memory-ordering issues. But since this function
5553 * is only intended to be used in cases where failing to write out the
5554 * data would be harmless anyway, it doesn't really matter.
5555 */
5556 if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5557 (BM_DIRTY | BM_JUST_DIRTIED))
5558 {
5559 XLogRecPtr lsn = InvalidXLogRecPtr;
5560 bool dirtied = false;
5561 bool delayChkptFlags = false;
5562 uint32 buf_state;
5563
5564 /*
5565 * If we need to protect hint bit updates from torn writes, WAL-log a
5566 * full page image of the page. This full page image is only necessary
5567 * if the hint bit update is the first change to the page since the
5568 * last checkpoint.
5569 *
5570 * We don't check full_page_writes here because that logic is included
5571 * when we call XLogInsert() since the value changes dynamically.
5572 */
5573 if (XLogHintBitIsNeeded() &&
5574 (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
5575 {
5576 /*
5577 * If we must not write WAL, due to a relfilelocator-specific
5578 * condition or being in recovery, don't dirty the page. We can
5579 * set the hint, just not dirty the page as a result so the hint
5580 * is lost when we evict the page or shutdown.
5581 *
5582 * See src/backend/storage/page/README for longer discussion.
5583 */
5584 if (RecoveryInProgress() ||
5585 RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
5586 return;
5587
5588 /*
5589 * If the block is already dirty because we either made a change
5590 * or set a hint already, then we don't need to write a full page
5591 * image. Note that aggressive cleaning of blocks dirtied by hint
5592 * bit setting would increase the call rate. Bulk setting of hint
5593 * bits would reduce the call rate...
5594 *
5595 * We must issue the WAL record before we mark the buffer dirty.
5596 * Otherwise we might write the page before we write the WAL. That
5597 * causes a race condition, since a checkpoint might occur between
5598 * writing the WAL record and marking the buffer dirty. We solve
5599 * that with a kluge, but one that is already in use during
5600 * transaction commit to prevent race conditions. Basically, we
5601 * simply prevent the checkpoint WAL record from being written
5602 * until we have marked the buffer dirty. We don't start the
5603 * checkpoint flush until we have marked dirty, so our checkpoint
5604 * must flush the change to disk successfully or the checkpoint
5605 * never gets written, so crash recovery will fix.
5606 *
5607 * It's possible we may enter here without an xid, so it is
5608 * essential that CreateCheckPoint waits for virtual transactions
5609 * rather than full transactionids.
5610 */
5611 Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
5612 MyProc->delayChkptFlags |= DELAY_CHKPT_START;
5613 delayChkptFlags = true;
5614 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5615 }
5616
5617 buf_state = LockBufHdr(bufHdr);
5618
5619 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5620
5621 if (!(buf_state & BM_DIRTY))
5622 {
5623 dirtied = true; /* Means "will be dirtied by this action" */
5624
5625 /*
5626 * Set the page LSN if we wrote a backup block. We aren't supposed
5627 * to set this when only holding a share lock but as long as we
5628 * serialise it somehow we're OK. We choose to set LSN while
5629 * holding the buffer header lock, which causes any reader of an
5630 * LSN who holds only a share lock to also obtain a buffer header
5631 * lock before using PageGetLSN(), which is enforced in
5632 * BufferGetLSNAtomic().
5633 *
5634 * If checksums are enabled, you might think we should reset the
5635 * checksum here. That will happen when the page is written
5636 * sometime later in this checkpoint cycle.
5637 */
5638 if (XLogRecPtrIsValid(lsn))
5639 PageSetLSN(page, lsn);
5640 }
5641
5642 UnlockBufHdrExt(bufHdr, buf_state,
5643 BM_DIRTY | BM_JUST_DIRTIED,
5644 0, 0);
5645
5646 if (delayChkptFlags)
5647 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5648
5649 if (dirtied)
5650 {
5651 pgBufferUsage.shared_blks_dirtied++;
5652 if (VacuumCostActive)
5653 VacuumCostBalance += VacuumCostPageDirty;
5654 }
5655 }
5656}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
#define DELAY_CHKPT_START
Definition: proc.h:135
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:573
int delayChkptFlags
Definition: proc.h:257
bool RecoveryInProgress(void)
Definition: xlog.c:6461
#define XLogRecPtrIsValid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1087

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
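
A hedged sketch of a hint-bit style update, loosely after SetHintBits(): in contrast to MarkBufferDirty(), a share lock is sufficient, and the page is only actually dirtied when doing so is safe (see the function comments above).

LockBuffer(buf, BUFFER_LOCK_SHARE);
/* ... set a recoverable hint bit on the page ... */
MarkBufferDirtyHint(buf, true);    /* true: standard page layout */
UnlockReleaseBuffer(buf);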

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers ( int32 *  buffers_dirtied,
int32 *  buffers_already_dirty,
int32 *  buffers_skipped 
)

Definition at line 7046 of file bufmgr.c.

7049{
7050 *buffers_dirtied = 0;
7051 *buffers_already_dirty = 0;
7052 *buffers_skipped = 0;
7053
7054 for (int buf = 1; buf <= NBuffers; buf++)
7055 {
7056 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7057 uint32 buf_state;
7058 bool buffer_already_dirty;
7059
7060 CHECK_FOR_INTERRUPTS();
7061
7062 buf_state = pg_atomic_read_u32(&desc->state);
7063 if (!(buf_state & BM_VALID))
7064 continue;
7065
7066 ResourceOwnerEnlarge(CurrentResourceOwner);
7067 ReservePrivateRefCountEntry();
7068
7069 LockBufHdr(desc);
7070
7071 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
7072 (*buffers_dirtied)++;
7073 else if (buffer_already_dirty)
7074 (*buffers_already_dirty)++;
7075 else
7076 (*buffers_skipped)++;
7077 }
7078}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition: bufmgr.c:6897

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().
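
A minimal sketch of how the output counters might be consumed (the DEBUG1 message is illustrative, not from pg_buffercache):

int32 dirtied;
int32 already_dirty;
int32 skipped;

MarkDirtyAllUnpinnedBuffers(&dirtied, &already_dirty, &skipped);
elog(DEBUG1, "dirtied=%d, already dirty=%d, skipped=%d",
     dirtied, already_dirty, skipped);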

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers ( Relation  rel,
int32 *  buffers_dirtied,
int32 *  buffers_already_dirty,
int32 *  buffers_skipped 
)

Definition at line 6989 of file bufmgr.c.

6993{
6994 Assert(!RelationUsesLocalBuffers(rel));
6995
6996 *buffers_dirtied = 0;
6997 *buffers_already_dirty = 0;
6998 *buffers_skipped = 0;
6999
7000 for (int buf = 1; buf <= NBuffers; buf++)
7001 {
7002 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7003 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
7004 bool buffer_already_dirty;
7005
7006 CHECK_FOR_INTERRUPTS();
7007
7008 /* An unlocked precheck should be safe and saves some cycles. */
7009 if ((buf_state & BM_VALID) == 0 ||
7010 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7011 continue;
7012
7013 /* Make sure we can pin the buffer. */
7014 ResourceOwnerEnlarge(CurrentResourceOwner);
7015 ReservePrivateRefCountEntry();
7016
7017 buf_state = LockBufHdr(desc);
7018
7019 /* recheck, could have changed without the lock */
7020 if ((buf_state & BM_VALID) == 0 ||
7021 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7022 {
7023 UnlockBufHdr(desc);
7024 continue;
7025 }
7026
7027 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
7028 (*buffers_dirtied)++;
7029 else if (buffer_already_dirty)
7030 (*buffers_already_dirty)++;
7031 else
7032 (*buffers_skipped)++;
7033 }
7034}

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer ( Buffer  buf,
bool *  buffer_already_dirty 
)

Definition at line 6953 of file bufmgr.c.

6954{
6955 BufferDesc *desc;
6956 bool buffer_dirtied = false;
6957
6958 Assert(!BufferIsLocal(buf));
6959
6960 /* Make sure we can pin the buffer. */
6961 ResourceOwnerEnlarge(CurrentResourceOwner);
6962 ReservePrivateRefCountEntry();
6963
6964 desc = GetBufferDescriptor(buf - 1);
6965 LockBufHdr(desc);
6966
6967 buffer_dirtied = MarkDirtyUnpinnedBufferInternal(buf, desc, buffer_already_dirty);
6968 /* Both can not be true at the same time */
6969 Assert(!(buffer_dirtied && *buffer_already_dirty));
6970
6971 return buffer_dirtied;
6972}

References Assert(), buf, BufferIsLocal, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().
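
A minimal sketch of the three possible outcomes for a single shared buffer buf (local buffers are not accepted, per the Assert above); the DEBUG1 messages are illustrative only:

bool already_dirty;

if (MarkDirtyUnpinnedBuffer(buf, &already_dirty))
    elog(DEBUG1, "buffer %d newly dirtied", buf);
else if (already_dirty)
    elog(DEBUG1, "buffer %d was already dirty", buf);
else
    elog(DEBUG1, "buffer %d skipped (pinned or invalid)", buf);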

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 747 of file bufmgr.c.

748{
749 Assert(RelationIsValid(reln));
750 Assert(BlockNumberIsValid(blockNum));
751
752 if (RelationUsesLocalBuffers(reln))
753 {
754 /* see comments in ReadBufferExtended */
755 if (RELATION_IS_OTHER_TEMP(reln))
756 ereport(ERROR,
757 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
758 errmsg("cannot access temporary tables of other sessions")));
759
760 /* pass it off to localbuf.c */
761 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
762 }
763 else
764 {
765 /* pass it to the shared buffer version */
766 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
767 }
768}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:657
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereport(elevel,...)
Definition: elog.h:150
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:668
#define RelationIsValid(relation)
Definition: rel.h:490

References Assert(), BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().
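
A hedged sketch (rel and blkno are placeholders): issue the prefetch hint, overlap it with other work, then read the block for real. If the result's recent_buffer field was set, it could instead be handed to ReadRecentBuffer().

PrefetchBufferResult pr = PrefetchBuffer(rel, MAIN_FORKNUM, blkno);
Buffer buf;

/* ... other work overlaps with the (possible) asynchronous read ... */
buf = ReadBuffer(rel, blkno);    /* pr.recent_buffer could shortcut this */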

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 657 of file bufmgr.c.

660{
661 PrefetchBufferResult result = {InvalidBuffer, false};
662 BufferTag newTag; /* identity of requested block */
663 uint32 newHash; /* hash value for newTag */
664 LWLock *newPartitionLock; /* buffer partition lock for it */
665 int buf_id;
666
667 Assert(BlockNumberIsValid(blockNum));
668
669 /* create a tag so we can lookup the buffer */
670 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
671 forkNum, blockNum);
672
673 /* determine its hash code and partition lock ID */
674 newHash = BufTableHashCode(&newTag);
675 newPartitionLock = BufMappingPartitionLock(newHash);
676
677 /* see if the block is in the buffer pool already */
678 LWLockAcquire(newPartitionLock, LW_SHARED);
679 buf_id = BufTableLookup(&newTag, newHash);
680 LWLockRelease(newPartitionLock);
681
682 /* If not in buffers, initiate prefetch */
683 if (buf_id < 0)
684 {
685#ifdef USE_PREFETCH
686 /*
687 * Try to initiate an asynchronous read. This returns false in
688 * recovery if the relation file doesn't exist.
689 */
690 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
691 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
692 {
693 result.initiated_io = true;
694 }
695#endif /* USE_PREFETCH */
696 }
697 else
698 {
699 /*
700 * Report the buffer it was in at that time. The caller may be able
701 * to avoid a buffer table lookup, but it's not pinned and it must be
702 * rechecked!
703 */
704 result.recent_buffer = buf_id + 1;
705 }
706
707 /*
708 * If the block *is* in buffers, we do nothing. This is not really ideal:
709 * the block might be just about to be evicted, which would be stupid
710 * since we know we are going to need it soon. But the only easy answer
711 * is to bump the usage_count, which does not seem like a great solution:
712 * when the caller does ultimately touch the block, usage_count would get
713 * bumped again, resulting in too much favoritism for blocks that are
714 * involved in a prefetch sequence. A real fix would involve some
715 * additional per-buffer state, and it's not clear that there's enough of
716 * a problem to justify that.
717 */
718
719 return result;
720}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:678
Definition: lwlock.h:42
Buffer recent_buffer
Definition: bufmgr.h:61

References Assert(), BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 839 of file bufmgr.c.

840{
841 return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
842}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:886

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
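
A minimal sketch of the canonical pin, lock, inspect, release cycle (rel and blkno are placeholders):

Buffer buf = ReadBuffer(rel, blkno);
Page   page;

LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
/* ... read-only examination of the page ... */
UnlockReleaseBuffer(buf);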

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 886 of file bufmgr.c.

888{
889 Buffer buf;
890
891 /*
892 * Reject attempts to read non-local temporary relations; we would be
893 * likely to get wrong data since we have no visibility into the owning
894 * session's local buffers.
895 */
896 if (RELATION_IS_OTHER_TEMP(reln))
897 ereport(ERROR,
898 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
899 errmsg("cannot access temporary tables of other sessions")));
900
901 /*
902 * Read the buffer, and update pgstat counters to reflect a cache hit or
903 * miss.
904 */
905 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
906 forkNum, blockNum, mode, strategy);
907
908 return buf;
909}

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().
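
A hedged sketch of bulk access through a strategy ring, so the scan cannot evict most of shared_buffers (rel and blkno are placeholders):

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
Buffer buf;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy);
/* ... */
ReleaseBuffer(buf);
FreeAccessStrategy(strategy);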

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 923 of file bufmgr.c.

926{
927 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
928
929 return ReadBuffer_common(NULL, smgr,
930 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
931 forkNum, blockNum,
932 mode, strategy);
933}

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 778 of file bufmgr.c.

780{
781 BufferDesc *bufHdr;
782 BufferTag tag;
783 uint32 buf_state;
784
785 Assert(BufferIsValid(recent_buffer));
786
787 ResourceOwnerEnlarge(CurrentResourceOwner);
788 ReservePrivateRefCountEntry();
789 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
790
791 if (BufferIsLocal(recent_buffer))
792 {
793 int b = -recent_buffer - 1;
794
795 bufHdr = GetLocalBufferDescriptor(b);
796 buf_state = pg_atomic_read_u32(&bufHdr->state);
797
798 /* Is it still valid and holding the right tag? */
799 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
800 {
801 PinLocalBuffer(bufHdr, true);
802
802
803 pgBufferUsage.local_blks_hit++;
804
805 return true;
806 }
807 }
808 else
809 {
810 bufHdr = GetBufferDescriptor(recent_buffer - 1);
811
812 /*
813 * Is it still valid and holding the right tag? We do an unlocked tag
814 * comparison first, to make it unlikely that we'll increment the
815 * usage counter of the wrong buffer, if someone calls us with a very
816 * out of date recent_buffer. Then we'll check it again if we get the
817 * pin.
818 */
819 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
820 PinBuffer(bufHdr, NULL, true))
821 {
822 if (BufferTagsEqual(&tag, &bufHdr->tag))
823 {
824 pgBufferUsage.shared_blks_hit++;
825 return true;
826 }
827 UnpinBuffer(bufHdr);
828 }
829 }
830
831 return false;
832}
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition: bufmgr.c:3162
int b
Definition: isn.c:74
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_hit
Definition: instrument.h:26

References Assert(), b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().
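
A hedged sketch, loosely after XLogReadBufferExtended(): retry a remembered buffer before paying for a full buffer-mapping lookup. Here recent is a Buffer in which this block was seen earlier; rlocator and blkno are placeholders.

Buffer buf;

if (ReadRecentBuffer(rlocator, MAIN_FORKNUM, blkno, recent))
    buf = recent;       /* still there: now pinned, lookup avoided */
else
    buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, NULL, true);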

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4532 of file bufmgr.c.

4533{
4534 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4535 {
4536 /*
4537 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4538 * tableam returns the size in bytes - but for the purpose of this
4539 * routine, we want the number of blocks. Therefore divide, rounding
4540 * up.
4541 */
4542 uint64 szbytes;
4543
4544 szbytes = table_relation_size(relation, forkNum);
4545
4546 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4547 }
4548 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4549 {
4550 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4551 }
4552 else
4553 Assert(false);
4554
4555 return 0; /* keep compiler quiet */
4556}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1847

References Assert(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
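
A minimal sketch iterating over every block of the main fork, in the spirit of pg_prewarm():

BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);
BlockNumber blkno;

for (blkno = 0; blkno < nblocks; blkno++)
{
    Buffer buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, NULL);

    /* ... */
    ReleaseBuffer(buf);
}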

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3102 of file bufmgr.c.

3105{
3106 ForkNumber forkNum = MAIN_FORKNUM;
3107 BufferDesc *bufHdr;
3108
3109 if (BufferIsValid(buffer))
3110 {
3111 Assert(BufferIsPinned(buffer));
3112 if (BufferIsLocal(buffer))
3113 {
3114 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3115 if (bufHdr->tag.blockNum == blockNum &&
3116 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3117 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3118 return buffer;
3119 UnpinLocalBuffer(buffer);
3120 }
3121 else
3122 {
3123 bufHdr = GetBufferDescriptor(buffer - 1);
3124 /* we have pin, so it's ok to examine tag without spinlock */
3125 if (bufHdr->tag.blockNum == blockNum &&
3126 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3127 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3128 return buffer;
3129 UnpinBuffer(bufHdr);
3130 }
3131 }
3132
3133 return ReadBuffer(relation, blockNum);
3134}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:839

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().
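
A minimal sketch after the pattern of _bt_relandgetbuf(): step through a chain of pages while holding at most one pin (next_blkno is hypothetical; the old buffer must be unlocked, merely pinned).

for (;;)
{
    /* ... examine the page, compute next_blkno, break when done ... */
    buf = ReleaseAndReadBuffer(buf, rel, next_blkno);
}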

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5461 of file bufmgr.c.

5462{
5463 if (!BufferIsValid(buffer))
5464 elog(ERROR, "bad buffer ID: %d", buffer);
5465
5466 if (BufferIsLocal(buffer))
5467 UnpinLocalBuffer(buffer);
5468 else
5469 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5470}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation *  operation,
Buffer *  buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1583 of file bufmgr.c.

1587{
1588 int nblocks = 1;
1589 bool result;
1590
1591 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1592 false /* single block, no forwarding */ );
1593 Assert(nblocks == 1); /* single block can't be short */
1594
1595 return result;
1596}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition: bufmgr.c:1337

References Assert(), PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation *  operation,
Buffer *  buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)

Definition at line 1564 of file bufmgr.c.

1569{
1570 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1571 true /* expect forwarded buffers */ );
1572}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5668 of file bufmgr.c.

5669{
5670 BufferDesc *buf = PinCountWaitBuf;
5671
5672 if (buf)
5673 {
5674 uint32 buf_state;
5675 uint32 unset_bits = 0;
5676
5677 buf_state = LockBufHdr(buf);
5678
5679 /*
5680 * Don't complain if flag bit not set; it could have been reset but we
5681 * got a cancel/die interrupt before getting the signal.
5682 */
5683 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5684 buf->wait_backend_pgprocno == MyProcNumber)
5685 unset_bits = BM_PIN_COUNT_WAITER;
5686
5687 UnlockBufHdrExt(buf, buf_state,
5688 0, unset_bits,
5689 0);
5690
5691 PinCountWaitBuf = NULL;
5692 }
5693}

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5478 of file bufmgr.c.

5479{
5480 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5481 ReleaseBuffer(buffer);
5482}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), 
spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation *  operation)

Definition at line 1707 of file bufmgr.c.

1708{
1709 PgAioReturn *aio_ret = &operation->io_return;
1710 IOContext io_context;
1711 IOObject io_object;
1712
1713 if (operation->persistence == RELPERSISTENCE_TEMP)
1714 {
1715 io_context = IOCONTEXT_NORMAL;
1716 io_object = IOOBJECT_TEMP_RELATION;
1717 }
1718 else
1719 {
1720 io_context = IOContextForStrategy(operation->strategy);
1721 io_object = IOOBJECT_RELATION;
1722 }
1723
1724 /*
1725 * If we get here without an IO operation having been issued, the
1726 * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
1727 * caller should not have called WaitReadBuffers().
1728 *
1729 * In the case of IOMETHOD_SYNC, we start - as we used to before the
1730 * introducing of AIO - the IO in WaitReadBuffers(). This is done as part
1731 * of the retry logic below, no extra code is required.
1732 *
1733 * This path is expected to eventually go away.
1734 */
1735 if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
1736 elog(ERROR, "waiting for read operation that didn't read");
1737
1738 /*
1739 * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
1740 * done. We may need multiple retries, not just because we could get
1741 * multiple partial reads, but also because some of the remaining
1742 * to-be-read buffers may have been read in by other backends, limiting
1743 * the IO size.
1744 */
1745 while (true)
1746 {
1747 int ignored_nblocks_progress;
1748
1749 CheckReadBuffersOperation(operation, false);
1750
1751 /*
1752 * If there is an IO associated with the operation, we may need to
1753 * wait for it.
1754 */
1755 if (pgaio_wref_valid(&operation->io_wref))
1756 {
1757 /*
1758 * Track the time spent waiting for the IO to complete. As
1759 * tracking a wait even if we don't actually need to wait
1760 *
1761 * a) is not cheap, due to the timestamping overhead
1762 *
1763 * b) reports some time as waiting, even if we never waited
1764 *
1765 * we first check if we already know the IO is complete.
1766 */
1767 if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
1768 !pgaio_wref_check_done(&operation->io_wref))
1769 {
1770 instr_time io_start = pgstat_prepare_io_time(track_io_timing);
1771
1772 pgaio_wref_wait(&operation->io_wref);
1773
1774 /*
1775 * The IO operation itself was already counted earlier, in
1776 * AsyncReadBuffers(), this just accounts for the wait time.
1777 */
1778 pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
1779 io_start, 0, 0);
1780 }
1781 else
1782 {
1783 Assert(pgaio_wref_check_done(&operation->io_wref));
1784 }
1785
1786 /*
1787 * We now are sure the IO completed. Check the results. This
1788 * includes reporting on errors if there were any.
1789 */
1790 ProcessReadBuffersResult(operation);
1791 }
1792
1793 /*
1794 * Most of the time, the one IO we already started, will read in
1795 * everything. But we need to deal with partial reads and buffers not
1796 * needing IO anymore.
1797 */
1798 if (operation->nblocks_done == operation->nblocks)
1799 break;
1800
1801
1802 CHECK_FOR_INTERRUPTS();
1803 /*
1804 * This may only complete the IO partially, either because some
1805 * buffers were already valid, or because of a partial read.
1806 *
1807 * NB: In contrast to after the AsyncReadBuffers() call in
1808 * StartReadBuffers(), we do *not* reduce
1809 * ReadBuffersOperation->nblocks here, callers expect the full
1810 * operation to be completed at this point (as more operations may
1811 * have been queued).
1812 */
1813 AsyncReadBuffers(operation, &ignored_nblocks_progress);
1814 }
1815
1816 CheckReadBuffersOperation(operation, true);
1817
1818 /* NB: READ_DONE tracepoint was already executed in completion callback */
1819}
int io_method
Definition: aio.c:74
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition: aio.c:971
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition: aio.c:1005
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition: aio.c:991
@ IOMETHOD_SYNC
Definition: aio.h:34
@ PGAIO_RS_UNKNOWN
Definition: aio_types.h:80
bool track_io_timing
Definition: bufmgr.c:169
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition: bufmgr.c:1602
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
Definition: bufmgr.c:1668
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition: bufmgr.c:1839
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:747
IOObject
Definition: pgstat.h:276
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:278
IOContext
Definition: pgstat.h:285
@ IOOP_READ
Definition: pgstat.h:315
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
uint32 status
Definition: aio_types.h:108
PgAioResult result
Definition: aio_types.h:132
PgAioWaitRef io_wref
Definition: bufmgr.h:150
BufferAccessStrategy strategy
Definition: bufmgr.h:138
PgAioReturn io_return
Definition: bufmgr.h:151

References Assert(), AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), PgAioReturn::result, PgAioResult::status, ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().
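
A hedged sketch of the split start/wait API. Assumptions: the caller-set ReadBuffersOperation members are rel, smgr, persistence, forknum and strategy (per the struct definition in this header), and rel/blkno are placeholders. A true return from StartReadBuffer() means an IO was started and WaitReadBuffers() must be called before the buffer is used.

ReadBuffersOperation op = {0};
Buffer buffer;

op.rel = rel;
op.smgr = RelationGetSmgr(rel);
op.persistence = rel->rd_rel->relpersistence;
op.forknum = MAIN_FORKNUM;
op.strategy = NULL;

if (StartReadBuffer(&op, &buffer, blkno, 0))
    WaitReadBuffers(&op);    /* IO was needed; wait for it to finish */
/* buffer is now pinned and valid */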

Variable Documentation

◆ aio_local_buffer_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_local_buffer_readv_cb
extern

Definition at line 7758 of file bufmgr.c.

◆ aio_shared_buffer_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_shared_buffer_readv_cb
extern

Definition at line 7749 of file bufmgr.c.

◆ backend_flush_after

PGDLLIMPORT int backend_flush_after
extern

Definition at line 202 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

PGDLLIMPORT int bgwriter_flush_after
extern

Definition at line 201 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

PGDLLIMPORT int bgwriter_lru_maxpages
extern

Definition at line 167 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

PGDLLIMPORT double bgwriter_lru_multiplier
extern

Definition at line 168 of file bufmgr.c.

Referenced by BgBufferSync().

◆ BufferBlocks

PGDLLIMPORT char* BufferBlocks
extern

Definition at line 22 of file buf_init.c.

Referenced by BufferGetBlock(), and BufferManagerShmemInit().

◆ checkpoint_flush_after

PGDLLIMPORT int checkpoint_flush_after
extern

Definition at line 200 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

PGDLLIMPORT int effective_io_concurrency
extern

◆ io_combine_limit

◆ io_combine_limit_guc

PGDLLIMPORT int io_combine_limit_guc
extern

Definition at line 193 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ LocalBufferBlockPointers

PGDLLIMPORT Block* LocalBufferBlockPointers
extern

Definition at line 48 of file localbuf.c.

Referenced by BufferGetBlock(), and InitLocalBuffers().

◆ LocalRefCount

◆ maintenance_io_concurrency

◆ NBuffers

◆ NLocBuffer

◆ track_io_timing

◆ zero_damaged_pages

PGDLLIMPORT bool zero_damaged_pages
extern

Definition at line 166 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().