PostgreSQL Source Code git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include <lib/sort_template.h>

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBufferPin (Datum res)
 
static char * ResOwnerPrintBufferPin (Datum res)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool WaitReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsExclusiveLocked (Buffer buffer)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferManagerAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
bool EvictUnpinnedBuffer (Buffer buf)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_pin_resowner_desc
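 
The first several variables above back user-visible GUCs of the same names. As an illustrative postgresql.conf fragment (the values simply repeat the built-in defaults shown in this file; this is not a tuning recommendation):

 # illustrative settings only
 zero_damaged_pages = off
 bgwriter_lru_maxpages = 100
 bgwriter_lru_multiplier = 2.0
 track_io_timing = off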
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 87 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 77 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 76 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 69 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:416
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
int32 * LocalRefCount
Definition: localbuf.c:46

Definition at line 474 of file bufmgr.c.
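 
This macro only consults the current backend's own reference counts (LocalRefCount for local buffers, the private refcount machinery for shared buffers); it says nothing about pins held by other backends. A minimal sketch of the pattern it supports in assertions (hypothetical caller code, not copied from bufmgr.c):

 Buffer buf = ReadBuffer(rel, blkno);   /* returns a pinned buffer */
 Assert(BufferIsPinned(buf));           /* holds: this backend owns a pin */
 ReleaseBuffer(buf);                    /* afterwards the macro is no longer guaranteed true */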

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 68 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 72 of file bufmgr.c.

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 96 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 79 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 5952 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 5952 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 5954 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 5954 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 5951 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 5951 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 5953 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 5953 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 5950 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 5950 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 5652 of file bufmgr.c.

5653{
5654 BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
5655 uint32 buf_state;
5656
5657 buf_state = LockBufHdr(buf_hdr);
5658 Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
5659
5660 if (!(buf_state & BM_VALID))
5661 {
5662 Assert(!(buf_state & BM_DIRTY));
5663 UnlockBufHdr(buf_hdr, buf_state);
5664 }
5665 else
5666 {
5667 Assert(buf_state & BM_DIRTY);
5668 UnlockBufHdr(buf_hdr, buf_state);
5669
5670 /* Issue notice if this is not the first failure... */
5671 if (buf_state & BM_IO_ERROR)
5672 {
5673 /* Buffer is pinned, so we can read tag without spinlock */
5674 char *path;
5675
5676 path = relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
5677 BufTagGetForkNum(&buf_hdr->tag));
5678 ereport(WARNING,
5679 (errcode(ERRCODE_IO_ERROR),
5680 errmsg("could not write block %u of %s",
5681 buf_hdr->tag.blockNum, path),
5682 errdetail("Multiple failures --- write error might be permanent.")));
5683 pfree(path);
5684 }
5685 }
5686
5687 TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false);
5688}
#define BM_TAG_VALID
Definition: buf_internals.h:62
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
#define BM_DIRTY
Definition: buf_internals.h:60
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:63
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:61
#define BM_IO_ERROR
Definition: buf_internals.h:64
static BufferDesc * GetBufferDescriptor(uint32 id)
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
Definition: bufmgr.c:5615
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:5761
#define Assert(condition)
Definition: c.h:815
uint32_t uint32
Definition: c.h:488
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define WARNING
Definition: elog.h:36
#define ereport(elevel,...)
Definition: elog.h:149
void pfree(void *pointer)
Definition: mcxt.c:1521
#define relpathperm(rlocator, forknum)
Definition: relpath.h:98
BufferTag tag
BlockNumber blockNum
Definition: buf_internals.h:97

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), pfree(), relpathperm, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 3559 of file bufmgr.c.

3560{
3561 CheckForBufferLeaks();
3562
3563 AtEOXact_LocalBuffers(isCommit);
3564
3565 Assert(PrivateRefCountOverflowed == 0);
3566}
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:3619
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:210
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:820

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 3601 of file bufmgr.c.

3602{
3603 UnlockBuffers();
3604
3605 CheckForBufferLeaks();
3606
3607 /* localbuf.c needs a chance too */
3608 AtProcExit_LocalBuffers();
3609}
void UnlockBuffers(void)
Definition: bufmgr.c:5130
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:831

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferManagerAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 3188 of file bufmgr.c.

3189{
3190 /* info obtained from freelist.c */
3191 int strategy_buf_id;
3192 uint32 strategy_passes;
3193 uint32 recent_alloc;
3194
3195 /*
3196 * Information saved between calls so we can determine the strategy
3197 * point's advance rate and avoid scanning already-cleaned buffers.
3198 */
3199 static bool saved_info_valid = false;
3200 static int prev_strategy_buf_id;
3201 static uint32 prev_strategy_passes;
3202 static int next_to_clean;
3203 static uint32 next_passes;
3204
3205 /* Moving averages of allocation rate and clean-buffer density */
3206 static float smoothed_alloc = 0;
3207 static float smoothed_density = 10.0;
3208
3209 /* Potentially these could be tunables, but for now, not */
3210 float smoothing_samples = 16;
3211 float scan_whole_pool_milliseconds = 120000.0;
3212
3213 /* Used to compute how far we scan ahead */
3214 long strategy_delta;
3215 int bufs_to_lap;
3216 int bufs_ahead;
3217 float scans_per_alloc;
3218 int reusable_buffers_est;
3219 int upcoming_alloc_est;
3220 int min_scan_buffers;
3221
3222 /* Variables for the scanning loop proper */
3223 int num_to_scan;
3224 int num_written;
3225 int reusable_buffers;
3226
3227 /* Variables for final smoothed_density update */
3228 long new_strategy_delta;
3229 uint32 new_recent_alloc;
3230
3231 /*
3232 * Find out where the freelist clock sweep currently is, and how many
3233 * buffer allocations have happened since our last call.
3234 */
3235 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3236
3237 /* Report buffer alloc counts to pgstat */
3238 PendingBgWriterStats.buf_alloc += recent_alloc;
3239
3240 /*
3241 * If we're not running the LRU scan, just stop after doing the stats
3242 * stuff. We mark the saved state invalid so that we can recover sanely
3243 * if LRU scan is turned back on later.
3244 */
3245 if (bgwriter_lru_maxpages <= 0)
3246 {
3247 saved_info_valid = false;
3248 return true;
3249 }
3250
3251 /*
3252 * Compute strategy_delta = how many buffers have been scanned by the
3253 * clock sweep since last time. If first time through, assume none. Then
3254 * see if we are still ahead of the clock sweep, and if so, how many
3255 * buffers we could scan before we'd catch up with it and "lap" it. Note:
3256 * weird-looking coding of xxx_passes comparisons are to avoid bogus
3257 * behavior when the passes counts wrap around.
3258 */
3259 if (saved_info_valid)
3260 {
3261 int32 passes_delta = strategy_passes - prev_strategy_passes;
3262
3263 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3264 strategy_delta += (long) passes_delta * NBuffers;
3265
3266 Assert(strategy_delta >= 0);
3267
3268 if ((int32) (next_passes - strategy_passes) > 0)
3269 {
3270 /* we're one pass ahead of the strategy point */
3271 bufs_to_lap = strategy_buf_id - next_to_clean;
3272#ifdef BGW_DEBUG
3273 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3274 next_passes, next_to_clean,
3275 strategy_passes, strategy_buf_id,
3276 strategy_delta, bufs_to_lap);
3277#endif
3278 }
3279 else if (next_passes == strategy_passes &&
3280 next_to_clean >= strategy_buf_id)
3281 {
3282 /* on same pass, but ahead or at least not behind */
3283 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3284#ifdef BGW_DEBUG
3285 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3286 next_passes, next_to_clean,
3287 strategy_passes, strategy_buf_id,
3288 strategy_delta, bufs_to_lap);
3289#endif
3290 }
3291 else
3292 {
3293 /*
3294 * We're behind, so skip forward to the strategy point and start
3295 * cleaning from there.
3296 */
3297#ifdef BGW_DEBUG
3298 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3299 next_passes, next_to_clean,
3300 strategy_passes, strategy_buf_id,
3301 strategy_delta);
3302#endif
3303 next_to_clean = strategy_buf_id;
3304 next_passes = strategy_passes;
3305 bufs_to_lap = NBuffers;
3306 }
3307 }
3308 else
3309 {
3310 /*
3311 * Initializing at startup or after LRU scanning had been off. Always
3312 * start at the strategy point.
3313 */
3314#ifdef BGW_DEBUG
3315 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3316 strategy_passes, strategy_buf_id);
3317#endif
3318 strategy_delta = 0;
3319 next_to_clean = strategy_buf_id;
3320 next_passes = strategy_passes;
3321 bufs_to_lap = NBuffers;
3322 }
3323
3324 /* Update saved info for next time */
3325 prev_strategy_buf_id = strategy_buf_id;
3326 prev_strategy_passes = strategy_passes;
3327 saved_info_valid = true;
3328
3329 /*
3330 * Compute how many buffers had to be scanned for each new allocation, ie,
3331 * 1/density of reusable buffers, and track a moving average of that.
3332 *
3333 * If the strategy point didn't move, we don't update the density estimate
3334 */
3335 if (strategy_delta > 0 && recent_alloc > 0)
3336 {
3337 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3338 smoothed_density += (scans_per_alloc - smoothed_density) /
3339 smoothing_samples;
3340 }
3341
3342 /*
3343 * Estimate how many reusable buffers there are between the current
3344 * strategy point and where we've scanned ahead to, based on the smoothed
3345 * density estimate.
3346 */
3347 bufs_ahead = NBuffers - bufs_to_lap;
3348 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3349
3350 /*
3351 * Track a moving average of recent buffer allocations. Here, rather than
3352 * a true average we want a fast-attack, slow-decline behavior: we
3353 * immediately follow any increase.
3354 */
3355 if (smoothed_alloc <= (float) recent_alloc)
3356 smoothed_alloc = recent_alloc;
3357 else
3358 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3359 smoothing_samples;
3360
3361 /* Scale the estimate by a GUC to allow more aggressive tuning. */
3362 upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3363
3364 /*
3365 * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3366 * eventually underflow to zero, and the underflows produce annoying
3367 * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3368 * zero, there's no point in tracking smaller and smaller values of
3369 * smoothed_alloc, so just reset it to exactly zero to avoid this
3370 * syndrome. It will pop back up as soon as recent_alloc increases.
3371 */
3372 if (upcoming_alloc_est == 0)
3373 smoothed_alloc = 0;
3374
3375 /*
3376 * Even in cases where there's been little or no buffer allocation
3377 * activity, we want to make a small amount of progress through the buffer
3378 * cache so that as many reusable buffers as possible are clean after an
3379 * idle period.
3380 *
3381 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3382 * the BGW will be called during the scan_whole_pool time; slice the
3383 * buffer pool into that many sections.
3384 */
3385 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3386
3387 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3388 {
3389#ifdef BGW_DEBUG
3390 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3391 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3392#endif
3393 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3394 }
3395
3396 /*
3397 * Now write out dirty reusable buffers, working forward from the
3398 * next_to_clean point, until we have lapped the strategy scan, or cleaned
3399 * enough buffers to match our estimate of the next cycle's allocation
3400 * requirements, or hit the bgwriter_lru_maxpages limit.
3401 */
3402
3403 num_to_scan = bufs_to_lap;
3404 num_written = 0;
3405 reusable_buffers = reusable_buffers_est;
3406
3407 /* Execute the LRU scan */
3408 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3409 {
3410 int sync_state = SyncOneBuffer(next_to_clean, true,
3411 wb_context);
3412
3413 if (++next_to_clean >= NBuffers)
3414 {
3415 next_to_clean = 0;
3416 next_passes++;
3417 }
3418 num_to_scan--;
3419
3420 if (sync_state & BUF_WRITTEN)
3421 {
3422 reusable_buffers++;
3423 if (++num_written >= bgwriter_lru_maxpages)
3424 {
3426 break;
3427 }
3428 }
3429 else if (sync_state & BUF_REUSABLE)
3430 reusable_buffers++;
3431 }
3432
3434
3435#ifdef BGW_DEBUG
3436 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3437 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3438 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3439 bufs_to_lap - num_to_scan,
3440 num_written,
3441 reusable_buffers - reusable_buffers_est);
3442#endif
3443
3444 /*
3445 * Consider the above scan as being like a new allocation scan.
3446 * Characterize its density and update the smoothed one based on it. This
3447 * effectively halves the moving average period in cases where both the
3448 * strategy and the background writer are doing some useful scanning,
3449 * which is helpful because a long memory isn't as desirable on the
3450 * density estimates.
3451 */
3452 new_strategy_delta = bufs_to_lap - num_to_scan;
3453 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3454 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3455 {
3456 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3457 smoothed_density += (scans_per_alloc - smoothed_density) /
3458 smoothing_samples;
3459
3460#ifdef BGW_DEBUG
3461 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3462 new_recent_alloc, new_strategy_delta,
3463 scans_per_alloc, smoothed_density);
3464#endif
3465 }
3466
3467 /* Return true if OK to hibernate */
3468 return (bufs_to_lap == 0 && recent_alloc == 0);
3469}
int BgWriterDelay
Definition: bgwriter.c:57
#define BUF_REUSABLE
Definition: bufmgr.c:77
double bgwriter_lru_multiplier
Definition: bufmgr.c:142
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:3486
int bgwriter_lru_maxpages
Definition: bufmgr.c:141
#define BUF_WRITTEN
Definition: bufmgr.c:76
int32_t int32
Definition: c.h:484
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:225
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:394
int NBuffers
Definition: globals.c:141
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:240
PgStat_Counter maxwritten_clean
Definition: pgstat.h:241
PgStat_Counter buf_alloc
Definition: pgstat.h:242

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
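 
A rough worked example of the smoothing arithmetic in the listing above, with invented numbers: if the clock sweep advanced by strategy_delta = 640 buffers while satisfying recent_alloc = 64 allocations, then scans_per_alloc = 640 / 64 = 10, and the density estimate moves one sixteenth of the way toward that value (smoothing_samples = 16):

 /* illustrative values only, not taken from a real run */
 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;          /* 10.0 */
 smoothed_density += (scans_per_alloc - smoothed_density) / smoothing_samples;
 upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);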

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 1606 of file bufmgr.c.

1610{
1611 BufferTag newTag; /* identity of requested block */
1612 uint32 newHash; /* hash value for newTag */
1613 LWLock *newPartitionLock; /* buffer partition lock for it */
1614 int existing_buf_id;
1615 Buffer victim_buffer;
1616 BufferDesc *victim_buf_hdr;
1617 uint32 victim_buf_state;
1618
1619 /* Make sure we will have room to remember the buffer pin */
1620 ResourceOwnerEnlarge(CurrentResourceOwner);
1621 ReservePrivateRefCountEntry();
1622
1623 /* create a tag so we can lookup the buffer */
1624 InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
1625
1626 /* determine its hash code and partition lock ID */
1627 newHash = BufTableHashCode(&newTag);
1628 newPartitionLock = BufMappingPartitionLock(newHash);
1629
1630 /* see if the block is in the buffer pool already */
1631 LWLockAcquire(newPartitionLock, LW_SHARED);
1632 existing_buf_id = BufTableLookup(&newTag, newHash);
1633 if (existing_buf_id >= 0)
1634 {
1635 BufferDesc *buf;
1636 bool valid;
1637
1638 /*
1639 * Found it. Now, pin the buffer so no one can steal it from the
1640 * buffer pool, and check to see if the correct data has been loaded
1641 * into the buffer.
1642 */
1643 buf = GetBufferDescriptor(existing_buf_id);
1644
1645 valid = PinBuffer(buf, strategy);
1646
1647 /* Can release the mapping lock as soon as we've pinned it */
1648 LWLockRelease(newPartitionLock);
1649
1650 *foundPtr = true;
1651
1652 if (!valid)
1653 {
1654 /*
1655 * We can only get here if (a) someone else is still reading in
1656 * the page, (b) a previous read attempt failed, or (c) someone
1657 * called StartReadBuffers() but not yet WaitReadBuffers().
1658 */
1659 *foundPtr = false;
1660 }
1661
1662 return buf;
1663 }
1664
1665 /*
1666 * Didn't find it in the buffer pool. We'll have to initialize a new
1667 * buffer. Remember to unlock the mapping lock while doing the work.
1668 */
1669 LWLockRelease(newPartitionLock);
1670
1671 /*
1672 * Acquire a victim buffer. Somebody else might try to do the same, we
1673 * don't hold any conflicting locks. If so we'll have to undo our work
1674 * later.
1675 */
1676 victim_buffer = GetVictimBuffer(strategy, io_context);
1677 victim_buf_hdr = GetBufferDescriptor(victim_buffer - 1);
1678
1679 /*
1680 * Try to make a hashtable entry for the buffer under its new tag. If
1681 * somebody else inserted another buffer for the tag, we'll release the
1682 * victim buffer we acquired and use the already inserted one.
1683 */
1684 LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1685 existing_buf_id = BufTableInsert(&newTag, newHash, victim_buf_hdr->buf_id);
1686 if (existing_buf_id >= 0)
1687 {
1688 BufferDesc *existing_buf_hdr;
1689 bool valid;
1690
1691 /*
1692 * Got a collision. Someone has already done what we were about to do.
1693 * We'll just handle this as if it were found in the buffer pool in
1694 * the first place. First, give up the buffer we were planning to
1695 * use.
1696 *
1697 * We could do this after releasing the partition lock, but then we'd
1698 * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
1699 * before acquiring the lock, for the rare case of such a collision.
1700 */
1701 UnpinBuffer(victim_buf_hdr);
1702
1703 /*
1704 * The victim buffer we acquired previously is clean and unused, let
1705 * it be found again quickly
1706 */
1707 StrategyFreeBuffer(victim_buf_hdr);
1708
1709 /* remaining code should match code at top of routine */
1710
1711 existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
1712
1713 valid = PinBuffer(existing_buf_hdr, strategy);
1714
1715 /* Can release the mapping lock as soon as we've pinned it */
1716 LWLockRelease(newPartitionLock);
1717
1718 *foundPtr = true;
1719
1720 if (!valid)
1721 {
1722 /*
1723 * We can only get here if (a) someone else is still reading in
1724 * the page, (b) a previous read attempt failed, or (c) someone
1725 * called StartReadBuffers() but not yet WaitReadBuffers().
1726 */
1727 *foundPtr = false;
1728 }
1729
1730 return existing_buf_hdr;
1731 }
1732
1733 /*
1734 * Need to lock the buffer header too in order to change its tag.
1735 */
1736 victim_buf_state = LockBufHdr(victim_buf_hdr);
1737
1738 /* some sanity checks while we hold the buffer header lock */
1739 Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
1740 Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
1741
1742 victim_buf_hdr->tag = newTag;
1743
1744 /*
1745 * Make sure BM_PERMANENT is set for buffers that must be written at every
1746 * checkpoint. Unlogged buffers only need to be written at shutdown
1747 * checkpoints, except for their "init" forks, which need to be treated
1748 * just like permanent relations.
1749 */
1750 victim_buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1751 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1752 victim_buf_state |= BM_PERMANENT;
1753
1754 UnlockBufHdr(victim_buf_hdr, victim_buf_state);
1755
1756 LWLockRelease(newPartitionLock);
1757
1758 /*
1759 * Buffer contents are currently invalid.
1760 */
1761 *foundPtr = false;
1762
1763 return victim_buf_hdr;
1764}
int Buffer
Definition: buf.h:23
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
Definition: buf_internals.h:68
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:45
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:50
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:118
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:2652
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition: bufmgr.c:1950
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:250
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:2806
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:363
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
static char * buf
Definition: pg_test_fsync.c:72
@ INIT_FORKNUM
Definition: relpath.h:61
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:442
Definition: lwlock.h:42
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by PinBufferForBlock().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 3724 of file bufmgr.c.

3725{
3726 BufferDesc *bufHdr;
3727
3728 Assert(BufferIsPinned(buffer));
3729
3730 if (BufferIsLocal(buffer))
3731 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3732 else
3733 bufHdr = GetBufferDescriptor(buffer - 1);
3734
3735 /* pinned, so OK to read tag without spinlock */
3736 return bufHdr->tag.blockNum;
3737}
#define BufferIsLocal(buffer)
Definition: buf.h:37
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:474

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune_and_freeze(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_noprune(), lazy_scan_prune(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and WaitReadBuffers().
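 
A minimal usage sketch (hypothetical caller code, built only from functions declared in bufmgr.h and documented here):

 Buffer buf = ReadBuffer(rel, blkno);
 LockBuffer(buf, BUFFER_LOCK_SHARE);
 BlockNumber check = BufferGetBlockNumber(buf);   /* equals blkno */
 UnlockReleaseBuffer(buf);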

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3985 of file bufmgr.c.

3986{
3987 BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3988 char *page = BufferGetPage(buffer);
3989 XLogRecPtr lsn;
3990 uint32 buf_state;
3991
3992 /*
3993 * If we don't need locking for correctness, fastpath out.
3994 */
3995 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3996 return PageGetLSN(page);
3997
3998 /* Make sure we've got a real buffer, and that we hold a pin on it. */
3999 Assert(BufferIsValid(buffer));
4000 Assert(BufferIsPinned(buffer));
4001
4002 buf_state = LockBufHdr(bufHdr);
4003 lsn = PageGetLSN(page);
4004 UnlockBufHdr(bufHdr, buf_state);
4005
4006 return lsn;
4007}
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
static XLogRecPtr PageGetLSN(const char *page)
Definition: bufpage.h:386
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 3745 of file bufmgr.c.

3747{
3748 BufferDesc *bufHdr;
3749
3750 /* Do the same checks as BufferGetBlockNumber. */
3751 Assert(BufferIsPinned(buffer));
3752
3753 if (BufferIsLocal(buffer))
3754 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3755 else
3756 bufHdr = GetBufferDescriptor(buffer - 1);
3757
3758 /* pinned, so OK to read tag without spinlock */
3759 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
3760 *forknum = BufTagGetForkNum(&bufHdr->tag);
3761 *blknum = bufHdr->tag.blockNum;
3762}

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
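 
A hedged sketch of the typical use: a caller that already holds a pin recovers the page identity for logging or error reporting (hypothetical caller code):

 RelFileLocator rlocator;
 ForkNumber forknum;
 BlockNumber blknum;

 BufferGetTag(buf, &rlocator, &forknum, &blknum);
 elog(DEBUG1, "buffer %d maps block %u of fork %d", buf, blknum, (int) forknum);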

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 2500 of file bufmgr.c.

2501{
2502 BufferDesc *bufHdr;
2503
2504 if (BufferIsLocal(buffer))
2505 {
2506 int bufid = -buffer - 1;
2507
2508 bufHdr = GetLocalBufferDescriptor(bufid);
2509 }
2510 else
2511 {
2512 bufHdr = GetBufferDescriptor(buffer - 1);
2513 }
2514
2515 Assert(BufferIsPinned(buffer));
2516 Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2517 LW_EXCLUSIVE));
2518
2519 return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
2520}
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:239
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1937
pg_atomic_uint32 state

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by XLogRegisterBuffer().

◆ BufferIsExclusiveLocked()

bool BufferIsExclusiveLocked ( Buffer  buffer)

Definition at line 2471 of file bufmgr.c.

2472{
2473 BufferDesc *bufHdr;
2474
2475 if (BufferIsLocal(buffer))
2476 {
2477 int bufid = -buffer - 1;
2478
2479 bufHdr = GetLocalBufferDescriptor(bufid);
2480 }
2481 else
2482 {
2483 bufHdr = GetBufferDescriptor(buffer - 1);
2484 }
2485
2486 Assert(BufferIsPinned(buffer));
2487 return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2488 LW_EXCLUSIVE);
2489}

References Assert, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, and LWLockHeldByMeInMode().

Referenced by XLogRegisterBuffer().
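 
Together with BufferIsDirty(), this check backs assertions in WAL registration code (see the Referenced by entries). A hedged sketch of that assertion pattern, not the exact XLogRegisterBuffer() code:

 /* a modified page being registered for WAL should be exclusively locked and dirty */
 Assert(BufferIsExclusiveLocked(buffer));
 Assert(BufferIsDirty(buffer));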

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 3955 of file bufmgr.c.

3956{
3957 BufferDesc *bufHdr;
3958
3959 /* Local buffers are used only for temp relations. */
3960 if (BufferIsLocal(buffer))
3961 return false;
3962
3963 /* Make sure we've got a real buffer, and that we hold a pin on it. */
3964 Assert(BufferIsValid(buffer));
3965 Assert(BufferIsPinned(buffer));
3966
3967 /*
3968 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
3969 * need not bother with the buffer header spinlock. Even if someone else
3970 * changes the buffer header state while we're doing this, the state is
3971 * changed atomically, so we'll read the old value or the new value, but
3972 * not random garbage.
3973 */
3974 bufHdr = GetBufferDescriptor(buffer - 1);
3975 return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3976}

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 2912 of file bufmgr.c.

2913{
2914 uint32 buf_state;
2915 int buf_id;
2916 int num_to_scan;
2917 int num_spaces;
2918 int num_processed;
2919 int num_written;
2920 CkptTsStatus *per_ts_stat = NULL;
2921 Oid last_tsid;
2922 binaryheap *ts_heap;
2923 int i;
2924 int mask = BM_DIRTY;
2925 WritebackContext wb_context;
2926
2927 /*
2928 * Unless this is a shutdown checkpoint or we have been explicitly told,
2929 * we write only permanent, dirty buffers. But at shutdown or end of
2930 * recovery, we write all dirty buffers.
2931 */
2932 if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
2933 CHECKPOINT_FLUSH_ALL))))
2934 mask |= BM_PERMANENT;
2935
2936 /*
2937 * Loop over all buffers, and mark the ones that need to be written with
2938 * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
2939 * can estimate how much work needs to be done.
2940 *
2941 * This allows us to write only those pages that were dirty when the
2942 * checkpoint began, and not those that get dirtied while it proceeds.
2943 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
2944 * later in this function, or by normal backends or the bgwriter cleaning
2945 * scan, the flag is cleared. Any buffer dirtied after this point won't
2946 * have the flag set.
2947 *
2948 * Note that if we fail to write some buffer, we may leave buffers with
2949 * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
2950 * certainly need to be written for the next checkpoint attempt, too.
2951 */
2952 num_to_scan = 0;
2953 for (buf_id = 0; buf_id < NBuffers; buf_id++)
2954 {
2955 BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
2956
2957 /*
2958 * Header spinlock is enough to examine BM_DIRTY, see comment in
2959 * SyncOneBuffer.
2960 */
2961 buf_state = LockBufHdr(bufHdr);
2962
2963 if ((buf_state & mask) == mask)
2964 {
2965 CkptSortItem *item;
2966
2967 buf_state |= BM_CHECKPOINT_NEEDED;
2968
2969 item = &CkptBufferIds[num_to_scan++];
2970 item->buf_id = buf_id;
2971 item->tsId = bufHdr->tag.spcOid;
2972 item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
2973 item->forkNum = BufTagGetForkNum(&bufHdr->tag);
2974 item->blockNum = bufHdr->tag.blockNum;
2975 }
2976
2977 UnlockBufHdr(bufHdr, buf_state);
2978
2979 /* Check for barrier events in case NBuffers is large. */
2980 if (ProcSignalBarrierPending)
2981 ProcessProcSignalBarrier();
2982 }
2983
2984 if (num_to_scan == 0)
2985 return; /* nothing to do */
2986
2988
2989 TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2990
2991 /*
2992 * Sort buffers that need to be written to reduce the likelihood of random
2993 * IO. The sorting is also important for the implementation of balancing
2994 * writes between tablespaces. Without balancing writes we'd potentially
2995 * end up writing to the tablespaces one-by-one; possibly overloading the
2996 * underlying system.
2997 */
2998 sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2999
3000 num_spaces = 0;
3001
3002 /*
3003 * Allocate progress status for each tablespace with buffers that need to
3004 * be flushed. This requires the to-be-flushed array to be sorted.
3005 */
3006 last_tsid = InvalidOid;
3007 for (i = 0; i < num_to_scan; i++)
3008 {
3009 CkptTsStatus *s;
3010 Oid cur_tsid;
3011
3012 cur_tsid = CkptBufferIds[i].tsId;
3013
3014 /*
3015 * Grow array of per-tablespace status structs, every time a new
3016 * tablespace is found.
3017 */
3018 if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3019 {
3020 Size sz;
3021
3022 num_spaces++;
3023
3024 /*
3025 * Not worth adding grow-by-power-of-2 logic here - even with a
3026 * few hundred tablespaces this should be fine.
3027 */
3028 sz = sizeof(CkptTsStatus) * num_spaces;
3029
3030 if (per_ts_stat == NULL)
3031 per_ts_stat = (CkptTsStatus *) palloc(sz);
3032 else
3033 per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
3034
3035 s = &per_ts_stat[num_spaces - 1];
3036 memset(s, 0, sizeof(*s));
3037 s->tsId = cur_tsid;
3038
3039 /*
3040 * The first buffer in this tablespace. As CkptBufferIds is sorted
3041 * by tablespace all (s->num_to_scan) buffers in this tablespace
3042 * will follow afterwards.
3043 */
3044 s->index = i;
3045
3046 /*
3047 * progress_slice will be determined once we know how many buffers
3048 * are in each tablespace, i.e. after this loop.
3049 */
3050
3051 last_tsid = cur_tsid;
3052 }
3053 else
3054 {
3055 s = &per_ts_stat[num_spaces - 1];
3056 }
3057
3058 s->num_to_scan++;
3059
3060 /* Check for barrier events. */
3061 if (ProcSignalBarrierPending)
3062 ProcessProcSignalBarrier();
3063 }
3064
3065 Assert(num_spaces > 0);
3066
3067 /*
3068 * Build a min-heap over the write-progress in the individual tablespaces,
3069 * and compute how large a portion of the total progress a single
3070 * processed buffer is.
3071 */
3072 ts_heap = binaryheap_allocate(num_spaces,
3073 ts_ckpt_progress_comparator,
3074 NULL);
3075
3076 for (i = 0; i < num_spaces; i++)
3077 {
3078 CkptTsStatus *ts_stat = &per_ts_stat[i];
3079
3080 ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3081
3082 binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
3083 }
3084
3085 binaryheap_build(ts_heap);
3086
3087 /*
3088 * Iterate through to-be-checkpointed buffers and write the ones (still)
3089 * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3090 * tablespaces; otherwise the sorting would lead to only one tablespace
3091 * receiving writes at a time, making inefficient use of the hardware.
3092 */
3093 num_processed = 0;
3094 num_written = 0;
3095 while (!binaryheap_empty(ts_heap))
3096 {
3097 BufferDesc *bufHdr = NULL;
3098 CkptTsStatus *ts_stat = (CkptTsStatus *)
3099 DatumGetPointer(binaryheap_first(ts_heap));
3100
3101 buf_id = CkptBufferIds[ts_stat->index].buf_id;
3102 Assert(buf_id != -1);
3103
3104 bufHdr = GetBufferDescriptor(buf_id);
3105
3106 num_processed++;
3107
3108 /*
3109 * We don't need to acquire the lock here, because we're only looking
3110 * at a single bit. It's possible that someone else writes the buffer
3111 * and clears the flag right after we check, but that doesn't matter
3112 * since SyncOneBuffer will then do nothing. However, there is a
3113 * further race condition: it's conceivable that between the time we
3114 * examine the bit here and the time SyncOneBuffer acquires the lock,
3115 * someone else not only wrote the buffer but replaced it with another
3116 * page and dirtied it. In that improbable case, SyncOneBuffer will
3117 * write the buffer though we didn't need to. It doesn't seem worth
3118 * guarding against this, though.
3119 */
3120 if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
3121 {
3122 if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3123 {
3124 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3125 PendingCheckpointerStats.buffers_written++;
3126 num_written++;
3127 }
3128 }
3129
3130 /*
3131 * Measure progress independent of actually having to flush the buffer
3132 * - otherwise writing become unbalanced.
3133 */
3134 ts_stat->progress += ts_stat->progress_slice;
3135 ts_stat->num_scanned++;
3136 ts_stat->index++;
3137
3138 /* Have all the buffers from the tablespace been processed? */
3139 if (ts_stat->num_scanned == ts_stat->num_to_scan)
3140 {
3141 binaryheap_remove_first(ts_heap);
3142 }
3143 else
3144 {
3145 /* update heap with the new progress */
3146 binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
3147 }
3148
3149 /*
3150 * Sleep to throttle our I/O rate.
3151 *
3152 * (This will check for barrier events even if it doesn't sleep.)
3153 */
3154 CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3155 }
3156
3157 /*
3158 * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3159 * IOContext will always be IOCONTEXT_NORMAL.
3160 */
3161 IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
3162
3163 pfree(per_ts_stat);
3164 per_ts_stat = NULL;
3165 binaryheap_free(ts_heap);
3166
3167 /*
3168 * Update checkpoint statistics. As noted above, this doesn't include
3169 * buffers written by other backends or bgwriter scan.
3170 */
3171 CheckpointStats.ckpt_bufs_written += num_written;
3172
3173 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3174}
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:138
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:255
bh_node_type binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:177
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:192
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:75
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:116
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:39
#define binaryheap_empty(h)
Definition: binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition: buf_init.c:24
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:67
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:5880
int checkpoint_flush_after
Definition: bufmgr.c:171
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:5903
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition: bufmgr.c:5965
struct CkptTsStatus CkptTsStatus
double float8
Definition: c.h:587
size_t Size
Definition: c.h:562
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:722
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:39
int i
Definition: isn.c:72
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
@ IOCONTEXT_NORMAL
Definition: pgstat.h:285
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:496
int ckpt_bufs_written
Definition: xlog.h:167
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:115
int index
Definition: bufmgr.c:123
int num_scanned
Definition: bufmgr.c:120
float8 progress
Definition: bufmgr.c:114
int num_to_scan
Definition: bufmgr.c:118
Oid tsId
Definition: bufmgr.c:105
PgStat_Counter buffers_written
Definition: pgstat.h:264
Oid spcOid
Definition: buf_internals.h:93
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:143
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr(), and WritebackContextInit().

Referenced by CheckPointBuffers().
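 
To see how progress_slice keeps the writes balanced, a hedged numeric example: with num_to_scan = 1100 dirty buffers, of which tablespace A holds 1000 and tablespace B holds 100, A gets progress_slice = 1100/1000 = 1.1 and B gets 1100/100 = 11. Each processed buffer advances its tablespace's progress by its slice, and the min-heap always serves the tablespace with the least progress, so roughly ten of A's buffers are handled for each of B's and both tablespaces finish at about the same time.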

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inlinestatic

Definition at line 5815 of file bufmgr.c.

5816{
5817 int ret;
5818 RelFileLocator rlocatora;
5819 RelFileLocator rlocatorb;
5820
5821 rlocatora = BufTagGetRelFileLocator(ba);
5822 rlocatorb = BufTagGetRelFileLocator(bb);
5823
5824 ret = rlocator_comparator(&rlocatora, &rlocatorb);
5825
5826 if (ret != 0)
5827 return ret;
5828
5829 if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
5830 return -1;
5831 if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
5832 return 1;
5833
5834 if (ba->blockNum < bb->blockNum)
5835 return -1;
5836 if (ba->blockNum > bb->blockNum)
5837 return 1;
5838
5839 return 0;
5840}
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:5734

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5205 of file bufmgr.c.

5206{
5207 if (BufferIsLocal(buffer))
5208 {
5209 if (LocalRefCount[-buffer - 1] != 1)
5210 elog(ERROR, "incorrect local pin count: %d",
5211 LocalRefCount[-buffer - 1]);
5212 }
5213 else
5214 {
5215 if (GetPrivateRefCount(buffer) != 1)
5216 elog(ERROR, "incorrect local pin count: %d",
5217 GetPrivateRefCount(buffer));
5218 }
5219}
#define ERROR
Definition: elog.h:39

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), and LockBufferForCleanup().
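
A minimal caller sketch (hypothetical helper, not part of bufmgr.c; assumes a caller that must hold exactly one pin before a cleanup-style operation):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Hypothetical: pin a block and verify this backend holds exactly one pin. */
    static Buffer
    pin_block_once(Relation rel, BlockNumber blkno)
    {
        Buffer      buf = ReadBuffer(rel, blkno);

        /* elog(ERROR)s if the local pin count is anything other than 1 */
        CheckBufferIsPinnedOnce(buf);
        return buf;
    }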

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 3619 of file bufmgr.c.

3620{
3621#ifdef USE_ASSERT_CHECKING
3622 int RefCountErrors = 0;
3623 PrivateRefCountEntry *res;
3624 int i;
3625 char *s;
3626
3627 /* check the array */
3628 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
3629 {
3630 res = &PrivateRefCountArray[i];
3631
3632 if (res->buffer != InvalidBuffer)
3633 {
3634 s = DebugPrintBufferRefcount(res->buffer);
3635 elog(WARNING, "buffer refcount leak: %s", s);
3636 pfree(s);
3637
3638 RefCountErrors++;
3639 }
3640 }
3641
3642 /* if necessary search the hash */
3643 if (PrivateRefCountOverflowed)
3644 {
3645 HASH_SEQ_STATUS hstat;
3646
3647 hash_seq_init(&hstat, PrivateRefCountHash);
3648 while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
3649 {
3650 s = DebugPrintBufferRefcount(res->buffer);
3651 elog(WARNING, "buffer refcount leak: %s", s);
3652 pfree(s);
3653 RefCountErrors++;
3654 }
3655 }
3656
3657 Assert(RefCountErrors == 0);
3658#endif
3659}
#define InvalidBuffer
Definition: buf.h:25
char * DebugPrintBufferRefcount(Buffer buffer)
Definition: bufmgr.c:3665
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:96
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:208
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:209
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385

References Assert, DebugPrintBufferRefcount(), elog, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 3710 of file bufmgr.c.

3711{
3712 BufferSync(flags);
3713}
static void BufferSync(int flags)
Definition: bufmgr.c:2912

References BufferSync().

Referenced by CheckPointGuts().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem * a,
const CkptSortItem * b 
)
inline static

Definition at line 5849 of file bufmgr.c.

5850{
5851 /* compare tablespace */
5852 if (a->tsId < b->tsId)
5853 return -1;
5854 else if (a->tsId > b->tsId)
5855 return 1;
5856 /* compare relation */
5857 if (a->relNumber < b->relNumber)
5858 return -1;
5859 else if (a->relNumber > b->relNumber)
5860 return 1;
5861 /* compare fork */
5862 else if (a->forkNum < b->forkNum)
5863 return -1;
5864 else if (a->forkNum > b->forkNum)
5865 return 1;
5866 /* compare block number */
5867 else if (a->blockNum < b->blockNum)
5868 return -1;
5869 else if (a->blockNum > b->blockNum)
5870 return 1;
5871 /* equal page IDs are unlikely, but not impossible */
5872 return 0;
5873}
int b
Definition: isn.c:69
int a
Definition: isn.c:68

References a, and b.

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5399 of file bufmgr.c.

5400{
5401 BufferDesc *bufHdr;
5402 uint32 buf_state,
5403 refcount;
5404
5405 Assert(BufferIsValid(buffer));
5406
5407 if (BufferIsLocal(buffer))
5408 {
5409 refcount = LocalRefCount[-buffer - 1];
5410 /* There should be exactly one pin */
5411 Assert(refcount > 0);
5412 if (refcount != 1)
5413 return false;
5414 /* Nobody else to wait for */
5415 return true;
5416 }
5417
5418 /* There should be exactly one local pin */
5419 refcount = GetPrivateRefCount(buffer);
5420 Assert(refcount);
5421 if (refcount != 1)
5422 return false;
5423
5424 /* Try to acquire lock */
5425 if (!ConditionalLockBuffer(buffer))
5426 return false;
5427
5428 bufHdr = GetBufferDescriptor(buffer - 1);
5429 buf_state = LockBufHdr(bufHdr);
5430 refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5431
5432 Assert(refcount > 0);
5433 if (refcount == 1)
5434 {
5435 /* Successfully acquired exclusive lock with pincount 1 */
5436 UnlockBufHdr(bufHdr, buf_state);
5437 return true;
5438 }
5439
5440 /* Failed, so release the lock */
5441 UnlockBufHdr(bufHdr, buf_state);
5442 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5443 return false;
5444}
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5184
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
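
An opportunistic-cleanup sketch in the style of heap_page_prune_opt() (hypothetical helper; assumes the buffer is already pinned exactly once by this backend):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Try to get a cleanup lock without waiting; skip the page if we can't. */
    static void
    maybe_prune_page(Buffer buf)
    {
        if (!ConditionalLockBufferForCleanup(buf))
            return;             /* somebody else holds a pin or the lock */

        /* ... cleanup work requiring a cleanup-strength lock goes here ... */

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }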

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 4798 of file bufmgr.c.

4800{
4801 char relpersistence;
4802 SMgrRelation src_rel;
4803 SMgrRelation dst_rel;
4804
4805 /* Set the relpersistence. */
4806 relpersistence = permanent ?
4807 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
4808
4809 src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
4810 dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
4811
4812 /*
4813 * Create and copy all forks of the relation. During create database we
4814 * have a separate cleanup mechanism which deletes complete database
4815 * directory. Therefore, each individual relation doesn't need to be
4816 * registered for cleanup.
4817 */
4818 RelationCreateStorage(dst_rlocator, relpersistence, false);
4819
4820 /* copy main fork. */
4821 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
4822 permanent);
4823
4824 /* copy those extra forks that exist */
4825 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
4826 forkNum <= MAX_FORKNUM; forkNum++)
4827 {
4828 if (smgrexists(src_rel, forkNum))
4829 {
4830 smgrcreate(dst_rel, forkNum, false);
4831
4832 /*
4833 * WAL log creation if the relation is persistent, or this is the
4834 * init fork of an unlogged relation.
4835 */
4836 if (permanent || forkNum == INIT_FORKNUM)
4837 log_smgrcreate(&dst_rlocator, forkNum);
4838
4839 /* Copy a fork's data, block by block. */
4840 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
4841 permanent);
4842 }
4843 }
4844}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:4690
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:56
@ MAIN_FORKNUM
Definition: relpath.h:58
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:201
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:414
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:401
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().
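
A usage sketch in the spirit of CreateDatabaseUsingWalLog() (hypothetical; src and dst are assumed to be relfilelocators already chosen in the source and target databases):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Copy one relation's storage, fork by fork, through shared buffers. */
    static void
    copy_relation_storage(RelFileLocator src, RelFileLocator dst, bool permanent)
    {
        /* creates dst's storage, WAL-logs it as needed, and copies every fork */
        CreateAndCopyRelationData(src, dst, permanent);
    }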

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 3665 of file bufmgr.c.

3666{
3667 BufferDesc *buf;
3668 int32 loccount;
3669 char *path;
3670 char *result;
3671 ProcNumber backend;
3672 uint32 buf_state;
3673
3674 Assert(BufferIsValid(buffer));
3675 if (BufferIsLocal(buffer))
3676 {
3677 buf = GetLocalBufferDescriptor(-buffer - 1);
3678 loccount = LocalRefCount[-buffer - 1];
3679 backend = MyProcNumber;
3680 }
3681 else
3682 {
3683 buf = GetBufferDescriptor(buffer - 1);
3684 loccount = GetPrivateRefCount(buffer);
3685 backend = INVALID_PROC_NUMBER;
3686 }
3687
3688 /* theoretically we should lock the bufhdr here */
3689 path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
3690 BufTagGetForkNum(&buf->tag));
3691 buf_state = pg_atomic_read_u32(&buf->state);
3692
3693 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3694 buffer, path,
3695 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
3696 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
3697 pfree(path);
3698 return result;
3699}
#define BUF_FLAG_MASK
Definition: buf_internals.h:47
ProcNumber MyProcNumber
Definition: globals.c:89
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:93

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pfree(), pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().
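
A small debugging sketch (hypothetical helper; the DEBUG1 level is an arbitrary choice):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Log a buffer's tag, flags and pin counts; the result string is palloc'd. */
    static void
    log_buffer_refcount(Buffer buf)
    {
        char   *s = DebugPrintBufferRefcount(buf);

        elog(DEBUG1, "%s", s);
        pfree(s);
    }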

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4386 of file bufmgr.c.

4387{
4388 int i;
4389
4390 /*
4391 * We needn't consider local buffers, since by assumption the target
4392 * database isn't our own.
4393 */
4394
4395 for (i = 0; i < NBuffers; i++)
4396 {
4397 BufferDesc *bufHdr = GetBufferDescriptor(i);
4398 uint32 buf_state;
4399
4400 /*
4401 * As in DropRelationBuffers, an unlocked precheck should be safe and
4402 * saves some cycles.
4403 */
4404 if (bufHdr->tag.dbOid != dbid)
4405 continue;
4406
4407 buf_state = LockBufHdr(bufHdr);
4408 if (bufHdr->tag.dbOid == dbid)
4409 InvalidateBuffer(bufHdr); /* releases spinlock */
4410 else
4411 UnlockBufHdr(bufHdr, buf_state);
4412 }
4413}
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1784
Oid dbOid
Definition: buf_internals.h:94

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().
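
A caller sketch in the spirit of dropdb() (hypothetical; assumes the target database is not the one this backend is connected to):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Discard every cached page of a database that is about to be removed. */
    static void
    forget_database_pages(Oid db_oid)
    {
        DropDatabaseBuffers(db_oid);
        /* ... the caller then deletes the database's files ... */
    }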

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber * forkNum,
int  nforks,
BlockNumber * firstDelBlock 
)

Definition at line 4031 of file bufmgr.c.

4033{
4034 int i;
4035 int j;
4036 RelFileLocatorBackend rlocator;
4037 BlockNumber nForkBlock[MAX_FORKNUM];
4038 uint64 nBlocksToInvalidate = 0;
4039
4040 rlocator = smgr_reln->smgr_rlocator;
4041
4042 /* If it's a local relation, it's localbuf.c's problem. */
4043 if (RelFileLocatorBackendIsTemp(rlocator))
4044 {
4045 if (rlocator.backend == MyProcNumber)
4046 {
4047 for (j = 0; j < nforks; j++)
4048 DropRelationLocalBuffers(rlocator.locator, forkNum[j],
4049 firstDelBlock[j]);
4050 }
4051 return;
4052 }
4053
4054 /*
4055 * To remove all the pages of the specified relation forks from the buffer
4056 * pool, we need to scan the entire buffer pool but we can optimize it by
4057 * finding the buffers from BufMapping table provided we know the exact
4058 * size of each fork of the relation. The exact size is required to ensure
4059 * that we don't leave any buffer for the relation being dropped as
4060 * otherwise the background writer or checkpointer can lead to a PANIC
4061 * error while flushing buffers corresponding to files that don't exist.
4062 *
4063 * To know the exact size, we rely on the size cached for each fork by us
4064 * during recovery which limits the optimization to recovery and on
4065 * standbys but we can easily extend it once we have shared cache for
4066 * relation size.
4067 *
4068 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4069 * and the future writes keeps the cached value up-to-date. See
4070 * smgrextend. It is possible that the value of the first lseek is smaller
4071 * than the actual number of existing blocks in the file due to buggy
4072 * Linux kernels that might not have accounted for the recent write. But
4073 * that should be fine because there must not be any buffers after that
4074 * file size.
4075 */
4076 for (i = 0; i < nforks; i++)
4077 {
4078 /* Get the number of blocks for a relation's fork */
4079 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4080
4081 if (nForkBlock[i] == InvalidBlockNumber)
4082 {
4083 nBlocksToInvalidate = InvalidBlockNumber;
4084 break;
4085 }
4086
4087 /* calculate the number of blocks to be invalidated */
4088 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4089 }
4090
4091 /*
4092 * We apply the optimization iff the total number of blocks to invalidate
4093 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4094 */
4095 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4096 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4097 {
4098 for (j = 0; j < nforks; j++)
4099 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4100 nForkBlock[j], firstDelBlock[j]);
4101 return;
4102 }
4103
4104 for (i = 0; i < NBuffers; i++)
4105 {
4106 BufferDesc *bufHdr = GetBufferDescriptor(i);
4107 uint32 buf_state;
4108
4109 /*
4110 * We can make this a tad faster by prechecking the buffer tag before
4111 * we attempt to lock the buffer; this saves a lot of lock
4112 * acquisitions in typical cases. It should be safe because the
4113 * caller must have AccessExclusiveLock on the relation, or some other
4114 * reason to be certain that no one is loading new pages of the rel
4115 * into the buffer pool. (Otherwise we might well miss such pages
4116 * entirely.) Therefore, while the tag might be changing while we
4117 * look at it, it can't be changing *to* a value we care about, only
4118 * *away* from such a value. So false negatives are impossible, and
4119 * false positives are safe because we'll recheck after getting the
4120 * buffer lock.
4121 *
4122 * We could check forkNum and blockNum as well as the rlocator, but
4123 * the incremental win from doing so seems small.
4124 */
4125 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4126 continue;
4127
4128 buf_state = LockBufHdr(bufHdr);
4129
4130 for (j = 0; j < nforks; j++)
4131 {
4132 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4133 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4134 bufHdr->tag.blockNum >= firstDelBlock[j])
4135 {
4136 InvalidateBuffer(bufHdr); /* releases spinlock */
4137 break;
4138 }
4139 }
4140 if (j >= nforks)
4141 UnlockBufHdr(bufHdr, buf_state);
4142 }
4143}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:87
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4325
uint64_t uint64
Definition: c.h:489
int j
Definition: isn.c:73
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:490
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:701

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().
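
A truncation-style sketch (hypothetical helper; mirrors how smgrtruncate() passes parallel fork and first-deleted-block arrays):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "storage/smgr.h"

    /* Invalidate cached pages of the main fork at or beyond new_nblocks. */
    static void
    drop_truncated_tail(SMgrRelation reln, BlockNumber new_nblocks)
    {
        ForkNumber  fork = MAIN_FORKNUM;
        BlockNumber first_removed = new_nblocks;

        DropRelationBuffers(reln, &fork, 1, &first_removed);
    }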

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation * smgr_reln,
int  nlocators 
)

Definition at line 4154 of file bufmgr.c.

4155{
4156 int i;
4157 int n = 0;
4158 SMgrRelation *rels;
4159 BlockNumber (*block)[MAX_FORKNUM + 1];
4160 uint64 nBlocksToInvalidate = 0;
4161 RelFileLocator *locators;
4162 bool cached = true;
4163 bool use_bsearch;
4164
4165 if (nlocators == 0)
4166 return;
4167
4168 rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
4169
4170 /* If it's a local relation, it's localbuf.c's problem. */
4171 for (i = 0; i < nlocators; i++)
4172 {
4173 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4174 {
4175 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4176 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4177 }
4178 else
4179 rels[n++] = smgr_reln[i];
4180 }
4181
4182 /*
4183 * If there are no non-local relations, then we're done. Release the
4184 * memory and return.
4185 */
4186 if (n == 0)
4187 {
4188 pfree(rels);
4189 return;
4190 }
4191
4192 /*
4193 * This is used to remember the number of blocks for all the relations
4194 * forks.
4195 */
4196 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4197 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4198
4199 /*
4200 * We can avoid scanning the entire buffer pool if we know the exact size
4201 * of each of the given relation forks. See DropRelationBuffers.
4202 */
4203 for (i = 0; i < n && cached; i++)
4204 {
4205 for (int j = 0; j <= MAX_FORKNUM; j++)
4206 {
4207 /* Get the number of blocks for a relation's fork. */
4208 block[i][j] = smgrnblocks_cached(rels[i], j);
4209
4210 /* We need to only consider the relation forks that exists. */
4211 if (block[i][j] == InvalidBlockNumber)
4212 {
4213 if (!smgrexists(rels[i], j))
4214 continue;
4215 cached = false;
4216 break;
4217 }
4218
4219 /* calculate the total number of blocks to be invalidated */
4220 nBlocksToInvalidate += block[i][j];
4221 }
4222 }
4223
4224 /*
4225 * We apply the optimization iff the total number of blocks to invalidate
4226 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4227 */
4228 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4229 {
4230 for (i = 0; i < n; i++)
4231 {
4232 for (int j = 0; j <= MAX_FORKNUM; j++)
4233 {
4234 /* ignore relation forks that doesn't exist */
4235 if (!BlockNumberIsValid(block[i][j]))
4236 continue;
4237
4238 /* drop all the buffers for a particular relation fork */
4239 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4240 j, block[i][j], 0);
4241 }
4242 }
4243
4244 pfree(block);
4245 pfree(rels);
4246 return;
4247 }
4248
4249 pfree(block);
4250 locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
4251 for (i = 0; i < n; i++)
4252 locators[i] = rels[i]->smgr_rlocator.locator;
4253
4254 /*
4255 * For low number of relations to drop just use a simple walk through, to
4256 * save the bsearch overhead. The threshold to use is rather a guess than
4257 * an exactly determined value, as it depends on many factors (CPU and RAM
4258 * speeds, amount of shared buffers etc.).
4259 */
4260 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4261
4262 /* sort the list of rlocators if necessary */
4263 if (use_bsearch)
4264 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4265
4266 for (i = 0; i < NBuffers; i++)
4267 {
4268 RelFileLocator *rlocator = NULL;
4269 BufferDesc *bufHdr = GetBufferDescriptor(i);
4270 uint32 buf_state;
4271
4272 /*
4273 * As in DropRelationBuffers, an unlocked precheck should be safe and
4274 * saves some cycles.
4275 */
4276
4277 if (!use_bsearch)
4278 {
4279 int j;
4280
4281 for (j = 0; j < n; j++)
4282 {
4283 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4284 {
4285 rlocator = &locators[j];
4286 break;
4287 }
4288 }
4289 }
4290 else
4291 {
4292 RelFileLocator locator;
4293
4294 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4295 rlocator = bsearch(&locator,
4296 locators, n, sizeof(RelFileLocator),
4298 }
4299
4300 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4301 if (rlocator == NULL)
4302 continue;
4303
4304 buf_state = LockBufHdr(bufHdr);
4305 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4306 InvalidateBuffer(bufHdr); /* releases spinlock */
4307 else
4308 UnlockBufHdr(bufHdr, buf_state);
4309 }
4310
4311 pfree(locators);
4312 pfree(rels);
4313}
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:79
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:538
#define qsort(a, b, c, d)
Definition: port.h:474

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().
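
A caller sketch in the spirit of smgrdounlinkall() (hypothetical helper; assumes the relation is locked so no new pages can be loaded concurrently):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "storage/procnumber.h"
    #include "storage/smgr.h"

    /* Discard all cached pages of every fork of one relation before unlink. */
    static void
    forget_relation_pages(RelFileLocator rlocator)
    {
        SMgrRelation reln = smgropen(rlocator, INVALID_PROC_NUMBER);

        DropRelationsAllBuffers(&reln, 1);
    }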

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf)

Definition at line 6101 of file bufmgr.c.

6102{
6103 BufferDesc *desc;
6104 uint32 buf_state;
6105 bool result;
6106
6107 /* Make sure we can pin the buffer. */
6108 ResourceOwnerEnlarge(CurrentResourceOwner);
6109 ReservePrivateRefCountEntry();
6110
6111 Assert(!BufferIsLocal(buf));
6112 desc = GetBufferDescriptor(buf - 1);
6113
6114 /* Lock the header and check if it's valid. */
6115 buf_state = LockBufHdr(desc);
6116 if ((buf_state & BM_VALID) == 0)
6117 {
6118 UnlockBufHdr(desc, buf_state);
6119 return false;
6120 }
6121
6122 /* Check that it's not pinned already. */
6123 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
6124 {
6125 UnlockBufHdr(desc, buf_state);
6126 return false;
6127 }
6128
6129 PinBuffer_Locked(desc); /* releases spinlock */
6130
6131 /* If it was dirty, try to clean it once. */
6132 if (buf_state & BM_DIRTY)
6133 {
6134 LWLockAcquire(BufferDescriptorGetContentLock(desc), LW_SHARED);
6135 FlushBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
6136 LWLockRelease(BufferDescriptorGetContentLock(desc));
6137 }
6138
6139 /* This will return false if it becomes dirty or someone else pins it. */
6140 result = InvalidateVictimBuffer(desc);
6141
6142 UnpinBuffer(desc);
6143
6144 return result;
6145}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:3784
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:2763
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition: bufmgr.c:1882
@ IOOBJECT_RELATION
Definition: pgstat.h:275

References Assert, BM_DIRTY, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), BufferIsLocal, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by pg_buffercache_evict().
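
A sweep sketch in the spirit of pg_buffercache_evict() (hypothetical; shared buffer IDs run from 1 to NBuffers, and pinned or re-dirtied buffers are simply skipped):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Try to evict every unpinned shared buffer; returns how many were evicted. */
    static int
    evict_all_unpinned_buffers(void)
    {
        int     evicted = 0;

        for (Buffer buf = 1; buf <= NBuffers; buf++)
        {
            if (EvictUnpinnedBuffer(buf))
                evicted++;
        }
        return evicted;
    }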

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 846 of file bufmgr.c.

850{
851 Buffer buf;
852 uint32 extend_by = 1;
853
854 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
855 &buf, &extend_by);
856
857 return buf;
858}
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:878

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
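
A new-page sketch in the style of the index AM callers listed above (hypothetical helper; uses the BMR_REL() initializer from bufmgr.h):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Append one zero-filled block to the main fork and return it locked. */
    static Buffer
    append_new_block(Relation rel)
    {
        return ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL,
                                 EB_LOCK_FIRST);
    }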

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer * buffers,
uint32 * extended_by 
)

Definition at line 878 of file bufmgr.c.

885{
886 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
887 Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
888 Assert(extend_by > 0);
889
890 if (bmr.smgr == NULL)
891 {
892 bmr.smgr = RelationGetSmgr(bmr.rel);
893 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
894 }
895
896 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
897 extend_by, InvalidBlockNumber,
898 buffers, extended_by);
899}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2147
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
struct SMgrRelationData * smgr
Definition: bufmgr.h:103
Form_pg_class rd_rel
Definition: rel.h:111

References Assert, ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, RelationGetSmgr(), BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), and RelationAddBlocks().
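
A bulk-extension sketch in the spirit of RelationAddBlocks() (hypothetical helper; the call may extend by fewer blocks than requested, so only extended_by buffers are valid):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Grow the main fork by up to 16 blocks, then release the returned pins. */
    static BlockNumber
    grow_relation(Relation rel)
    {
        Buffer      buffers[16];
        uint32      extended_by = 0;
        BlockNumber first_block;

        first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM, NULL,
                                          0, lengthof(buffers),
                                          buffers, &extended_by);

        for (uint32 i = 0; i < extended_by; i++)
            ReleaseBuffer(buffers[i]);

        return first_block;
    }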

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer * buffers,
uint32 * extended_by 
)
static

Definition at line 2147 of file bufmgr.c.

2155{
2156 BlockNumber first_block;
2157
2158 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2163 extend_by);
2164
2165 if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2166 first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2167 extend_by, extend_upto,
2168 buffers, &extend_by);
2169 else
2170 first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2171 extend_by, extend_upto,
2172 buffers, &extend_by);
2173 *extended_by = extend_by;
2174
2175 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2180 *extended_by,
2181 first_block);
2182
2183 return first_block;
2184}
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2191
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: localbuf.c:314
RelFileNumber relNumber

References RelFileLocatorBackend::backend, RelFileLocator::dbOid, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), RelFileLocatorBackend::locator, RelFileLocator::relNumber, BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_rlocator, and RelFileLocator::spcOid.

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer * buffers,
uint32 * extended_by 
)
static

Definition at line 2191 of file bufmgr.c.

2199{
2200 BlockNumber first_block;
2201 IOContext io_context = IOContextForStrategy(strategy);
2202 instr_time io_start;
2203
2204 LimitAdditionalPins(&extend_by);
2205
2206 /*
2207 * Acquire victim buffers for extension without holding extension lock.
2208 * Writing out victim buffers is the most expensive part of extending the
2209 * relation, particularly when doing so requires WAL flushes. Zeroing out
2210 * the buffers is also quite expensive, so do that before holding the
2211 * extension lock as well.
2212 *
2213 * These pages are pinned by us and not valid. While we hold the pin they
2214 * can't be acquired as victim buffers by another backend.
2215 */
2216 for (uint32 i = 0; i < extend_by; i++)
2217 {
2218 Block buf_block;
2219
2220 buffers[i] = GetVictimBuffer(strategy, io_context);
2221 buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2222
2223 /* new buffers are zero-filled */
2224 MemSet((char *) buf_block, 0, BLCKSZ);
2225 }
2226
2227 /*
2228 * Lock relation against concurrent extensions, unless requested not to.
2229 *
2230 * We use the same extension lock for all forks. That's unnecessarily
2231 * restrictive, but currently extensions for forks don't happen often
2232 * enough to make it worth locking more granularly.
2233 *
2234 * Note that another backend might have extended the relation by the time
2235 * we get the lock.
2236 */
2237 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2238 LockRelationForExtension(bmr.rel, ExclusiveLock);
2239
2240 /*
2241 * If requested, invalidate size cache, so that smgrnblocks asks the
2242 * kernel.
2243 */
2244 if (flags & EB_CLEAR_SIZE_CACHE)
2245 bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2246
2247 first_block = smgrnblocks(bmr.smgr, fork);
2248
2249 /*
2250 * Now that we have the accurate relation size, check if the caller wants
2251 * us to extend to only up to a specific size. If there were concurrent
2252 * extensions, we might have acquired too many buffers and need to release
2253 * them.
2254 */
2255 if (extend_upto != InvalidBlockNumber)
2256 {
2257 uint32 orig_extend_by = extend_by;
2258
2259 if (first_block > extend_upto)
2260 extend_by = 0;
2261 else if ((uint64) first_block + extend_by > extend_upto)
2262 extend_by = extend_upto - first_block;
2263
2264 for (uint32 i = extend_by; i < orig_extend_by; i++)
2265 {
2266 BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2267
2268 /*
2269 * The victim buffer we acquired previously is clean and unused,
2270 * let it be found again quickly
2271 */
2272 StrategyFreeBuffer(buf_hdr);
2273 UnpinBuffer(buf_hdr);
2274 }
2275
2276 if (extend_by == 0)
2277 {
2278 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2279 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2280 *extended_by = extend_by;
2281 return first_block;
2282 }
2283 }
2284
2285 /* Fail if relation is already at maximum possible length */
2286 if ((uint64) first_block + extend_by >= MaxBlockNumber)
2287 ereport(ERROR,
2288 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2289 errmsg("cannot extend relation %s beyond %u blocks",
2290 relpath(bmr.smgr->smgr_rlocator, fork),
2291 MaxBlockNumber)));
2292
2293 /*
2294 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2295 *
2296 * This needs to happen before we extend the relation, because as soon as
2297 * we do, other backends can start to read in those pages.
2298 */
2299 for (uint32 i = 0; i < extend_by; i++)
2300 {
2301 Buffer victim_buf = buffers[i];
2302 BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2303 BufferTag tag;
2304 uint32 hash;
2305 LWLock *partition_lock;
2306 int existing_id;
2307
2308 /* in case we need to pin an existing buffer below */
2309 ResourceOwnerEnlarge(CurrentResourceOwner);
2310 ReservePrivateRefCountEntry();
2311
2312 InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i);
2313 hash = BufTableHashCode(&tag);
2314 partition_lock = BufMappingPartitionLock(hash);
2315
2316 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2317
2318 existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2319
2320 /*
2321 * We get here only in the corner case where we are trying to extend
2322 * the relation but we found a pre-existing buffer. This can happen
2323 * because a prior attempt at extending the relation failed, and
2324 * because mdread doesn't complain about reads beyond EOF (when
2325 * zero_damaged_pages is ON) and so a previous attempt to read a block
2326 * beyond EOF could have left a "valid" zero-filled buffer.
2327 * Unfortunately, we have also seen this case occurring because of
2328 * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
2329 * that doesn't account for a recent write. In that situation, the
2330 * pre-existing buffer would contain valid data that we don't want to
2331 * overwrite. Since the legitimate cases should always have left a
2332 * zero-filled buffer, complain if not PageIsNew.
2333 */
2334 if (existing_id >= 0)
2335 {
2336 BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2337 Block buf_block;
2338 bool valid;
2339
2340 /*
2341 * Pin the existing buffer before releasing the partition lock,
2342 * preventing it from being evicted.
2343 */
2344 valid = PinBuffer(existing_hdr, strategy);
2345
2346 LWLockRelease(partition_lock);
2347
2348 /*
2349 * The victim buffer we acquired previously is clean and unused,
2350 * let it be found again quickly
2351 */
2352 StrategyFreeBuffer(victim_buf_hdr);
2353 UnpinBuffer(victim_buf_hdr);
2354
2355 buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
2356 buf_block = BufHdrGetBlock(existing_hdr);
2357
2358 if (valid && !PageIsNew((Page) buf_block))
2359 ereport(ERROR,
2360 (errmsg("unexpected data beyond EOF in block %u of relation %s",
2361 existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork)),
2362 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2363
2364 /*
2365 * We *must* do smgr[zero]extend before succeeding, else the page
2366 * will not be reserved by the kernel, and the next P_NEW call
2367 * will decide to return the same page. Clear the BM_VALID bit,
2368 * do StartBufferIO() and proceed.
2369 *
2370 * Loop to handle the very small possibility that someone re-sets
2371 * BM_VALID between our clearing it and StartBufferIO inspecting
2372 * it.
2373 */
2374 do
2375 {
2376 uint32 buf_state = LockBufHdr(existing_hdr);
2377
2378 buf_state &= ~BM_VALID;
2379 UnlockBufHdr(existing_hdr, buf_state);
2380 } while (!StartBufferIO(existing_hdr, true, false));
2381 }
2382 else
2383 {
2384 uint32 buf_state;
2385
2386 buf_state = LockBufHdr(victim_buf_hdr);
2387
2388 /* some sanity checks while we hold the buffer header lock */
2389 Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
2390 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2391
2392 victim_buf_hdr->tag = tag;
2393
2394 buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2395 if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2396 buf_state |= BM_PERMANENT;
2397
2398 UnlockBufHdr(victim_buf_hdr, buf_state);
2399
2400 LWLockRelease(partition_lock);
2401
2402 /* XXX: could combine the locked operations in it with the above */
2403 StartBufferIO(victim_buf_hdr, true, false);
2404 }
2405 }
2406
2407 io_start = pgstat_prepare_io_time(track_io_timing);
2408
2409 /*
2410 * Note: if smgrzeroextend fails, we will end up with buffers that are
2411 * allocated but not marked BM_VALID. The next relation extension will
2412 * still select the same block number (because the relation didn't get any
2413 * longer on disk) and so future attempts to extend the relation will find
2414 * the same buffers (if they have not been recycled) but come right back
2415 * here to try smgrzeroextend again.
2416 *
2417 * We don't need to set checksum for all-zero pages.
2418 */
2419 smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
2420
2421 /*
2422 * Release the file-extension lock; it's now OK for someone else to extend
2423 * the relation some more.
2424 *
2425 * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2426 * take noticeable time.
2427 */
2428 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2429 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2430
2431 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2432 io_start, 1, extend_by * BLCKSZ);
2433
2434 /* Set BM_VALID, terminate IO, and wake up any waiters */
2435 for (uint32 i = 0; i < extend_by; i++)
2436 {
2437 Buffer buf = buffers[i];
2438 BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2439 bool lock = false;
2440
2441 if (flags & EB_LOCK_FIRST && i == 0)
2442 lock = true;
2443 else if (flags & EB_LOCK_TARGET)
2444 {
2445 Assert(extend_upto != InvalidBlockNumber);
2446 if (first_block + i + 1 == extend_upto)
2447 lock = true;
2448 }
2449
2450 if (lock)
2451 LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
2452
2453 TerminateBufferIO(buf_hdr, false, BM_VALID, true);
2454 }
2455
2456 pgBufferUsage.shared_blks_written += extend_by;
2457
2458 *extended_by = extend_by;
2459
2460 return first_block;
2461}
#define MaxBlockNumber
Definition: block.h:35
#define BM_JUST_DIRTIED
Definition: buf_internals.h:65
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
bool track_io_timing
Definition: bufmgr.c:143
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:68
void LimitAdditionalPins(uint32 *additional_pins)
Definition: bufmgr.c:2116
static bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition: bufmgr.c:5558
void * Block
Definition: bufmgr.h:25
@ EB_LOCK_TARGET
Definition: bufmgr.h:92
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:89
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:74
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
Pointer Page
Definition: bufpage.h:81
static bool PageIsNew(Page page)
Definition: bufpage.h:233
#define MemSet(start, val, len)
Definition: c.h:977
int errhint(const char *fmt,...)
Definition: elog.c:1317
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:758
BufferUsage pgBufferUsage
Definition: instrument.c:20
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:419
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:469
#define ExclusiveLock
Definition: lockdefs.h:42
IOContext
Definition: pgstat.h:282
@ IOOP_EXTEND
Definition: pgstat.h:310
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:96
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:118
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition: relpath.h:102
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:677
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:563
int64 shared_blks_written
Definition: instrument.h:29
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46

References Assert, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errhint(), errmsg(), ERROR, ExclusiveLock, GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), RelFileLocatorBackend::locator, LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), BufferManagerRelation::rel, relpath, BufferManagerRelation::relpersistence, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrzeroextend(), StartBufferIO(), StrategyFreeBuffer(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 910 of file bufmgr.c.

916{
917 BlockNumber current_size;
918 uint32 extended_by = 0;
919 Buffer buffer = InvalidBuffer;
920 Buffer buffers[64];
921
922 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
923 Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
924 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
925
926 if (bmr.smgr == NULL)
927 {
928 bmr.smgr = RelationGetSmgr(bmr.rel);
929 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
930 }
931
932 /*
933 * If desired, create the file if it doesn't exist. If
934 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
935 * an smgrexists call.
936 */
937 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
938 (bmr.smgr->smgr_cached_nblocks[fork] == 0 ||
940 !smgrexists(bmr.smgr, fork))
941 {
942 LockRelationForExtension(bmr.rel, ExclusiveLock);
943
944 /* recheck, fork might have been created concurrently */
945 if (!smgrexists(bmr.smgr, fork))
946 smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY);
947
948 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
949 }
950
951 /*
952 * If requested, invalidate size cache, so that smgrnblocks asks the
953 * kernel.
954 */
955 if (flags & EB_CLEAR_SIZE_CACHE)
956 bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
957
958 /*
959 * Estimate how many pages we'll need to extend by. This avoids acquiring
960 * unnecessarily many victim buffers.
961 */
962 current_size = smgrnblocks(bmr.smgr, fork);
963
964 /*
965 * Since no-one else can be looking at the page contents yet, there is no
966 * difference between an exclusive lock and a cleanup-strength lock. Note
967 * that we pass the original mode to ReadBuffer_common() below, when
968 * falling back to reading the buffer to a concurrent relation extension.
969 */
970 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
971 flags |= EB_LOCK_TARGET;
972
973 while (current_size < extend_to)
974 {
975 uint32 num_pages = lengthof(buffers);
976 BlockNumber first_block;
977
978 if ((uint64) current_size + num_pages > extend_to)
979 num_pages = extend_to - current_size;
980
981 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
982 num_pages, extend_to,
983 buffers, &extended_by);
984
985 current_size = first_block + extended_by;
986 Assert(num_pages != 0 || current_size >= extend_to);
987
988 for (uint32 i = 0; i < extended_by; i++)
989 {
990 if (first_block + i != extend_to - 1)
991 ReleaseBuffer(buffers[i]);
992 else
993 buffer = buffers[i];
994 }
995 }
996
997 /*
998 * It's possible that another backend concurrently extended the relation.
999 * In that case read the buffer.
1000 *
1001 * XXX: Should we control this via a flag?
1002 */
1003 if (buffer == InvalidBuffer)
1004 {
1005 Assert(extended_by == 0);
1006 buffer = ReadBuffer_common(bmr.rel, bmr.smgr, bmr.relpersistence,
1007 fork, extend_to - 1, mode, strategy);
1008 }
1009
1010 return buffer;
1011}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1189
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:77
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:83
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:48
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:46
#define lengthof(array)
Definition: c.h:745
static PgChecksumMode mode
Definition: pg_checksums.c:55
static int64 current_size
Definition: pg_checksums.c:63

References Assert, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, RelationGetSmgr(), ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
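
A sketch in the spirit of fsm_extend()/vm_extend() (hypothetical helper; grows a secondary fork until it contains target_block, creating the fork first if required):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Make sure the free-space-map fork reaches target_block; return it locked. */
    static Buffer
    ensure_fsm_block(Relation rel, BlockNumber target_block)
    {
        return ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL,
                                   EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE,
                                   target_block + 1, RBM_ZERO_AND_LOCK);
    }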

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4325 of file bufmgr.c.

4328{
4329 BlockNumber curBlock;
4330
4331 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4332 {
4333 uint32 bufHash; /* hash value for tag */
4334 BufferTag bufTag; /* identity of requested block */
4335 LWLock *bufPartitionLock; /* buffer partition lock for it */
4336 int buf_id;
4337 BufferDesc *bufHdr;
4338 uint32 buf_state;
4339
4340 /* create a tag so we can lookup the buffer */
4341 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4342
4343 /* determine its hash code and partition lock ID */
4344 bufHash = BufTableHashCode(&bufTag);
4345 bufPartitionLock = BufMappingPartitionLock(bufHash);
4346
4347 /* Check that it is in the buffer pool. If not, do nothing. */
4348 LWLockAcquire(bufPartitionLock, LW_SHARED);
4349 buf_id = BufTableLookup(&bufTag, bufHash);
4350 LWLockRelease(bufPartitionLock);
4351
4352 if (buf_id < 0)
4353 continue;
4354
4355 bufHdr = GetBufferDescriptor(buf_id);
4356
4357 /*
4358 * We need to lock the buffer header and recheck if the buffer is
4359 * still associated with the same block because the buffer could be
4360 * evicted by some other backend loading blocks for a different
4361 * relation after we release lock on the BufMapping table.
4362 */
4363 buf_state = LockBufHdr(bufHdr);
4364
4365 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
4366 BufTagGetForkNum(&bufHdr->tag) == forkNum &&
4367 bufHdr->tag.blockNum >= firstDelBlock)
4368 InvalidateBuffer(bufHdr); /* releases spinlock */
4369 else
4370 UnlockBufHdr(bufHdr, buf_state);
4371 }
4372}

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc * buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 3784 of file bufmgr.c.

3786{
3787 XLogRecPtr recptr;
3788 ErrorContextCallback errcallback;
3789 instr_time io_start;
3790 Block bufBlock;
3791 char *bufToWrite;
3792 uint32 buf_state;
3793
3794 /*
3795 * Try to start an I/O operation. If StartBufferIO returns false, then
3796 * someone else flushed the buffer before we could, so we need not do
3797 * anything.
3798 */
3799 if (!StartBufferIO(buf, false, false))
3800 return;
3801
3802 /* Setup error traceback support for ereport() */
3803 errcallback.callback = shared_buffer_write_error_callback;
3804 errcallback.arg = buf;
3805 errcallback.previous = error_context_stack;
3806 error_context_stack = &errcallback;
3807
3808 /* Find smgr relation for buffer */
3809 if (reln == NULL)
3810 reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
3811
3812 TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
3813 buf->tag.blockNum,
3817
3818 buf_state = LockBufHdr(buf);
3819
3820 /*
3821 * Run PageGetLSN while holding header lock, since we don't have the
3822 * buffer locked exclusively in all cases.
3823 */
3824 recptr = BufferGetLSN(buf);
3825
3826 /* To check if block content changes while flushing. - vadim 01/17/97 */
3827 buf_state &= ~BM_JUST_DIRTIED;
3828 UnlockBufHdr(buf, buf_state);
3829
3830 /*
3831 * Force XLOG flush up to buffer's LSN. This implements the basic WAL
3832 * rule that log updates must hit disk before any of the data-file changes
3833 * they describe do.
3834 *
3835 * However, this rule does not apply to unlogged relations, which will be
3836 * lost after a crash anyway. Most unlogged relation pages do not bear
3837 * LSNs since we never emit WAL records for them, and therefore flushing
3838 * up through the buffer LSN would be useless, but harmless. However,
3839 * GiST indexes use LSNs internally to track page-splits, and therefore
3840 * unlogged GiST pages bear "fake" LSNs generated by
3841 * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
3842 * LSN counter could advance past the WAL insertion point; and if it did
3843 * happen, attempting to flush WAL through that location would fail, with
3844 * disastrous system-wide consequences. To make sure that can't happen,
3845 * skip the flush if the buffer isn't permanent.
3846 */
3847 if (buf_state & BM_PERMANENT)
3848 XLogFlush(recptr);
3849
3850 /*
3851 * Now it's safe to write buffer to disk. Note that no one else should
3852 * have been able to write it while we were busy with log flushing because
3853 * only one process at a time can set the BM_IO_IN_PROGRESS bit.
3854 */
3855 bufBlock = BufHdrGetBlock(buf);
3856
3857 /*
3858 * Update page checksum if desired. Since we have only shared lock on the
3859 * buffer, other processes might be updating hint bits in it, so we must
3860 * copy the page to private storage if we do checksumming.
3861 */
3862 bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
3863
3864 io_start = pgstat_prepare_io_time(track_io_timing);
3865
3866 /*
3867 * bufToWrite is either the shared buffer or a copy, as appropriate.
3868 */
3869 smgrwrite(reln,
3870 BufTagGetForkNum(&buf->tag),
3871 buf->tag.blockNum,
3872 bufToWrite,
3873 false);
3874
3875 /*
3876 * When a strategy is in use, only flushes of dirty buffers already in the
3877 * strategy ring are counted as strategy writes (IOCONTEXT
3878 * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
3879 * statistics tracking.
3880 *
3881 * If a shared buffer initially added to the ring must be flushed before
3882 * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
3883 *
3884 * If a shared buffer which was added to the ring later because the
3885 * current strategy buffer is pinned or in use or because all strategy
3886 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
3887 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
3888 * (from_ring will be false).
3889 *
3890 * When a strategy is not in use, the write can only be a "regular" write
3891 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
3892 */
3893 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
3894 IOOP_WRITE, io_start, 1, BLCKSZ);
3895
3896 pgBufferUsage.shared_blks_written++;
3897
3898 /*
3899 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
3900 * end the BM_IO_IN_PROGRESS state.
3901 */
3902 TerminateBufferIO(buf, true, 0, true);
3903
3904 TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
3905 buf->tag.blockNum,
3909
3910 /* Pop the error context stack */
3911 error_context_stack = errcallback.previous;
3912}
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:69
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5694
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1499
ErrorContextCallback * error_context_stack
Definition: elog.c:94
@ IOOP_WRITE
Definition: pgstat.h:312
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.h:124
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2802

References ErrorContextCallback::arg, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdr(), and XLogFlush().

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), and SyncOneBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 4862 of file bufmgr.c.

4863{
4864 int i;
4865 BufferDesc *bufHdr;
4866
4867 for (i = 0; i < NBuffers; i++)
4868 {
4869 uint32 buf_state;
4870
4871 bufHdr = GetBufferDescriptor(i);
4872
4873 /*
4874 * As in DropRelationBuffers, an unlocked precheck should be safe and
4875 * saves some cycles.
4876 */
4877 if (bufHdr->tag.dbOid != dbid)
4878 continue;
4879
4880 /* Make sure we can handle the pin */
4881 ReservePrivateRefCountEntry();
4882 ResourceOwnerEnlarge(CurrentResourceOwner);
4883
4884 buf_state = LockBufHdr(bufHdr);
4885 if (bufHdr->tag.dbOid == dbid &&
4886 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4887 {
4888 PinBuffer_Locked(bufHdr);
4889 LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4890 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4891 LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4892 UnpinBuffer(bufHdr);
4893 }
4894 else
4895 UnlockBufHdr(bufHdr, buf_state);
4896 }
4897}

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), CurrentResourceOwner, buftag::dbOid, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 4904 of file bufmgr.c.

4905{
4906 BufferDesc *bufHdr;
4907
4908 /* currently not needed, but no fundamental reason not to support */
4909 Assert(!BufferIsLocal(buffer));
4910
4911 Assert(BufferIsPinned(buffer));
4912
4913 bufHdr = GetBufferDescriptor(buffer - 1);
4914
4915 Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4916
4917 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4918}
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1893

References Assert, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().
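
A redo-style sketch (hypothetical helper; the caller must already hold a pin and the buffer's content lock, which FlushOneBuffer() asserts):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Write a single locked page out without releasing the content lock. */
    static void
    write_locked_page(Buffer buf)
    {
        /* pushes the page contents down via smgrwrite(); no fsync is implied */
        FlushOneBuffer(buf);
    }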

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 4492 of file bufmgr.c.

4493{
4494 int i;
4495 BufferDesc *bufHdr;
4496 SMgrRelation srel = RelationGetSmgr(rel);
4497
4498 if (RelationUsesLocalBuffers(rel))
4499 {
4500 for (i = 0; i < NLocBuffer; i++)
4501 {
4502 uint32 buf_state;
4503 instr_time io_start;
4504
4505 bufHdr = GetLocalBufferDescriptor(i);
4506 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4507 ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
4508 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4509 {
4510 ErrorContextCallback errcallback;
4511 Page localpage;
4512
4513 localpage = (char *) LocalBufHdrGetBlock(bufHdr);
4514
4515 /* Setup error traceback support for ereport() */
4517 errcallback.arg = bufHdr;
4518 errcallback.previous = error_context_stack;
4519 error_context_stack = &errcallback;
4520
4521 PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
4522
4524
4525 smgrwrite(srel,
4526 BufTagGetForkNum(&bufHdr->tag),
4527 bufHdr->tag.blockNum,
4528 localpage,
4529 false);
4530
4533 io_start, 1, BLCKSZ);
4534
4535 buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
4536 pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
4537
4539
4540 /* Pop the error context stack */
4541 error_context_stack = errcallback.previous;
4542 }
4543 }
4544
4545 return;
4546 }
4547
4548 for (i = 0; i < NBuffers; i++)
4549 {
4550 uint32 buf_state;
4551
4552 bufHdr = GetBufferDescriptor(i);
4553
4554 /*
4555 * As in DropRelationBuffers, an unlocked precheck should be safe and
4556 * saves some cycles.
4557 */
4558 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
4559 continue;
4560
4561 /* Make sure we can handle the pin */
4562 ReservePrivateRefCountEntry();
4563 ResourceOwnerEnlarge(CurrentResourceOwner);
4564
4565 buf_state = LockBufHdr(bufHdr);
4566 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4567 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4568 {
4569 PinBuffer_Locked(bufHdr);
4570 LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4571 FlushBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4572 LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4573 UnpinBuffer(bufHdr);
4574 }
4575 else
4576 UnlockBufHdr(bufHdr, buf_state);
4577 }
4578}
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:295
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:72
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5714
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1531
int NLocBuffer
Definition: localbuf.c:42
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:276
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
int64 local_blks_written
Definition: instrument.h:33
RelFileLocator rd_locator
Definition: rel.h:57

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_WRITE, BufferUsage::local_blks_written, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), smgrwrite(), BufferDesc::state, BufferDesc::tag, track_io_timing, UnlockBufHdr(), and UnpinBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
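
A minimal usage sketch (hypothetical caller; rel is assumed to be a valid, opened Relation): callers such as index_copy_data() flush a relation's dirty buffers so that a subsequent smgr-level copy of the underlying files sees current data.

    /* Write out every dirty buffer of "rel" before copying its storage. */
    FlushRelationBuffers(rel);
    /* ... then copy the relation's forks at the smgr level ... */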

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 4590 of file bufmgr.c.

4591{
4592 int i;
4593 SMgrSortArray *srels;
4594 bool use_bsearch;
4595
4596 if (nrels == 0)
4597 return;
4598
4599 /* fill-in array for qsort */
4600 srels = palloc(sizeof(SMgrSortArray) * nrels);
4601
4602 for (i = 0; i < nrels; i++)
4603 {
4604 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
4605
4606 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
4607 srels[i].srel = smgrs[i];
4608 }
4609
4610 /*
4611 * Save the bsearch overhead for low number of relations to sync. See
4612 * DropRelationsAllBuffers for details.
4613 */
4614 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
4615
4616 /* sort the list of SMgrRelations if necessary */
4617 if (use_bsearch)
4618 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
4619
4620 for (i = 0; i < NBuffers; i++)
4621 {
4622 SMgrSortArray *srelent = NULL;
4623 BufferDesc *bufHdr = GetBufferDescriptor(i);
4624 uint32 buf_state;
4625
4626 /*
4627 * As in DropRelationBuffers, an unlocked precheck should be safe and
4628 * saves some cycles.
4629 */
4630
4631 if (!use_bsearch)
4632 {
4633 int j;
4634
4635 for (j = 0; j < nrels; j++)
4636 {
4637 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
4638 {
4639 srelent = &srels[j];
4640 break;
4641 }
4642 }
4643 }
4644 else
4645 {
4646 RelFileLocator rlocator;
4647
4648 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4649 srelent = bsearch(&rlocator,
4650 srels, nrels, sizeof(SMgrSortArray),
4651 rlocator_comparator);
4652
4653
4654 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4655 if (srelent == NULL)
4656 continue;
4657
4658 /* Make sure we can handle the pin */
4659 ReservePrivateRefCountEntry();
4660 ResourceOwnerEnlarge(CurrentResourceOwner);
4661
4662 buf_state = LockBufHdr(bufHdr);
4663 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
4664 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4665 {
4666 PinBuffer_Locked(bufHdr);
4667 LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4668 FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4669 LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4670 UnpinBuffer(bufHdr);
4671 }
4672 else
4673 UnlockBufHdr(bufHdr, buf_state);
4674 }
4675
4676 pfree(srels);
4677}
SMgrRelation srel
Definition: bufmgr.c:136
RelFileLocator rlocator
Definition: bufmgr.c:135

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, palloc(), pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().
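
A sketch of the caller's side, modeled on smgrdosyncall() (the array construction is elided and illustrative):

    /* srels is an array of nrels SMgrRelation handles for permanent relations */
    FlushRelationsAllBuffers(srels, nrels);
    /* ... afterwards the underlying files can be fsync'ed at the smgr level ... */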

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 439 of file bufmgr.c.

440{
441 Assert(ref->refcount == 0);
442
443 if (ref >= &PrivateRefCountArray[0] &&
444 ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
445 {
446 ref->buffer = InvalidBuffer;
447
448 /*
449 * Mark the just used entry as reserved - in many scenarios that
450 * allows us to avoid ever having to search the array/hash for free
451 * entries.
452 */
453 ReservedRefCountEntry = ref;
454 }
455 else
456 {
457 bool found;
458 Buffer buffer = ref->buffer;
459
460 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
461 Assert(found);
462 Assert(PrivateRefCountOverflowed > 0);
463 PrivateRefCountOverflowed--;
464 }
465}
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:212
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_REMOVE
Definition: hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountEntry.

Referenced by UnpinBufferNoOwner().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 416 of file bufmgr.c.

417{
418 PrivateRefCountEntry *ref;
419
420 Assert(BufferIsValid(buffer));
421 Assert(!BufferIsLocal(buffer));
422
423 /*
424 * Not moving the entry - that's ok for the current users, but we might
425 * want to change this one day.
426 */
427 ref = GetPrivateRefCountEntry(buffer, false);
428
429 if (ref == NULL)
430 return 0;
431 return ref->refcount;
432}
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:342

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), MarkBufferDirtyHint(), and ReadRecentBuffer().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 342 of file bufmgr.c.

343{
344 PrivateRefCountEntry *res;
345 int i;
346
347 Assert(BufferIsValid(buffer));
348 Assert(!BufferIsLocal(buffer));
349
350 /*
351 * First search for references in the array, that'll be sufficient in the
352 * majority of cases.
353 */
354 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
355 {
356 res = &PrivateRefCountArray[i];
357
358 if (res->buffer == buffer)
359 return res;
360 }
361
362 /*
363 * By here we know that the buffer, if already pinned, isn't residing in
364 * the array.
365 *
366 * Only look up the buffer in the hashtable if we've previously overflowed
367 * into it.
368 */
369 if (PrivateRefCountOverflowed == 0)
370 return NULL;
371
372 res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL);
373
374 if (res == NULL)
375 return NULL;
376 else if (!do_move)
377 {
378 /* caller doesn't want us to move the hash entry into the array */
379 return res;
380 }
381 else
382 {
383 /* move buffer from hashtable into the free array slot */
384 bool found;
385 PrivateRefCountEntry *free;
386
387 /* Ensure there's a free array slot */
388 ReservePrivateRefCountEntry();
389
390 /* Use up the reserved slot */
391 Assert(ReservedRefCountEntry != NULL);
392 free = ReservedRefCountEntry;
393 ReservedRefCountEntry = NULL;
394 Assert(free->buffer == InvalidBuffer);
395
396 /* and fill it */
397 free->buffer = buffer;
398 free->refcount = res->refcount;
399
400 /* delete from hashtable */
401 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
402 Assert(found);
403 Assert(PrivateRefCountOverflowed > 0);
404 PrivateRefCountOverflowed--;
405
406 return free;
407 }
408}
#define free(a)
Definition: header.h:65
@ HASH_FIND
Definition: hsearch.h:113

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBufferNoOwner().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 1950 of file bufmgr.c.

1951{
1952 BufferDesc *buf_hdr;
1953 Buffer buf;
1954 uint32 buf_state;
1955 bool from_ring;
1956
1957 /*
1958 * Ensure, while the spinlock's not yet held, that there's a free refcount
1959 * entry, and a resource owner slot for the pin.
1960 */
1961 ReservePrivateRefCountEntry();
1962 ResourceOwnerEnlarge(CurrentResourceOwner);
1963
1964 /* we return here if a prospective victim buffer gets used concurrently */
1965again:
1966
1967 /*
1968 * Select a victim buffer. The buffer is returned with its header
1969 * spinlock still held!
1970 */
1971 buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
1972 buf = BufferDescriptorGetBuffer(buf_hdr);
1973
1974 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1975
1976 /* Pin the buffer and then release the buffer spinlock */
1977 PinBuffer_Locked(buf_hdr);
1978
1979 /*
1980 * We shouldn't have any other pins for this buffer.
1981 */
1983
1984 /*
1985 * If the buffer was dirty, try to write it out. There is a race
1986 * condition here, in that someone might dirty it after we released the
1987 * buffer header lock above, or even while we are writing it out (since
1988 * our share-lock won't prevent hint-bit updates). We will recheck the
1989 * dirty bit after re-locking the buffer header.
1990 */
1991 if (buf_state & BM_DIRTY)
1992 {
1993 LWLock *content_lock;
1994
1995 Assert(buf_state & BM_TAG_VALID);
1996 Assert(buf_state & BM_VALID);
1997
1998 /*
1999 * We need a share-lock on the buffer contents to write it out (else
2000 * we might write invalid data, eg because someone else is compacting
2001 * the page contents while we write). We must use a conditional lock
2002 * acquisition here to avoid deadlock. Even though the buffer was not
2003 * pinned (and therefore surely not locked) when StrategyGetBuffer
2004 * returned it, someone else could have pinned and exclusive-locked it
2005 * by the time we get here. If we try to get the lock unconditionally,
2006 * we'd block waiting for them; if they later block waiting for us,
2007 * deadlock ensues. (This has been observed to happen when two
2008 * backends are both trying to split btree index pages, and the second
2009 * one just happens to be trying to split the page the first one got
2010 * from StrategyGetBuffer.)
2011 */
2012 content_lock = BufferDescriptorGetContentLock(buf_hdr);
2013 if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
2014 {
2015 /*
2016 * Someone else has locked the buffer, so give it up and loop back
2017 * to get another one.
2018 */
2019 UnpinBuffer(buf_hdr);
2020 goto again;
2021 }
2022
2023 /*
2024 * If using a nondefault strategy, and writing the buffer would
2025 * require a WAL flush, let the strategy decide whether to go ahead
2026 * and write/reuse the buffer or to choose another victim. We need a
2027 * lock to inspect the page LSN, so this can't be done inside
2028 * StrategyGetBuffer.
2029 */
2030 if (strategy != NULL)
2031 {
2032 XLogRecPtr lsn;
2033
2034 /* Read the LSN while holding buffer header lock */
2035 buf_state = LockBufHdr(buf_hdr);
2036 lsn = BufferGetLSN(buf_hdr);
2037 UnlockBufHdr(buf_hdr, buf_state);
2038
2039 if (XLogNeedsFlush(lsn)
2040 && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2041 {
2042 LWLockRelease(content_lock);
2043 UnpinBuffer(buf_hdr);
2044 goto again;
2045 }
2046 }
2047
2048 /* OK, do the I/O */
2049 FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
2050 LWLockRelease(content_lock);
2051
2052 ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
2053 &buf_hdr->tag);
2054 }
2055
2056
2057 if (buf_state & BM_VALID)
2058 {
2059 /*
2060 * When a BufferAccessStrategy is in use, blocks evicted from shared
2061 * buffers are counted as IOOP_EVICT in the corresponding context
2062 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2063 * strategy in two cases: 1) while initially claiming buffers for the
2064 * strategy ring 2) to replace an existing strategy ring buffer
2065 * because it is pinned or in use and cannot be reused.
2066 *
2067 * Blocks evicted from buffers already in the strategy ring are
2068 * counted as IOOP_REUSE in the corresponding strategy context.
2069 *
2070 * At this point, we can accurately count evictions and reuses,
2071 * because we have successfully claimed the valid buffer. Previously,
2072 * we may have been forced to release the buffer due to concurrent
2073 * pinners or erroring out.
2074 */
2075 pgstat_count_io_op(IOOBJECT_RELATION, io_context,
2076 from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
2077 }
2078
2079 /*
2080 * If the buffer has an entry in the buffer mapping table, delete it. This
2081 * can fail because another backend could have pinned or dirtied the
2082 * buffer.
2083 */
2084 if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
2085 {
2086 UnpinBuffer(buf_hdr);
2087 goto again;
2088 }
2089
2090 /* a final set of sanity checks */
2091#ifdef USE_ASSERT_CHECKING
2092 buf_state = pg_atomic_read_u32(&buf_hdr->state);
2093
2094 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2095 Assert(!(buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY)));
2096
2098#endif
2099
2100 return buf;
2101}
WritebackContext BackendWritebackContext
Definition: buf_init.c:23
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5205
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition: bufmgr.c:5915
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
Definition: freelist.c:196
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition: freelist.c:798
@ IOOP_EVICT
Definition: pgstat.h:303
@ IOOP_REUSE
Definition: pgstat.h:306
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:68
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3133

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferGetLSN, CheckBufferIsPinnedOnce(), CurrentResourceOwner, FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBufHdr(), LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), pg_atomic_read_u32(), pgstat_count_io_op(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::state, StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5373 of file bufmgr.c.

5374{
5375 int bufid = GetStartupBufferPinWaitBufId();
5376
5377 /*
5378 * If we get woken slowly then it's possible that the Startup process was
5379 * already woken by other backends before we got here. Also possible that
5380 * we get here by multiple interrupts or interrupts at inappropriate
5381 * times, so make sure we do nothing if the bufid is not set.
5382 */
5383 if (bufid < 0)
5384 return false;
5385
5386 if (GetPrivateRefCount(bufid + 1) > 0)
5387 return true;
5388
5389 return false;
5390}
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:717

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().
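
A hedged sketch of the call pattern on a standby (modeled loosely on the recovery-conflict handling; the exact error report is illustrative):

    /* If this backend itself holds the pin the startup process is waiting on,
     * it must resolve the conflict rather than keep waiting. */
    if (HoldingBufferPinThatDelaysRecovery())
        ereport(ERROR,
                (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
                 errmsg("canceling statement due to conflict with recovery")));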

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 3576 of file bufmgr.c.

3577{
3578 HASHCTL hash_ctl;
3579
3580 memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
3581
3582 hash_ctl.keysize = sizeof(int32);
3583 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
3584
3585 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
3586 HASH_ELEM | HASH_BLOBS);
3587
3588 /*
3589 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
3590 * the corresponding phase of backend shutdown.
3591 */
3592 Assert(MyProc != NULL);
3593 on_shmem_exit(AtProcExit_Buffers, 0);
3594}
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:3601
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
PGPROC * MyProc
Definition: proc.c:66
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert, AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1784 of file bufmgr.c.

1785{
1786 BufferTag oldTag;
1787 uint32 oldHash; /* hash value for oldTag */
1788 LWLock *oldPartitionLock; /* buffer partition lock for it */
1789 uint32 oldFlags;
1790 uint32 buf_state;
1791
1792 /* Save the original buffer tag before dropping the spinlock */
1793 oldTag = buf->tag;
1794
1795 buf_state = pg_atomic_read_u32(&buf->state);
1796 Assert(buf_state & BM_LOCKED);
1797 UnlockBufHdr(buf, buf_state);
1798
1799 /*
1800 * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1801 * worth storing the hashcode in BufferDesc so we need not recompute it
1802 * here? Probably not.
1803 */
1804 oldHash = BufTableHashCode(&oldTag);
1805 oldPartitionLock = BufMappingPartitionLock(oldHash);
1806
1807retry:
1808
1809 /*
1810 * Acquire exclusive mapping lock in preparation for changing the buffer's
1811 * association.
1812 */
1813 LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1814
1815 /* Re-lock the buffer header */
1816 buf_state = LockBufHdr(buf);
1817
1818 /* If it's changed while we were waiting for lock, do nothing */
1819 if (!BufferTagsEqual(&buf->tag, &oldTag))
1820 {
1821 UnlockBufHdr(buf, buf_state);
1822 LWLockRelease(oldPartitionLock);
1823 return;
1824 }
1825
1826 /*
1827 * We assume the only reason for it to be pinned is that someone else is
1828 * flushing the page out. Wait for them to finish. (This could be an
1829 * infinite loop if the refcount is messed up... it would be nice to time
1830 * out after awhile, but there seems no way to be sure how many loops may
1831 * be needed. Note that if the other guy has pinned the buffer but not
1832 * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1833 * be busy-looping here.)
1834 */
1835 if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1836 {
1837 UnlockBufHdr(buf, buf_state);
1838 LWLockRelease(oldPartitionLock);
1839 /* safety check: should definitely not be our *own* pin */
1840 if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1841 elog(ERROR, "buffer is pinned in InvalidateBuffer");
1842 WaitIO(buf);
1843 goto retry;
1844 }
1845
1846 /*
1847 * Clear out the buffer's tag and flags. We must do this to ensure that
1848 * linear scans of the buffer array don't think the buffer is valid.
1849 */
1850 oldFlags = buf_state & BUF_FLAG_MASK;
1851 ClearBufferTag(&buf->tag);
1852 buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1853 UnlockBufHdr(buf, buf_state);
1854
1855 /*
1856 * Remove the buffer from the lookup hashtable, if it was in there.
1857 */
1858 if (oldFlags & BM_TAG_VALID)
1859 BufTableDelete(&oldTag, oldHash);
1860
1861 /*
1862 * Done with mapping lock.
1863 */
1864 LWLockRelease(oldPartitionLock);
1865
1866 /*
1867 * Insert the buffer at the head of the list of free buffers.
1868 */
1869 StrategyFreeBuffer(buf);
1870}
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:44
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
#define BM_LOCKED
Definition: buf_internals.h:59
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:5509

References Assert, BM_LOCKED, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), StrategyFreeBuffer(), UnlockBufHdr(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 1882 of file bufmgr.c.

1883{
1884 uint32 buf_state;
1885 uint32 hash;
1886 LWLock *partition_lock;
1887 BufferTag tag;
1888
1890
1891 /* have buffer pinned, so it's safe to read tag without lock */
1892 tag = buf_hdr->tag;
1893
1894 hash = BufTableHashCode(&tag);
1895 partition_lock = BufMappingPartitionLock(hash);
1896
1897 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
1898
1899 /* lock the buffer header */
1900 buf_state = LockBufHdr(buf_hdr);
1901
1902 /*
1903 * We have the buffer pinned nobody else should have been able to unset
1904 * this concurrently.
1905 */
1906 Assert(buf_state & BM_TAG_VALID);
1907 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1908 Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
1909
1910 /*
1911 * If somebody else pinned the buffer since, or even worse, dirtied it,
1912 * give up on this buffer: It's clearly in use.
1913 */
1914 if (BUF_STATE_GET_REFCOUNT(buf_state) != 1 || (buf_state & BM_DIRTY))
1915 {
1916 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1917
1918 UnlockBufHdr(buf_hdr, buf_state);
1919 LWLockRelease(partition_lock);
1920
1921 return false;
1922 }
1923
1924 /*
1925 * Clear out the buffer's tag and flags and usagecount. This is not
1926 * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
1927 * doing anything with the buffer. But currently it's beneficial, as the
1928 * cheaper pre-check for several linear scans of shared buffers use the
1929 * tag (see e.g. FlushDatabaseBuffers()).
1930 */
1931 ClearBufferTag(&buf_hdr->tag);
1932 buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1933 UnlockBufHdr(buf_hdr, buf_state);
1934
1935 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1936
1937 /* finally delete buffer from the buffer mapping table */
1938 BufTableDelete(&tag, hash);
1939
1940 LWLockRelease(partition_lock);
1941
1942 Assert(!(buf_state & (BM_DIRTY | BM_VALID | BM_TAG_VALID)));
1943 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1945
1946 return true;
1947}

References Assert, BM_DIRTY, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by EvictUnpinnedBuffer(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 5455 of file bufmgr.c.

5456{
5457 BufferDesc *bufHdr;
5458 uint32 buf_state;
5459
5460 Assert(BufferIsValid(buffer));
5461
5462 if (BufferIsLocal(buffer))
5463 {
5464 /* There should be exactly one pin */
5465 if (LocalRefCount[-buffer - 1] != 1)
5466 return false;
5467 /* Nobody else to wait for */
5468 return true;
5469 }
5470
5471 /* There should be exactly one local pin */
5472 if (GetPrivateRefCount(buffer) != 1)
5473 return false;
5474
5475 bufHdr = GetBufferDescriptor(buffer - 1);
5476
5477 /* caller must hold exclusive lock on buffer */
5478 Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
5479 LW_EXCLUSIVE));
5480
5481 buf_state = LockBufHdr(bufHdr);
5482
5483 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5484 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5485 {
5486 /* pincount is OK. */
5487 UnlockBufHdr(bufHdr, buf_state);
5488 return true;
5489 }
5490
5491 UnlockBufHdr(bufHdr, buf_state);
5492 return false;
5493}

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
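
A minimal sketch of the intended pattern (assuming the caller already holds a pin and an exclusive content lock on buf, as in the hash index callers listed above):

    if (IsBufferCleanupOK(buf))
    {
        /* sole pin: safe to perform page cleanup such as removing dead tuples */
    }
    else
    {
        /* another backend still has the page pinned; skip cleanup for now */
    }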

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 5965 of file bufmgr.c.

5966{
5967 instr_time io_start;
5968 int i;
5969
5970 if (wb_context->nr_pending == 0)
5971 return;
5972
5973 /*
5974 * Executing the writes in-order can make them a lot faster, and allows to
5975 * merge writeback requests to consecutive blocks into larger writebacks.
5976 */
5977 sort_pending_writebacks(wb_context->pending_writebacks,
5978 wb_context->nr_pending);
5979
5980 io_start = pgstat_prepare_io_time(track_io_timing);
5981
5982 /*
5983 * Coalesce neighbouring writes, but nothing else. For that we iterate
5984 * through the, now sorted, array of pending flushes, and look forward to
5985 * find all neighbouring (or identical) writes.
5986 */
5987 for (i = 0; i < wb_context->nr_pending; i++)
5988 {
5989 PendingWriteback *cur;
5990 PendingWriteback *next;
5991 SMgrRelation reln;
5992 int ahead;
5993 BufferTag tag;
5994 RelFileLocator currlocator;
5995 Size nblocks = 1;
5996
5997 cur = &wb_context->pending_writebacks[i];
5998 tag = cur->tag;
5999 currlocator = BufTagGetRelFileLocator(&tag);
6000
6001 /*
6002 * Peek ahead, into following writeback requests, to see if they can
6003 * be combined with the current one.
6004 */
6005 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
6006 {
6007
6008 next = &wb_context->pending_writebacks[i + ahead + 1];
6009
6010 /* different file, stop */
6011 if (!RelFileLocatorEquals(currlocator,
6012 BufTagGetRelFileLocator(&next->tag)) ||
6013 BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
6014 break;
6015
6016 /* ok, block queued twice, skip */
6017 if (cur->tag.blockNum == next->tag.blockNum)
6018 continue;
6019
6020 /* only merge consecutive writes */
6021 if (cur->tag.blockNum + 1 != next->tag.blockNum)
6022 break;
6023
6024 nblocks++;
6025 cur = next;
6026 }
6027
6028 i += ahead;
6029
6030 /* and finally tell the kernel to write the data to storage */
6031 reln = smgropen(currlocator, INVALID_PROC_NUMBER);
6032 smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
6033 }
6034
6035 /*
6036 * Assume that writeback requests are only issued for buffers containing
6037 * blocks of permanent relations.
6038 */
6039 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
6040 IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
6041
6042 wb_context->nr_pending = 0;
6043}
static int32 next
Definition: blutils.c:219
struct cursor * cur
Definition: ecpg.c:29
@ IOOP_WRITEBACK
Definition: pgstat.h:307
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:665
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
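
A sketch of how a writeback context is typically driven (using checkpoint_flush_after as the pending limit is illustrative, mirroring the checkpointer usage):

    WritebackContext wb_context;

    WritebackContextInit(&wb_context, &checkpoint_flush_after);
    /* ... after each buffer write, queue a writeback request ... */
    ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL, &bufHdr->tag);
    /* ... once enough requests have accumulated, or at the end of the pass ... */
    IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);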

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2116 of file bufmgr.c.

2117{
2118 uint32 max_backends;
2119 int max_proportional_pins;
2120
2121 if (*additional_pins <= 1)
2122 return;
2123
2124 max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
2125 max_proportional_pins = NBuffers / max_backends;
2126
2127 /*
2128 * Subtract the approximate number of buffers already pinned by this
2129 * backend. We get the number of "overflowed" pins for free, but don't
2130 * know the number of pins in PrivateRefCountArray. The cost of
2131 * calculating that exactly doesn't seem worth it, so just assume the max.
2132 */
2133 max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2134
2135 if (max_proportional_pins <= 0)
2136 max_proportional_pins = 1;
2137
2138 if (*additional_pins > max_proportional_pins)
2139 *additional_pins = max_proportional_pins;
2140}
int MaxBackends
Definition: globals.c:145
#define NUM_AUXILIARY_PROCS
Definition: proc.h:445

References MaxBackends, NBuffers, NUM_AUXILIARY_PROCS, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by ExtendBufferedRelShared(), and read_stream_begin_impl().
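
A small sketch of the clamping behavior (the requested count is illustrative):

    uint32 extend_by = 64;      /* buffers we would like to pin additionally */

    LimitAdditionalPins(&extend_by);
    /* extend_by is now clamped to roughly NBuffers / (MaxBackends + auxiliary
     * processes), minus an allowance for pins this backend may already hold,
     * but never below 1 */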

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 5714 of file bufmgr.c.

5715{
5716 BufferDesc *bufHdr = (BufferDesc *) arg;
5717
5718 if (bufHdr != NULL)
5719 {
5720 char *path = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
5722 BufTagGetForkNum(&bufHdr->tag));
5723
5724 errcontext("writing block %u of relation %s",
5725 bufHdr->tag.blockNum, path);
5726 pfree(path);
5727 }
5728}
#define errcontext
Definition: elog.h:196
void * arg

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyProcNumber, pfree(), relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 5158 of file bufmgr.c.

5159{
5160 BufferDesc *buf;
5161
5162 Assert(BufferIsPinned(buffer));
5163 if (BufferIsLocal(buffer))
5164 return; /* local buffers need no lock */
5165
5166 buf = GetBufferDescriptor(buffer - 1);
5167
5168 if (mode == BUFFER_LOCK_UNLOCK)
5169 LWLockRelease(BufferDescriptorGetContentLock(buf));
5170 else if (mode == BUFFER_LOCK_SHARE)
5171 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
5172 else if (mode == BUFFER_LOCK_EXCLUSIVE)
5173 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
5174 else
5175 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5176}
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_lock(), heap_inplace_unlock(), heap_inplace_update_and_unlock(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroAndLockBuffer().
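
The canonical pin/lock/modify pattern, sketched for a hypothetical caller (rel and blkno are assumed valid):

    Buffer  buf;
    Page    page;

    buf = ReadBuffer(rel, blkno);
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    page = BufferGetPage(buf);

    /* ... inspect or modify the page contents ... */

    MarkBufferDirty(buf);           /* only if the page was actually modified */
    UnlockReleaseBuffer(buf);       /* drops the content lock and the pin */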

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5238 of file bufmgr.c.

5239{
5240 BufferDesc *bufHdr;
5241 TimestampTz waitStart = 0;
5242 bool waiting = false;
5243 bool logged_recovery_conflict = false;
5244
5245 Assert(BufferIsPinned(buffer));
5246 Assert(PinCountWaitBuf == NULL);
5247
5249
5250 /* Nobody else to wait for */
5251 if (BufferIsLocal(buffer))
5252 return;
5253
5254 bufHdr = GetBufferDescriptor(buffer - 1);
5255
5256 for (;;)
5257 {
5258 uint32 buf_state;
5259
5260 /* Try to acquire lock */
5262 buf_state = LockBufHdr(bufHdr);
5263
5264 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5265 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5266 {
5267 /* Successfully acquired exclusive lock with pincount 1 */
5268 UnlockBufHdr(bufHdr, buf_state);
5269
5270 /*
5271 * Emit the log message if recovery conflict on buffer pin was
5272 * resolved but the startup process waited longer than
5273 * deadlock_timeout for it.
5274 */
5275 if (logged_recovery_conflict)
5277 waitStart, GetCurrentTimestamp(),
5278 NULL, false);
5279
5280 if (waiting)
5281 {
5282 /* reset ps display to remove the suffix if we added one */
5284 waiting = false;
5285 }
5286 return;
5287 }
5288 /* Failed, so mark myself as waiting for pincount 1 */
5289 if (buf_state & BM_PIN_COUNT_WAITER)
5290 {
5291 UnlockBufHdr(bufHdr, buf_state);
5293 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5294 }
5296 PinCountWaitBuf = bufHdr;
5297 buf_state |= BM_PIN_COUNT_WAITER;
5298 UnlockBufHdr(bufHdr, buf_state);
5300
5301 /* Wait to be signaled by UnpinBuffer() */
5302 if (InHotStandby)
5303 {
5304 if (!waiting)
5305 {
5306 /* adjust the process title to indicate that it's waiting */
5307 set_ps_display_suffix("waiting");
5308 waiting = true;
5309 }
5310
5311 /*
5312 * Emit the log message if the startup process is waiting longer
5313 * than deadlock_timeout for recovery conflict on buffer pin.
5314 *
5315 * Skip this if first time through because the startup process has
5316 * not started waiting yet in this case. So, the wait start
5317 * timestamp is set after this logic.
5318 */
5319 if (waitStart != 0 && !logged_recovery_conflict)
5320 {
5322
5323 if (TimestampDifferenceExceeds(waitStart, now,
5325 {
5327 waitStart, now, NULL, true);
5328 logged_recovery_conflict = true;
5329 }
5330 }
5331
5332 /*
5333 * Set the wait start timestamp if logging is enabled and first
5334 * time through.
5335 */
5336 if (log_recovery_conflict_waits && waitStart == 0)
5337 waitStart = GetCurrentTimestamp();
5338
5339 /* Publish the bufid that Startup process waits on */
5340 SetStartupBufferPinWaitBufId(buffer - 1);
5341 /* Set alarm and then wait to be signaled by UnpinBuffer() */
5343 /* Reset the published bufid */
5345 }
5346 else
5347 ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
5348
5349 /*
5350 * Remove flag marking us as waiter. Normally this will not be set
5351 * anymore, but ProcWaitForSignal() can return for other signals as
5352 * well. We take care to only reset the flag if we're the waiter, as
5353 * theoretically another backend could have started waiting. That's
5354 * impossible with the current usages due to table level locking, but
5355 * better be safe.
5356 */
5357 buf_state = LockBufHdr(bufHdr);
5358 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5360 buf_state &= ~BM_PIN_COUNT_WAITER;
5361 UnlockBufHdr(bufHdr, buf_state);
5362
5363 PinCountWaitBuf = NULL;
5364 /* Loop back and try again */
5365 }
5366}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:66
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:176
int64 TimestampTz
Definition: timestamp.h:39
static volatile sig_atomic_t waiting
Definition: latch.c:162
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:423
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:371
int DeadlockTimeout
Definition: proc.c:57
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:705
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1896
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:792
bool log_recovery_conflict_waits
Definition: standby.c:41
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:273
int wait_backend_pgprocno
#define InHotStandby
Definition: xlogutils.h:60

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
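
A hedged sketch of the vacuum-style call pattern (the pruning step and the vac_strategy variable are placeholders):

    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy);
    LockBufferForCleanup(buf);      /* exclusive lock, and we hold the only pin */

    /* ... safe to prune and defragment the page here ... */

    UnlockReleaseBuffer(buf);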

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 5761 of file bufmgr.c.

5762{
5763 SpinDelayStatus delayStatus;
5764 uint32 old_buf_state;
5765
5767
5768 init_local_spin_delay(&delayStatus);
5769
5770 while (true)
5771 {
5772 /* set BM_LOCKED flag */
5773 old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
5774 /* if it wasn't set before we're OK */
5775 if (!(old_buf_state & BM_LOCKED))
5776 break;
5777 perform_spin_delay(&delayStatus);
5778 }
5779 finish_spin_delay(&delayStatus);
5780 return old_buf_state | BM_LOCKED;
5781}
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:410
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:186
#define init_local_spin_delay(status)
Definition: s_lock.h:751

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBufferNoOwner(), and WaitIO().
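
The standard header-spinlock idiom used throughout this file (the field inspection is a placeholder):

    uint32  buf_state;

    buf_state = LockBufHdr(bufHdr);     /* sets BM_LOCKED, returns the locked state */
    /* ... examine or adjust the refcount/flag bits encoded in buf_state ... */
    UnlockBufHdr(bufHdr, buf_state);    /* stores buf_state with BM_LOCKED cleared */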

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 2532 of file bufmgr.c.

2533{
2534 BufferDesc *bufHdr;
2535 uint32 buf_state;
2536 uint32 old_buf_state;
2537
2538 if (!BufferIsValid(buffer))
2539 elog(ERROR, "bad buffer ID: %d", buffer);
2540
2541 if (BufferIsLocal(buffer))
2542 {
2543 MarkLocalBufferDirty(buffer);
2544 return;
2545 }
2546
2547 bufHdr = GetBufferDescriptor(buffer - 1);
2548
2549 Assert(BufferIsPinned(buffer));
2550 Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2551 LW_EXCLUSIVE));
2552
2553 old_buf_state = pg_atomic_read_u32(&bufHdr->state);
2554 for (;;)
2555 {
2556 if (old_buf_state & BM_LOCKED)
2557 old_buf_state = WaitBufHdrUnlocked(bufHdr);
2558
2559 buf_state = old_buf_state;
2560
2561 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2562 buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
2563
2564 if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
2565 buf_state))
2566 break;
2567 }
2568
2569 /*
2570 * If the buffer was not dirty already, do vacuum accounting.
2571 */
2572 if (!(old_buf_state & BM_DIRTY))
2573 {
2574 pgBufferUsage.shared_blks_dirtied++;
2575 if (VacuumCostActive)
2576 VacuumCostBalance += VacuumCostPageDirty;
2577 }
2578}
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:349
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:5791
bool VacuumCostActive
Definition: globals.c:157
int VacuumCostBalance
Definition: globals.c:156
int VacuumCostPageDirty
Definition: globals.c:152
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:450
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), do_setval(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().
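
MarkBufferDirty() is normally paired with WAL logging inside a critical section. A condensed sketch of that convention (the rmgr id, info flags, and record payload are illustrative):

    START_CRIT_SECTION();

    /* ... apply the change to the page ... */
    MarkBufferDirty(buf);

    if (RelationNeedsWAL(rel))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        /* ... XLogRegisterData() for the record payload ... */
        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INSERT);
        PageSetLSN(BufferGetPage(buf), recptr);
    }

    END_CRIT_SECTION();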

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 4988 of file bufmgr.c.

4989{
4990 BufferDesc *bufHdr;
4991 Page page = BufferGetPage(buffer);
4992
4993 if (!BufferIsValid(buffer))
4994 elog(ERROR, "bad buffer ID: %d", buffer);
4995
4996 if (BufferIsLocal(buffer))
4997 {
4998 MarkLocalBufferDirty(buffer);
4999 return;
5000 }
5001
5002 bufHdr = GetBufferDescriptor(buffer - 1);
5003
5004 Assert(GetPrivateRefCount(buffer) > 0);
5005 /* here, either share or exclusive lock is OK */
5006 Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
5007
5008 /*
5009 * This routine might get called many times on the same page, if we are
5010 * making the first scan after commit of an xact that added/deleted many
5011 * tuples. So, be as quick as we can if the buffer is already dirty. We
5012 * do this by not acquiring spinlock if it looks like the status bits are
5013 * already set. Since we make this test unlocked, there's a chance we
5014 * might fail to notice that the flags have just been cleared, and failed
5015 * to reset them, due to memory-ordering issues. But since this function
5016 * is only intended to be used in cases where failing to write out the
5017 * data would be harmless anyway, it doesn't really matter.
5018 */
5019 if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5020 (BM_DIRTY | BM_JUST_DIRTIED))
5021 {
5022 XLogRecPtr lsn = InvalidXLogRecPtr;
5023 bool dirtied = false;
5024 bool delayChkptFlags = false;
5025 uint32 buf_state;
5026
5027 /*
5028 * If we need to protect hint bit updates from torn writes, WAL-log a
5029 * full page image of the page. This full page image is only necessary
5030 * if the hint bit update is the first change to the page since the
5031 * last checkpoint.
5032 *
5033 * We don't check full_page_writes here because that logic is included
5034 * when we call XLogInsert() since the value changes dynamically.
5035 */
5036 if (XLogHintBitIsNeeded() &&
5037 (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
5038 {
5039 /*
5040 * If we must not write WAL, due to a relfilelocator-specific
5041 * condition or being in recovery, don't dirty the page. We can
5042 * set the hint, just not dirty the page as a result so the hint
5043 * is lost when we evict the page or shutdown.
5044 *
5045 * See src/backend/storage/page/README for longer discussion.
5046 */
5047 if (RecoveryInProgress() ||
5048 RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
5049 return;
5050
5051 /*
5052 * If the block is already dirty because we either made a change
5053 * or set a hint already, then we don't need to write a full page
5054 * image. Note that aggressive cleaning of blocks dirtied by hint
5055 * bit setting would increase the call rate. Bulk setting of hint
5056 * bits would reduce the call rate...
5057 *
5058 * We must issue the WAL record before we mark the buffer dirty.
5059 * Otherwise we might write the page before we write the WAL. That
5060 * causes a race condition, since a checkpoint might occur between
5061 * writing the WAL record and marking the buffer dirty. We solve
5062 * that with a kluge, but one that is already in use during
5063 * transaction commit to prevent race conditions. Basically, we
5064 * simply prevent the checkpoint WAL record from being written
5065 * until we have marked the buffer dirty. We don't start the
5066 * checkpoint flush until we have marked dirty, so our checkpoint
5067 * must flush the change to disk successfully or the checkpoint
5068 * never gets written, so crash recovery will fix.
5069 *
5070 * It's possible we may enter here without an xid, so it is
5071 * essential that CreateCheckPoint waits for virtual transactions
5072 * rather than full transactionids.
5073 */
5074 Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
5075 MyProc->delayChkptFlags |= DELAY_CHKPT_START;
5076 delayChkptFlags = true;
5077 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5078 }
5079
5080 buf_state = LockBufHdr(bufHdr);
5081
5082 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5083
5084 if (!(buf_state & BM_DIRTY))
5085 {
5086 dirtied = true; /* Means "will be dirtied by this action" */
5087
5088 /*
5089 * Set the page LSN if we wrote a backup block. We aren't supposed
5090 * to set this when only holding a share lock but as long as we
5091 * serialise it somehow we're OK. We choose to set LSN while
5092 * holding the buffer header lock, which causes any reader of an
5093 * LSN who holds only a share lock to also obtain a buffer header
5094 * lock before using PageGetLSN(), which is enforced in
5095 * BufferGetLSNAtomic().
5096 *
5097 * If checksums are enabled, you might think we should reset the
5098 * checksum here. That will happen when the page is written
5099 * sometime later in this checkpoint cycle.
5100 */
5101 if (!XLogRecPtrIsInvalid(lsn))
5102 PageSetLSN(page, lsn);
5103 }
5104
5105 buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
5106 UnlockBufHdr(bufHdr, buf_state);
5107
5108 if (delayChkptFlags)
5109 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5110
5111 if (dirtied)
5112 {
5113 pgBufferUsage.shared_blks_dirtied++;
5114 if (VacuumCostActive)
5115 VacuumCostBalance += VacuumCostPageDirty;
5116 }
5117 }
5118}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define DELAY_CHKPT_START
Definition: proc.h:119
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:557
int delayChkptFlags
Definition: proc.h:240
bool RecoveryInProgress(void)
Definition: xlog.c:6334
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1065

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
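
A sketch of the hint-bit pattern, modeled on SetHintBits() (only a pin and a share lock on the buffer are required):

    /* Cache a visibility hint on the tuple; losing it in a crash is harmless. */
    tuple->t_infomask |= HEAP_XMIN_COMMITTED;
    MarkBufferDirtyHint(buffer, true);      /* true: page has a standard layout */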

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 316 of file bufmgr.c.

317{
318 PrivateRefCountEntry *res;
319
320 /* only allowed to be called when a reservation has been made */
321 Assert(ReservedRefCountEntry != NULL);
322
323 /* use up the reserved entry */
324 res = ReservedRefCountEntry;
325 ReservedRefCountEntry = NULL;
326
327 /* and fill it */
328 res->buffer = buffer;
329 res->refcount = 0;
330
331 return res;
332}

References Assert, PrivateRefCountEntry::buffer, res, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 2652 of file bufmgr.c.

2653{
2655 bool result;
2657
2660
2661 ref = GetPrivateRefCountEntry(b, true);
2662
2663 if (ref == NULL)
2664 {
2665 uint32 buf_state;
2666 uint32 old_buf_state;
2667
2669
2670 old_buf_state = pg_atomic_read_u32(&buf->state);
2671 for (;;)
2672 {
2673 if (old_buf_state & BM_LOCKED)
2674 old_buf_state = WaitBufHdrUnlocked(buf);
2675
2676 buf_state = old_buf_state;
2677
2678 /* increase refcount */
2679 buf_state += BUF_REFCOUNT_ONE;
2680
2681 if (strategy == NULL)
2682 {
2683 /* Default case: increase usagecount unless already max. */
2685 buf_state += BUF_USAGECOUNT_ONE;
2686 }
2687 else
2688 {
2689 /*
2690 * Ring buffers shouldn't evict others from pool. Thus we
2691 * don't make usagecount more than 1.
2692 */
2693 if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
2694 buf_state += BUF_USAGECOUNT_ONE;
2695 }
2696
2697 if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2698 buf_state))
2699 {
2700 result = (buf_state & BM_VALID) != 0;
2701
2702 /*
2703 * Assume that we acquired a buffer pin for the purposes of
2704 * Valgrind buffer client checks (even in !result case) to
2705 * keep things simple. Buffers that are unsafe to access are
2706 * not generally guaranteed to be marked undefined or
2707 * non-accessible in any case.
2708 */
2710 break;
2711 }
2712 }
2713 }
2714 else
2715 {
2716 /*
2717 * If we previously pinned the buffer, it is likely to be valid, but
2718 * it may not be if StartReadBuffers() was called and
2719 * WaitReadBuffers() hasn't been called yet. We'll check by loading
2720 * the flags without locking. This is racy, but it's OK to return
2721 * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
2722 * it'll see that it's now valid.
2723 *
2724 * Note: We deliberately avoid a Valgrind client request here.
2725 * Individual access methods can optionally superimpose buffer page
2726 * client requests on top of our client requests to enforce that
2727 * buffers are only accessed while locked (and pinned). It's possible
2728 * that the buffer page is legitimately non-accessible here. We
2729 * cannot meddle with that.
2730 */
2731 result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
2732 }
2733
2734 ref->refcount++;
2735 Assert(ref->refcount > 0);
2737 return result;
2738}
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:77
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:42
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:51
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:316
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservedRefCountEntry, ResourceOwnerRememberBuffer(), VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 2763 of file bufmgr.c.

2764{
2765 Buffer b;
2767 uint32 buf_state;
2768
2769 /*
2770 * As explained, We don't expect any preexisting pins. That allows us to
2771 * manipulate the PrivateRefCount after releasing the spinlock
2772 */
2774
2775 /*
2776 * Buffer can't have a preexisting pin, so mark its page as defined to
2777 * Valgrind (this is similar to the PinBuffer() case where the backend
2778 * doesn't already have a buffer pin)
2779 */
2781
2782 /*
2783 * Since we hold the buffer spinlock, we can update the buffer state and
2784 * release the lock in one operation.
2785 */
2786 buf_state = pg_atomic_read_u32(&buf->state);
2787 Assert(buf_state & BM_LOCKED);
2788 buf_state += BUF_REFCOUNT_ONE;
2789 UnlockBufHdr(buf, buf_state);
2790
2792
2794 ref->refcount++;
2795
2797}

References Assert, b, BM_LOCKED, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), UnlockBufHdr(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr 
)
static

Definition at line 1106 of file bufmgr.c.

1113{
1114 BufferDesc *bufHdr;
1115 IOContext io_context;
1116 IOObject io_object;
1117
1118 Assert(blockNum != P_NEW);
1119
1120 /* Persistence should be set before */
1121 Assert((persistence == RELPERSISTENCE_TEMP ||
1122 persistence == RELPERSISTENCE_PERMANENT ||
1123 persistence == RELPERSISTENCE_UNLOGGED));
1124
1125 if (persistence == RELPERSISTENCE_TEMP)
1126 {
1127 io_context = IOCONTEXT_NORMAL;
1128 io_object = IOOBJECT_TEMP_RELATION;
1129 }
1130 else
1131 {
1132 io_context = IOContextForStrategy(strategy);
1133 io_object = IOOBJECT_RELATION;
1134 }
1135
1136 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1137 smgr->smgr_rlocator.locator.spcOid,
1138 smgr->smgr_rlocator.locator.dbOid,
1139 smgr->smgr_rlocator.locator.relNumber,
1140 smgr->smgr_rlocator.backend);
1141
1142 if (persistence == RELPERSISTENCE_TEMP)
1143 {
1144 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1145 if (*foundPtr)
1146 pgBufferUsage.local_blks_hit++;
1147 }
1148 else
1149 {
1150 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1151 strategy, foundPtr, io_context);
1152 if (*foundPtr)
1153 pgBufferUsage.shared_blks_hit++;
1154 }
1155 if (rel)
1156 {
1157 /*
1158 * While pgBufferUsage's "read" counter isn't bumped unless we reach
1159 * WaitReadBuffers() (so, not for hits, and not for buffers that are
1160 * zeroed instead), the per-relation stats always count them.
1161 */
1162 pgstat_count_buffer_read(rel);
1163 if (*foundPtr)
1164 pgstat_count_buffer_hit(rel);
1165 }
1166 if (*foundPtr)
1167 {
1168 pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
1169 if (VacuumCostActive)
1170 VacuumCostBalance += VacuumCostPageHit;
1171
1172 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1173 smgr->smgr_rlocator.locator.spcOid,
1174 smgr->smgr_rlocator.locator.dbOid,
1175 smgr->smgr_rlocator.locator.relNumber,
1176 smgr->smgr_rlocator.backend,
1177 true);
1178 }
1179
1180 return BufferDescriptorGetBuffer(bufHdr);
1181}
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition: bufmgr.c:1606
#define P_NEW
Definition: bufmgr.h:184
int VacuumCostPageHit
Definition: globals.c:150
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:116
IOObject
Definition: pgstat.h:274
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:684
@ IOOP_HIT
Definition: pgstat.h:305
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:689
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_hit
Definition: instrument.h:26

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, and VacuumCostPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 639 of file bufmgr.c.

640{
641 Assert(RelationIsValid(reln));
642 Assert(BlockNumberIsValid(blockNum));
643
644 if (RelationUsesLocalBuffers(reln))
645 {
646 /* see comments in ReadBufferExtended */
647 if (RELATION_IS_OTHER_TEMP(reln))
648 ereport(ERROR,
649 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
650 errmsg("cannot access temporary tables of other sessions")));
651
652 /* pass it off to localbuf.c */
653 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
654 }
655 else
656 {
657 /* pass it to the shared buffer version */
658 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
659 }
660}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:549
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:69
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:658
#define RelationIsValid(relation)
Definition: rel.h:478

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().
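
A hedged usage sketch (rel, first, and nblocks are assumed; this is not copied from any caller): issue prefetch hints for a range of blocks and then read them normally. The prefetch only initiates readahead; it does not pin or validate anything:

    BlockNumber blkno;

    /* hint the blocks we are about to read */
    for (blkno = first; blkno < first + nblocks; blkno++)
        (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

    /* now read them; the reads are more likely to be cheap */
    for (blkno = first; blkno < first + nblocks; blkno++)
    {
        Buffer  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, NULL);

        /* ... inspect BufferGetPage(buf) under a content lock ... */
        ReleaseBuffer(buf);
    }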

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 549 of file bufmgr.c.

552{
553 PrefetchBufferResult result = {InvalidBuffer, false};
554 BufferTag newTag; /* identity of requested block */
555 uint32 newHash; /* hash value for newTag */
556 LWLock *newPartitionLock; /* buffer partition lock for it */
557 int buf_id;
558
559 Assert(BlockNumberIsValid(blockNum));
560
561 /* create a tag so we can lookup the buffer */
562 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
563 forkNum, blockNum);
564
565 /* determine its hash code and partition lock ID */
566 newHash = BufTableHashCode(&newTag);
567 newPartitionLock = BufMappingPartitionLock(newHash);
568
569 /* see if the block is in the buffer pool already */
570 LWLockAcquire(newPartitionLock, LW_SHARED);
571 buf_id = BufTableLookup(&newTag, newHash);
572 LWLockRelease(newPartitionLock);
573
574 /* If not in buffers, initiate prefetch */
575 if (buf_id < 0)
576 {
577#ifdef USE_PREFETCH
578 /*
579 * Try to initiate an asynchronous read. This returns false in
580 * recovery if the relation file doesn't exist.
581 */
582 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
583 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
584 {
585 result.initiated_io = true;
586 }
587#endif /* USE_PREFETCH */
588 }
589 else
590 {
591 /*
592 * Report the buffer it was in at that time. The caller may be able
593 * to avoid a buffer table lookup, but it's not pinned and it must be
594 * rechecked!
595 */
596 result.recent_buffer = buf_id + 1;
597 }
598
599 /*
600 * If the block *is* in buffers, we do nothing. This is not really ideal:
601 * the block might be just about to be evicted, which would be stupid
602 * since we know we are going to need it soon. But the only easy answer
603 * is to bump the usage_count, which does not seem like a great solution:
604 * when the caller does ultimately touch the block, usage_count would get
605 * bumped again, resulting in too much favoritism for blocks that are
606 * involved in a prefetch sequence. A real fix would involve some
607 * additional per-buffer state, and it's not clear that there's enough of
608 * a problem to justify that.
609 */
610
611 return result;
612}
int io_direct_flags
Definition: fd.c:167
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:588
Buffer recent_buffer
Definition: bufmgr.h:60

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().
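
A hedged sketch of consuming the returned hint (modelled loosely on the WAL prefetcher, not copied from it; smgr, forkNum, and blockNum are assumed): when recent_buffer is set, try ReadRecentBuffer() first and fall back to a normal lookup if the revalidation fails:

    PrefetchBufferResult pref = PrefetchSharedBuffer(smgr, forkNum, blockNum);
    Buffer  buffer = InvalidBuffer;

    if (BufferIsValid(pref.recent_buffer) &&
        ReadRecentBuffer(smgr->smgr_rlocator.locator, forkNum, blockNum,
                         pref.recent_buffer))
    {
        /* fast path: the hinted buffer still held the block and is now pinned */
        buffer = pref.recent_buffer;
    }
    else
    {
        /* slow path: normal buffer lookup; any I/O initiated above may still pay off */
    }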

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 746 of file bufmgr.c.

747{
748 return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
749}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
@ RBM_NORMAL
Definition: bufmgr.h:45

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
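
The canonical caller pattern, as a minimal sketch (rel and blkno are assumed): ReadBuffer() only pins the page; the caller must take a content lock before examining it and release both afterwards:

    Buffer  buf = ReadBuffer(rel, blkno);
    Page    page;

    LockBuffer(buf, BUFFER_LOCK_SHARE);     /* the pin alone does not allow reading */
    page = BufferGetPage(buf);
    /* ... read tuples or other data from the page ... */
    UnlockReleaseBuffer(buf);               /* drops the content lock and the pin */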

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1189 of file bufmgr.c.

1193{
1194 ReadBuffersOperation operation;
1195 Buffer buffer;
1196 int flags;
1197 char persistence;
1198
1199 /*
1200 * Backward compatibility path, most code should use ExtendBufferedRel()
1201 * instead, as acquiring the extension lock inside ExtendBufferedRel()
1202 * scales a lot better.
1203 */
1204 if (unlikely(blockNum == P_NEW))
1205 {
1206 uint32 flags = EB_SKIP_EXTENSION_LOCK;
1207
1208 /*
1209 * Since no-one else can be looking at the page contents yet, there is
1210 * no difference between an exclusive lock and a cleanup-strength
1211 * lock.
1212 */
1213 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1214 flags |= EB_LOCK_FIRST;
1215
1216 return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1217 }
1218
1219 if (rel)
1220 persistence = rel->rd_rel->relpersistence;
1221 else
1222 persistence = smgr_persistence;
1223
1224 if (mode == RBM_ZERO_AND_CLEANUP_LOCK ||
1225 mode == RBM_ZERO_AND_LOCK)
1226 {
1227 bool found;
1228
1229 buffer = PinBufferForBlock(rel, smgr, persistence,
1230 forkNum, blockNum, strategy, &found);
1231 ZeroAndLockBuffer(buffer, mode, found);
1232 return buffer;
1233 }
1234
1235 if (mode == RBM_ZERO_ON_ERROR)
1236 flags = READ_BUFFERS_ZERO_ON_ERROR;
1237 else
1238 flags = 0;
1239 operation.smgr = smgr;
1240 operation.rel = rel;
1241 operation.persistence = persistence;
1242 operation.forknum = forkNum;
1243 operation.strategy = strategy;
1244 if (StartReadBuffer(&operation,
1245 &buffer,
1246 blockNum,
1247 flags))
1248 WaitReadBuffers(&operation);
1249
1250 return buffer;
1251}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:846
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition: bufmgr.c:1019
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:1106
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition: bufmgr.c:1410
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition: bufmgr.c:1382
#define READ_BUFFERS_ZERO_ON_ERROR
Definition: bufmgr.h:111
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:50
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
#define unlikely(x)
Definition: c.h:333
ForkNumber forknum
Definition: bufmgr.h:121
BufferAccessStrategy strategy
Definition: bufmgr.h:122
struct SMgrRelationData * smgr
Definition: bufmgr.h:119

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().
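
For reference, a hedged sketch of the two-step interface used on the normal (non-P_NEW) path above, using the same field names as the listing (smgr, rel, persistence, forkNum, blockNum, and strategy are assumed to be supplied by the caller): StartReadBuffer() pins the buffer and reports whether an actual read is needed, and WaitReadBuffers() performs and completes it:

    ReadBuffersOperation operation;
    Buffer  buffer;

    operation.smgr = smgr;
    operation.rel = rel;
    operation.persistence = persistence;
    operation.forknum = forkNum;
    operation.strategy = strategy;

    if (StartReadBuffer(&operation, &buffer, blockNum, 0))
        WaitReadBuffers(&operation);    /* only needed when I/O was started */

    /* buffer is now pinned and contains valid data */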

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 793 of file bufmgr.c.

795{
796 Buffer buf;
797
798 /*
799 * Reject attempts to read non-local temporary relations; we would be
800 * likely to get wrong data since we have no visibility into the owning
801 * session's local buffers.
802 */
803 if (RELATION_IS_OTHER_TEMP(reln))
804 ereport(ERROR,
805 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
806 errmsg("cannot access temporary tables of other sessions")));
807
808 /*
809 * Read the buffer, and update pgstat counters to reflect a cache hit or
810 * miss.
811 */
812 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
813 forkNum, blockNum, mode, strategy);
814
815 return buf;
816}

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), autoprewarm_database_main(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), brin_vacuum_scan(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_newpage_range(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), verify_heapam(), and vm_readbuf().
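
A hedged usage sketch (rel and nblocks are assumed): large maintenance scans usually pair ReadBufferExtended() with a ring-buffer access strategy so that the scan does not evict the rest of shared buffers:

    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, strategy);

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... process BufferGetPage(buf) ... */
        UnlockReleaseBuffer(buf);
    }
    FreeAccessStrategy(strategy);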

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 830 of file bufmgr.c.

833{
834 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
835
836 return ReadBuffer_common(NULL, smgr,
837 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
838 forkNum, blockNum,
839 mode, strategy);
840}

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().
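
A hedged sketch of a typical recovery-side call (rlocator and blkno are assumed; this is not copied from XLogReadBufferExtended): with no Relation available, the caller passes the bare locator and must state whether the relation is permanent, since that cannot be looked up without the relcache:

    Buffer  buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                            RBM_ZERO_ON_ERROR,
                                            NULL,    /* no strategy */
                                            true);   /* treat as permanent */

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    /* ... apply changes to BufferGetPage(buf) ... */
    UnlockReleaseBuffer(buf);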

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 670 of file bufmgr.c.

672{
673 BufferDesc *bufHdr;
674 BufferTag tag;
675 uint32 buf_state;
676 bool have_private_ref;
677
678 Assert(BufferIsValid(recent_buffer));
679
682 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
683
684 if (BufferIsLocal(recent_buffer))
685 {
686 int b = -recent_buffer - 1;
687
688 bufHdr = GetLocalBufferDescriptor(b);
689 buf_state = pg_atomic_read_u32(&bufHdr->state);
690
691 /* Is it still valid and holding the right tag? */
692 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
693 {
694