PostgreSQL Source Code (git master)
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include <lib/sort_template.h>

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBufferPin (Datum res)
 
static char * ResOwnerPrintBufferPin (Datum res)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroBuffer (Buffer buffer, ReadBufferMode mode)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool WaitReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsExclusiveLocked (Buffer buffer)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
bool EvictUnpinnedBuffer (Buffer buf)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_pin_resowner_desc
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 86 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 76 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 75 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 68 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:415
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:359
int32 * LocalRefCount
Definition: localbuf.c:46

Definition at line 473 of file bufmgr.c.
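
This macro treats an invalid Buffer as unpinned rather than asserting, which is why callers can use it directly in Assert() checks. The negative indexing follows the buffer manager's numbering convention: shared buffers have positive Buffer values, local (temp-relation) buffers negative ones. As a worked example, for a local buffer with Buffer value -3, LocalRefCount[-(bufnum) - 1] reads LocalRefCount[2], the pin count of local buffer id 2.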

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 67 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 71 of file bufmgr.c.

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 95 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 78 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 5874 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 5874 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 5876 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 5876 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 5873 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 5873 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 5875 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 5875 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 5872 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 5872 of file bufmgr.c.
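
The duplicated ST_* entries above reflect two instantiations of the generic sort template: each block of defines is followed by #include <lib/sort_template.h>, which expands into a specialized static sort routine and then #undefs its parameters. A condensed sketch of the first instantiation (the second repeats the pattern for PendingWriteback entries):

#define ST_SORT sort_checkpoint_bufferids
#define ST_ELEMENT_TYPE CkptSortItem
#define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
#define ST_SCOPE static
#define ST_DEFINE
#include <lib/sort_template.h>
/* expands to roughly: static void sort_checkpoint_bufferids(CkptSortItem *begin, size_t n); */

BufferSync() calls sort_checkpoint_bufferids(CkptBufferIds, num_to_scan) so that checkpoint writes are issued in (tablespace, relation, fork, block) order.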

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 5579 of file bufmgr.c.

5580 {
5581  BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
5582  uint32 buf_state;
5583 
5584  buf_state = LockBufHdr(buf_hdr);
5585  Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
5586 
5587  if (!(buf_state & BM_VALID))
5588  {
5589  Assert(!(buf_state & BM_DIRTY));
5590  UnlockBufHdr(buf_hdr, buf_state);
5591  }
5592  else
5593  {
5594  Assert(buf_state & BM_DIRTY);
5595  UnlockBufHdr(buf_hdr, buf_state);
5596 
5597  /* Issue notice if this is not the first failure... */
5598  if (buf_state & BM_IO_ERROR)
5599  {
5600  /* Buffer is pinned, so we can read tag without spinlock */
5601  char *path;
5602 
5603  path = relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
5604  BufTagGetForkNum(&buf_hdr->tag));
5605  ereport(WARNING,
5606  (errcode(ERRCODE_IO_ERROR),
5607  errmsg("could not write block %u of %s",
5608  buf_hdr->tag.blockNum, path),
5609  errdetail("Multiple failures --- write error might be permanent.")));
5610  pfree(path);
5611  }
5612  }
5613 
5614  TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false);
5615 }
#define BM_TAG_VALID
Definition: buf_internals.h:63
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static BufferDesc * GetBufferDescriptor(uint32 id)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
#define BM_DIRTY
Definition: buf_internals.h:61
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:64
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:62
#define BM_IO_ERROR
Definition: buf_internals.h:65
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
Definition: bufmgr.c:5542
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:5688
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define WARNING
Definition: elog.h:36
#define ereport(elevel,...)
Definition: elog.h:149
void pfree(void *pointer)
Definition: mcxt.c:1520
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90
BufferTag tag
BlockNumber blockNum
Definition: buf_internals.h:98

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), pfree(), relpathperm, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 3502 of file bufmgr.c.

3503 {
3504  CheckForBufferLeaks();
3505 
3506  AtEOXact_LocalBuffers(isCommit);
3507 
3508  Assert(PrivateRefCountOverflowed == 0);
3509 }
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:3562
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:209
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:819

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 3544 of file bufmgr.c.

3545 {
3546  UnlockBuffers();
3547 
3548  CheckForBufferLeaks();
3549 
3550  /* localbuf.c needs a chance too */
3551  AtProcExit_LocalBuffers();
3552 }
void UnlockBuffers(void)
Definition: bufmgr.c:5057
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:830

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 3131 of file bufmgr.c.

3132 {
3133  /* info obtained from freelist.c */
3134  int strategy_buf_id;
3135  uint32 strategy_passes;
3136  uint32 recent_alloc;
3137 
3138  /*
3139  * Information saved between calls so we can determine the strategy
3140  * point's advance rate and avoid scanning already-cleaned buffers.
3141  */
3142  static bool saved_info_valid = false;
3143  static int prev_strategy_buf_id;
3144  static uint32 prev_strategy_passes;
3145  static int next_to_clean;
3146  static uint32 next_passes;
3147 
3148  /* Moving averages of allocation rate and clean-buffer density */
3149  static float smoothed_alloc = 0;
3150  static float smoothed_density = 10.0;
3151 
3152  /* Potentially these could be tunables, but for now, not */
3153  float smoothing_samples = 16;
3154  float scan_whole_pool_milliseconds = 120000.0;
3155 
3156  /* Used to compute how far we scan ahead */
3157  long strategy_delta;
3158  int bufs_to_lap;
3159  int bufs_ahead;
3160  float scans_per_alloc;
3161  int reusable_buffers_est;
3162  int upcoming_alloc_est;
3163  int min_scan_buffers;
3164 
3165  /* Variables for the scanning loop proper */
3166  int num_to_scan;
3167  int num_written;
3168  int reusable_buffers;
3169 
3170  /* Variables for final smoothed_density update */
3171  long new_strategy_delta;
3172  uint32 new_recent_alloc;
3173 
3174  /*
3175  * Find out where the freelist clock sweep currently is, and how many
3176  * buffer allocations have happened since our last call.
3177  */
3178  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3179 
3180  /* Report buffer alloc counts to pgstat */
3181  PendingBgWriterStats.buf_alloc += recent_alloc;
3182 
3183  /*
3184  * If we're not running the LRU scan, just stop after doing the stats
3185  * stuff. We mark the saved state invalid so that we can recover sanely
3186  * if LRU scan is turned back on later.
3187  */
3188  if (bgwriter_lru_maxpages <= 0)
3189  {
3190  saved_info_valid = false;
3191  return true;
3192  }
3193 
3194  /*
3195  * Compute strategy_delta = how many buffers have been scanned by the
3196  * clock sweep since last time. If first time through, assume none. Then
3197  * see if we are still ahead of the clock sweep, and if so, how many
3198  * buffers we could scan before we'd catch up with it and "lap" it. Note:
3199  * weird-looking coding of xxx_passes comparisons is to avoid bogus
3200  * behavior when the passes counts wrap around.
3201  */
3202  if (saved_info_valid)
3203  {
3204  int32 passes_delta = strategy_passes - prev_strategy_passes;
3205 
3206  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3207  strategy_delta += (long) passes_delta * NBuffers;
3208 
3209  Assert(strategy_delta >= 0);
3210 
3211  if ((int32) (next_passes - strategy_passes) > 0)
3212  {
3213  /* we're one pass ahead of the strategy point */
3214  bufs_to_lap = strategy_buf_id - next_to_clean;
3215 #ifdef BGW_DEBUG
3216  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3217  next_passes, next_to_clean,
3218  strategy_passes, strategy_buf_id,
3219  strategy_delta, bufs_to_lap);
3220 #endif
3221  }
3222  else if (next_passes == strategy_passes &&
3223  next_to_clean >= strategy_buf_id)
3224  {
3225  /* on same pass, but ahead or at least not behind */
3226  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3227 #ifdef BGW_DEBUG
3228  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3229  next_passes, next_to_clean,
3230  strategy_passes, strategy_buf_id,
3231  strategy_delta, bufs_to_lap);
3232 #endif
3233  }
3234  else
3235  {
3236  /*
3237  * We're behind, so skip forward to the strategy point and start
3238  * cleaning from there.
3239  */
3240 #ifdef BGW_DEBUG
3241  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3242  next_passes, next_to_clean,
3243  strategy_passes, strategy_buf_id,
3244  strategy_delta);
3245 #endif
3246  next_to_clean = strategy_buf_id;
3247  next_passes = strategy_passes;
3248  bufs_to_lap = NBuffers;
3249  }
3250  }
3251  else
3252  {
3253  /*
3254  * Initializing at startup or after LRU scanning had been off. Always
3255  * start at the strategy point.
3256  */
3257 #ifdef BGW_DEBUG
3258  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3259  strategy_passes, strategy_buf_id);
3260 #endif
3261  strategy_delta = 0;
3262  next_to_clean = strategy_buf_id;
3263  next_passes = strategy_passes;
3264  bufs_to_lap = NBuffers;
3265  }
3266 
3267  /* Update saved info for next time */
3268  prev_strategy_buf_id = strategy_buf_id;
3269  prev_strategy_passes = strategy_passes;
3270  saved_info_valid = true;
3271 
3272  /*
3273  * Compute how many buffers had to be scanned for each new allocation, ie,
3274  * 1/density of reusable buffers, and track a moving average of that.
3275  *
3276  * If the strategy point didn't move, we don't update the density estimate
3277  */
3278  if (strategy_delta > 0 && recent_alloc > 0)
3279  {
3280  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3281  smoothed_density += (scans_per_alloc - smoothed_density) /
3282  smoothing_samples;
3283  }
3284 
3285  /*
3286  * Estimate how many reusable buffers there are between the current
3287  * strategy point and where we've scanned ahead to, based on the smoothed
3288  * density estimate.
3289  */
3290  bufs_ahead = NBuffers - bufs_to_lap;
3291  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3292 
3293  /*
3294  * Track a moving average of recent buffer allocations. Here, rather than
3295  * a true average we want a fast-attack, slow-decline behavior: we
3296  * immediately follow any increase.
3297  */
3298  if (smoothed_alloc <= (float) recent_alloc)
3299  smoothed_alloc = recent_alloc;
3300  else
3301  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3302  smoothing_samples;
3303 
3304  /* Scale the estimate by a GUC to allow more aggressive tuning. */
3305  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3306 
3307  /*
3308  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3309  * eventually underflow to zero, and the underflows produce annoying
3310  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3311  * zero, there's no point in tracking smaller and smaller values of
3312  * smoothed_alloc, so just reset it to exactly zero to avoid this
3313  * syndrome. It will pop back up as soon as recent_alloc increases.
3314  */
3315  if (upcoming_alloc_est == 0)
3316  smoothed_alloc = 0;
3317 
3318  /*
3319  * Even in cases where there's been little or no buffer allocation
3320  * activity, we want to make a small amount of progress through the buffer
3321  * cache so that as many reusable buffers as possible are clean after an
3322  * idle period.
3323  *
3324  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3325  * the BGW will be called during the scan_whole_pool time; slice the
3326  * buffer pool into that many sections.
3327  */
3328  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3329 
3330  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3331  {
3332 #ifdef BGW_DEBUG
3333  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3334  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3335 #endif
3336  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3337  }
3338 
3339  /*
3340  * Now write out dirty reusable buffers, working forward from the
3341  * next_to_clean point, until we have lapped the strategy scan, or cleaned
3342  * enough buffers to match our estimate of the next cycle's allocation
3343  * requirements, or hit the bgwriter_lru_maxpages limit.
3344  */
3345 
3346  num_to_scan = bufs_to_lap;
3347  num_written = 0;
3348  reusable_buffers = reusable_buffers_est;
3349 
3350  /* Execute the LRU scan */
3351  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3352  {
3353  int sync_state = SyncOneBuffer(next_to_clean, true,
3354  wb_context);
3355 
3356  if (++next_to_clean >= NBuffers)
3357  {
3358  next_to_clean = 0;
3359  next_passes++;
3360  }
3361  num_to_scan--;
3362 
3363  if (sync_state & BUF_WRITTEN)
3364  {
3365  reusable_buffers++;
3366  if (++num_written >= bgwriter_lru_maxpages)
3367  {
3368  PendingBgWriterStats.maxwritten_clean++;
3369  break;
3370  }
3371  }
3372  else if (sync_state & BUF_REUSABLE)
3373  reusable_buffers++;
3374  }
3375 
3376  PendingBgWriterStats.buf_written_clean += num_written;
3377 
3378 #ifdef BGW_DEBUG
3379  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3380  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3381  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3382  bufs_to_lap - num_to_scan,
3383  num_written,
3384  reusable_buffers - reusable_buffers_est);
3385 #endif
3386 
3387  /*
3388  * Consider the above scan as being like a new allocation scan.
3389  * Characterize its density and update the smoothed one based on it. This
3390  * effectively halves the moving average period in cases where both the
3391  * strategy and the background writer are doing some useful scanning,
3392  * which is helpful because a long memory isn't as desirable on the
3393  * density estimates.
3394  */
3395  new_strategy_delta = bufs_to_lap - num_to_scan;
3396  new_recent_alloc = reusable_buffers - reusable_buffers_est;
3397  if (new_strategy_delta > 0 && new_recent_alloc > 0)
3398  {
3399  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3400  smoothed_density += (scans_per_alloc - smoothed_density) /
3401  smoothing_samples;
3402 
3403 #ifdef BGW_DEBUG
3404  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3405  new_recent_alloc, new_strategy_delta,
3406  scans_per_alloc, smoothed_density);
3407 #endif
3408  }
3409 
3410  /* Return true if OK to hibernate */
3411  return (bufs_to_lap == 0 && recent_alloc == 0);
3412 }
int BgWriterDelay
Definition: bgwriter.c:57
#define BUF_REUSABLE
Definition: bufmgr.c:76
double bgwriter_lru_multiplier
Definition: bufmgr.c:141
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:3429
int bgwriter_lru_maxpages
Definition: bufmgr.c:140
#define BUF_WRITTEN
Definition: bufmgr.c:75
signed int int32
Definition: c.h:494
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:394
int NBuffers
Definition: globals.c:139
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:255
PgStat_Counter maxwritten_clean
Definition: pgstat.h:256
PgStat_Counter buf_alloc
Definition: pgstat.h:257

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
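
The recurrence used for smoothed_density (and for the declining side of smoothed_alloc) is an exponential moving average with weight 1/smoothing_samples. A minimal standalone sketch with made-up numbers, using the in-function default smoothing_samples = 16:

float smoothed = 10.0f;          /* current smoothed_density */
float sample = 26.0f;            /* scans_per_alloc observed this cycle */
float smoothing_samples = 16;

smoothed += (sample - smoothed) / smoothing_samples;   /* 10 + 16/16 = 11.0 */

Note the asymmetry for smoothed_alloc: an increase in recent_alloc is adopted immediately (fast attack), while decreases decay through this average (slow decline), so a burst of allocations raises the write-ahead target at once.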

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 1548 of file bufmgr.c.

1552 {
1553  BufferTag newTag; /* identity of requested block */
1554  uint32 newHash; /* hash value for newTag */
1555  LWLock *newPartitionLock; /* buffer partition lock for it */
1556  int existing_buf_id;
1557  Buffer victim_buffer;
1558  BufferDesc *victim_buf_hdr;
1559  uint32 victim_buf_state;
1560 
1561  /* Make sure we will have room to remember the buffer pin */
1562  ResourceOwnerEnlarge(CurrentResourceOwner);
1563  ReservePrivateRefCountEntry();
1564 
1565  /* create a tag so we can lookup the buffer */
1566  InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
1567 
1568  /* determine its hash code and partition lock ID */
1569  newHash = BufTableHashCode(&newTag);
1570  newPartitionLock = BufMappingPartitionLock(newHash);
1571 
1572  /* see if the block is in the buffer pool already */
1573  LWLockAcquire(newPartitionLock, LW_SHARED);
1574  existing_buf_id = BufTableLookup(&newTag, newHash);
1575  if (existing_buf_id >= 0)
1576  {
1577  BufferDesc *buf;
1578  bool valid;
1579 
1580  /*
1581  * Found it. Now, pin the buffer so no one can steal it from the
1582  * buffer pool, and check to see if the correct data has been loaded
1583  * into the buffer.
1584  */
1585  buf = GetBufferDescriptor(existing_buf_id);
1586 
1587  valid = PinBuffer(buf, strategy);
1588 
1589  /* Can release the mapping lock as soon as we've pinned it */
1590  LWLockRelease(newPartitionLock);
1591 
1592  *foundPtr = true;
1593 
1594  if (!valid)
1595  {
1596  /*
1597  * We can only get here if (a) someone else is still reading in
1598  * the page, (b) a previous read attempt failed, or (c) someone
1599  * called StartReadBuffers() but not yet WaitReadBuffers().
1600  */
1601  *foundPtr = false;
1602  }
1603 
1604  return buf;
1605  }
1606 
1607  /*
1608  * Didn't find it in the buffer pool. We'll have to initialize a new
1609  * buffer. Remember to unlock the mapping lock while doing the work.
1610  */
1611  LWLockRelease(newPartitionLock);
1612 
1613  /*
1614  * Acquire a victim buffer. Somebody else might try to do the same, since
1615  * we don't hold any conflicting locks. If so, we'll have to undo our work
1616  * later.
1617  */
1618  victim_buffer = GetVictimBuffer(strategy, io_context);
1619  victim_buf_hdr = GetBufferDescriptor(victim_buffer - 1);
1620 
1621  /*
1622  * Try to make a hashtable entry for the buffer under its new tag. If
1623  * somebody else inserted another buffer for the tag, we'll release the
1624  * victim buffer we acquired and use the already inserted one.
1625  */
1626  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1627  existing_buf_id = BufTableInsert(&newTag, newHash, victim_buf_hdr->buf_id);
1628  if (existing_buf_id >= 0)
1629  {
1630  BufferDesc *existing_buf_hdr;
1631  bool valid;
1632 
1633  /*
1634  * Got a collision. Someone has already done what we were about to do.
1635  * We'll just handle this as if it were found in the buffer pool in
1636  * the first place. First, give up the buffer we were planning to
1637  * use.
1638  *
1639  * We could do this after releasing the partition lock, but then we'd
1640  * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
1641  * before acquiring the lock, for the rare case of such a collision.
1642  */
1643  UnpinBuffer(victim_buf_hdr);
1644 
1645  /*
1646  * The victim buffer we acquired previously is clean and unused, let
1647  * it be found again quickly
1648  */
1649  StrategyFreeBuffer(victim_buf_hdr);
1650 
1651  /* remaining code should match code at top of routine */
1652 
1653  existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
1654 
1655  valid = PinBuffer(existing_buf_hdr, strategy);
1656 
1657  /* Can release the mapping lock as soon as we've pinned it */
1658  LWLockRelease(newPartitionLock);
1659 
1660  *foundPtr = true;
1661 
1662  if (!valid)
1663  {
1664  /*
1665  * We can only get here if (a) someone else is still reading in
1666  * the page, (b) a previous read attempt failed, or (c) someone
1667  * called StartReadBuffers() but not yet WaitReadBuffers().
1668  */
1669  *foundPtr = false;
1670  }
1671 
1672  return existing_buf_hdr;
1673  }
1674 
1675  /*
1676  * Need to lock the buffer header too in order to change its tag.
1677  */
1678  victim_buf_state = LockBufHdr(victim_buf_hdr);
1679 
1680  /* some sanity checks while we hold the buffer header lock */
1681  Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
1682  Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
1683 
1684  victim_buf_hdr->tag = newTag;
1685 
1686  /*
1687  * Make sure BM_PERMANENT is set for buffers that must be written at every
1688  * checkpoint. Unlogged buffers only need to be written at shutdown
1689  * checkpoints, except for their "init" forks, which need to be treated
1690  * just like permanent relations.
1691  */
1692  victim_buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1693  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1694  victim_buf_state |= BM_PERMANENT;
1695 
1696  UnlockBufHdr(victim_buf_hdr, victim_buf_state);
1697 
1698  LWLockRelease(newPartitionLock);
1699 
1700  /*
1701  * Buffer contents are currently invalid.
1702  */
1703  *foundPtr = false;
1704 
1705  return victim_buf_hdr;
1706 }
int Buffer
Definition: buf.h:23
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
Definition: buf_internals.h:69
static LWLock * BufMappingPartitionLock(uint32 hashcode)
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:46
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:51
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:118
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:2595
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition: bufmgr.c:1892
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:249
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:2749
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:363
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
static char * buf
Definition: pg_test_fsync.c:73
@ INIT_FORKNUM
Definition: relpath.h:53
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:442
Definition: lwlock.h:42
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by PinBufferForBlock().
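
The *foundPtr result tells the caller whether the returned (always pinned) buffer already contains valid data. A minimal illustrative sketch of the caller-side contract; this is not the verbatim PinBufferForBlock() code:

bool found;
BufferDesc *hdr;

hdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
                  strategy, &found, io_context);
if (found)
{
    /* a valid copy of the page is already resident; use it directly */
}
else
{
    /* pinned but not valid: the caller must read the page in,
     * bracketed by StartBufferIO() / TerminateBufferIO() */
}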

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 3667 of file bufmgr.c.

3668 {
3669  BufferDesc *bufHdr;
3670 
3671  Assert(BufferIsPinned(buffer));
3672 
3673  if (BufferIsLocal(buffer))
3674  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3675  else
3676  bufHdr = GetBufferDescriptor(buffer - 1);
3677 
3678  /* pinned, so OK to read tag without spinlock */
3679  return bufHdr->tag.blockNum;
3680 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:473

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune_and_freeze(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_noprune(), lazy_scan_prune(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and WaitReadBuffers().
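
A minimal usage sketch, assuming rel is an open Relation and blkno an existing block; the only requirement is that the buffer be pinned:

Buffer buf = ReadBuffer(rel, blkno);

Assert(BufferGetBlockNumber(buf) == blkno);
ReleaseBuffer(buf);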

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3928 of file bufmgr.c.

3929 {
3930  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3931  char *page = BufferGetPage(buffer);
3932  XLogRecPtr lsn;
3933  uint32 buf_state;
3934 
3935  /*
3936  * If we don't need locking for correctness, fastpath out.
3937  */
3938  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3939  return PageGetLSN(page);
3940 
3941  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3942  Assert(BufferIsValid(buffer));
3943  Assert(BufferIsPinned(buffer));
3944 
3945  buf_state = LockBufHdr(bufHdr);
3946  lsn = PageGetLSN(page);
3947  UnlockBufHdr(bufHdr, buf_state);
3948 
3949  return lsn;
3950 }
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:408
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
#define XLogHintBitIsNeeded()
Definition: xlog.h:118
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().
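
The typical use (see SetHintBits() in the list above) is deciding whether a hint-bit change may be applied yet when wal_log_hints or data checksums require hint bits to be WAL-logged. A hedged sketch of that pattern; commitLSN stands in for the LSN obtained from TransactionIdGetCommitLSN():

if (BufferIsPermanent(buffer) && XLogHintBitIsNeeded() &&
    BufferGetLSNAtomic(buffer) < commitLSN)
    return;   /* too early: skip setting the hint bit for now */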

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 3688 of file bufmgr.c.

3690 {
3691  BufferDesc *bufHdr;
3692 
3693  /* Do the same checks as BufferGetBlockNumber. */
3694  Assert(BufferIsPinned(buffer));
3695 
3696  if (BufferIsLocal(buffer))
3697  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3698  else
3699  bufHdr = GetBufferDescriptor(buffer - 1);
3700 
3701  /* pinned, so OK to read tag without spinlock */
3702  *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
3703  *forknum = BufTagGetForkNum(&bufHdr->tag);
3704  *blknum = bufHdr->tag.blockNum;
3705 }

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
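
A minimal usage sketch, assuming buf is a pinned buffer:

RelFileLocator rlocator;
ForkNumber forknum;
BlockNumber blkno;

BufferGetTag(buf, &rlocator, &forknum, &blkno);
/* rlocator, forknum and blkno now identify the on-disk page */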

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 2442 of file bufmgr.c.

2443 {
2444  BufferDesc *bufHdr;
2445 
2446  if (BufferIsLocal(buffer))
2447  {
2448  int bufid = -buffer - 1;
2449 
2450  bufHdr = GetLocalBufferDescriptor(bufid);
2451  }
2452  else
2453  {
2454  bufHdr = GetBufferDescriptor(buffer - 1);
2455  }
2456 
2457  Assert(BufferIsPinned(buffer));
2458  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2459  LW_EXCLUSIVE));
2460 
2461  return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
2462 }
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:234
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1939
pg_atomic_uint32 state

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by XLogRegisterBuffer().

◆ BufferIsExclusiveLocked()

bool BufferIsExclusiveLocked ( Buffer  buffer)

Definition at line 2413 of file bufmgr.c.

2414 {
2415  BufferDesc *bufHdr;
2416 
2417  if (BufferIsLocal(buffer))
2418  {
2419  int bufid = -buffer - 1;
2420 
2421  bufHdr = GetLocalBufferDescriptor(bufid);
2422  }
2423  else
2424  {
2425  bufHdr = GetBufferDescriptor(buffer - 1);
2426  }
2427 
2428  Assert(BufferIsPinned(buffer));
2429  return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2430  LW_EXCLUSIVE);
2431 }

References Assert, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, and LWLockHeldByMeInMode().

Referenced by XLogRegisterBuffer().
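
Like BufferIsDirty() above, this exists chiefly to support assertions; XLogRegisterBuffer() combines the two roughly as follows (a sketch, not the verbatim code):

Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));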

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 3898 of file bufmgr.c.

3899 {
3900  BufferDesc *bufHdr;
3901 
3902  /* Local buffers are used only for temp relations. */
3903  if (BufferIsLocal(buffer))
3904  return false;
3905 
3906  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3907  Assert(BufferIsValid(buffer));
3908  Assert(BufferIsPinned(buffer));
3909 
3910  /*
3911  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
3912  * need not bother with the buffer header spinlock. Even if someone else
3913  * changes the buffer header state while we're doing this, the state is
3914  * changed atomically, so we'll read the old value or the new value, but
3915  * not random garbage.
3916  */
3917  bufHdr = GetBufferDescriptor(buffer - 1);
3918  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3919 }

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 2855 of file bufmgr.c.

2856 {
2857  uint32 buf_state;
2858  int buf_id;
2859  int num_to_scan;
2860  int num_spaces;
2861  int num_processed;
2862  int num_written;
2863  CkptTsStatus *per_ts_stat = NULL;
2864  Oid last_tsid;
2865  binaryheap *ts_heap;
2866  int i;
2867  int mask = BM_DIRTY;
2868  WritebackContext wb_context;
2869 
2870  /*
2871  * Unless this is a shutdown checkpoint or we have been explicitly told,
2872  * we write only permanent, dirty buffers. But at shutdown or end of
2873  * recovery, we write all dirty buffers.
2874  */
2875  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
2876  CHECKPOINT_FLUSH_ALL))))
2877  mask |= BM_PERMANENT;
2878 
2879  /*
2880  * Loop over all buffers, and mark the ones that need to be written with
2881  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
2882  * can estimate how much work needs to be done.
2883  *
2884  * This allows us to write only those pages that were dirty when the
2885  * checkpoint began, and not those that get dirtied while it proceeds.
2886  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
2887  * later in this function, or by normal backends or the bgwriter cleaning
2888  * scan, the flag is cleared. Any buffer dirtied after this point won't
2889  * have the flag set.
2890  *
2891  * Note that if we fail to write some buffer, we may leave buffers with
2892  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
2893  * certainly need to be written for the next checkpoint attempt, too.
2894  */
2895  num_to_scan = 0;
2896  for (buf_id = 0; buf_id < NBuffers; buf_id++)
2897  {
2898  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
2899 
2900  /*
2901  * Header spinlock is enough to examine BM_DIRTY, see comment in
2902  * SyncOneBuffer.
2903  */
2904  buf_state = LockBufHdr(bufHdr);
2905 
2906  if ((buf_state & mask) == mask)
2907  {
2908  CkptSortItem *item;
2909 
2910  buf_state |= BM_CHECKPOINT_NEEDED;
2911 
2912  item = &CkptBufferIds[num_to_scan++];
2913  item->buf_id = buf_id;
2914  item->tsId = bufHdr->tag.spcOid;
2915  item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
2916  item->forkNum = BufTagGetForkNum(&bufHdr->tag);
2917  item->blockNum = bufHdr->tag.blockNum;
2918  }
2919 
2920  UnlockBufHdr(bufHdr, buf_state);
2921 
2922  /* Check for barrier events in case NBuffers is large. */
2923  if (ProcSignalBarrierPending)
2924  ProcessProcSignalBarrier();
2925  }
2926 
2927  if (num_to_scan == 0)
2928  return; /* nothing to do */
2929 
2930  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2931 
2932  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2933 
2934  /*
2935  * Sort buffers that need to be written to reduce the likelihood of random
2936  * IO. The sorting is also important for the implementation of balancing
2937  * writes between tablespaces. Without balancing writes we'd potentially
2938  * end up writing to the tablespaces one-by-one; possibly overloading the
2939  * underlying system.
2940  */
2941  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2942 
2943  num_spaces = 0;
2944 
2945  /*
2946  * Allocate progress status for each tablespace with buffers that need to
2947  * be flushed. This requires the to-be-flushed array to be sorted.
2948  */
2949  last_tsid = InvalidOid;
2950  for (i = 0; i < num_to_scan; i++)
2951  {
2952  CkptTsStatus *s;
2953  Oid cur_tsid;
2954 
2955  cur_tsid = CkptBufferIds[i].tsId;
2956 
2957  /*
2958  * Grow array of per-tablespace status structs, every time a new
2959  * tablespace is found.
2960  */
2961  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2962  {
2963  Size sz;
2964 
2965  num_spaces++;
2966 
2967  /*
2968  * Not worth adding grow-by-power-of-2 logic here - even with a
2969  * few hundred tablespaces this should be fine.
2970  */
2971  sz = sizeof(CkptTsStatus) * num_spaces;
2972 
2973  if (per_ts_stat == NULL)
2974  per_ts_stat = (CkptTsStatus *) palloc(sz);
2975  else
2976  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2977 
2978  s = &per_ts_stat[num_spaces - 1];
2979  memset(s, 0, sizeof(*s));
2980  s->tsId = cur_tsid;
2981 
2982  /*
2983  * The first buffer in this tablespace. As CkptBufferIds is sorted
2984  * by tablespace all (s->num_to_scan) buffers in this tablespace
2985  * will follow afterwards.
2986  */
2987  s->index = i;
2988 
2989  /*
2990  * progress_slice will be determined once we know how many buffers
2991  * are in each tablespace, i.e. after this loop.
2992  */
2993 
2994  last_tsid = cur_tsid;
2995  }
2996  else
2997  {
2998  s = &per_ts_stat[num_spaces - 1];
2999  }
3000 
3001  s->num_to_scan++;
3002 
3003  /* Check for barrier events. */
3004  if (ProcSignalBarrierPending)
3005  ProcessProcSignalBarrier();
3006  }
3007 
3008  Assert(num_spaces > 0);
3009 
3010  /*
3011  * Build a min-heap over the write-progress in the individual tablespaces,
3012  * and compute how large a portion of the total progress a single
3013  * processed buffer is.
3014  */
3015  ts_heap = binaryheap_allocate(num_spaces,
3016  ts_ckpt_progress_comparator,
3017  NULL);
3018 
3019  for (i = 0; i < num_spaces; i++)
3020  {
3021  CkptTsStatus *ts_stat = &per_ts_stat[i];
3022 
3023  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3024 
3025  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
3026  }
3027 
3028  binaryheap_build(ts_heap);
3029 
3030  /*
3031  * Iterate through to-be-checkpointed buffers and write the ones (still)
3032  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3033  * tablespaces; otherwise the sorting would lead to only one tablespace
3034  * receiving writes at a time, making inefficient use of the hardware.
3035  */
3036  num_processed = 0;
3037  num_written = 0;
3038  while (!binaryheap_empty(ts_heap))
3039  {
3040  BufferDesc *bufHdr = NULL;
3041  CkptTsStatus *ts_stat = (CkptTsStatus *)
3042  DatumGetPointer(binaryheap_first(ts_heap));
3043 
3044  buf_id = CkptBufferIds[ts_stat->index].buf_id;
3045  Assert(buf_id != -1);
3046 
3047  bufHdr = GetBufferDescriptor(buf_id);
3048 
3049  num_processed++;
3050 
3051  /*
3052  * We don't need to acquire the lock here, because we're only looking
3053  * at a single bit. It's possible that someone else writes the buffer
3054  * and clears the flag right after we check, but that doesn't matter
3055  * since SyncOneBuffer will then do nothing. However, there is a
3056  * further race condition: it's conceivable that between the time we
3057  * examine the bit here and the time SyncOneBuffer acquires the lock,
3058  * someone else not only wrote the buffer but replaced it with another
3059  * page and dirtied it. In that improbable case, SyncOneBuffer will
3060  * write the buffer though we didn't need to. It doesn't seem worth
3061  * guarding against this, though.
3062  */
3063  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
3064  {
3065  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3066  {
3067  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3068  PendingCheckpointerStats.buffers_written++;
3069  num_written++;
3070  }
3071  }
3072 
3073  /*
3074  * Measure progress independently of actually having to flush the buffer
3075  * - otherwise writing becomes unbalanced.
3076  */
3077  ts_stat->progress += ts_stat->progress_slice;
3078  ts_stat->num_scanned++;
3079  ts_stat->index++;
3080 
3081  /* Have all the buffers from the tablespace been processed? */
3082  if (ts_stat->num_scanned == ts_stat->num_to_scan)
3083  {
3084  binaryheap_remove_first(ts_heap);
3085  }
3086  else
3087  {
3088  /* update heap with the new progress */
3089  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
3090  }
3091 
3092  /*
3093  * Sleep to throttle our I/O rate.
3094  *
3095  * (This will check for barrier events even if it doesn't sleep.)
3096  */
3097  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3098  }
3099 
3100  /*
3101  * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3102  * IOContext will always be IOCONTEXT_NORMAL.
3103  */
3104  IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
3105 
3106  pfree(per_ts_stat);
3107  per_ts_stat = NULL;
3108  binaryheap_free(ts_heap);
3109 
3110  /*
3111  * Update checkpoint statistics. As noted above, this doesn't include
3112  * buffers written by other backends or bgwriter scan.
3113  */
3114  CheckpointStats.ckpt_bufs_written += num_written;
3115 
3116  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3117 }
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:138
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:255
bh_node_type binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:177
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:192
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:39
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:75
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:116
#define binaryheap_empty(h)
Definition: binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:68
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:5807
int checkpoint_flush_after
Definition: bufmgr.c:170
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:5830
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition: bufmgr.c:5887
struct CkptTsStatus CkptTsStatus
double float8
Definition: c.h:630
size_t Size
Definition: c.h:605
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:711
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:38
int i
Definition: isn.c:73
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * palloc(Size size)
Definition: mcxt.c:1316
@ IOCONTEXT_NORMAL
Definition: pgstat.h:290
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:464
int ckpt_bufs_written
Definition: xlog.h:165
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:114
int index
Definition: bufmgr.c:122
int num_scanned
Definition: bufmgr.c:119
float8 progress
Definition: bufmgr.c:113
int num_to_scan
Definition: bufmgr.c:117
Oid tsId
Definition: bufmgr.c:104
PgStat_Counter buffers_written
Definition: pgstat.h:270
Oid spcOid
Definition: buf_internals.h:94
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:138
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:141
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:137

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr(), and WritebackContextInit().

Referenced by CheckPointBuffers().
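
The progress_slice balancing is easiest to see with concrete numbers (made up for illustration). Suppose num_to_scan = 10000 buffers total: 8000 in tablespace A and 2000 in tablespace B. Then A's progress_slice is 10000/8000 = 1.25 and B's is 10000/2000 = 5.0, so each tablespace reaches progress 10000 exactly when its last buffer is written. Because the min-heap always yields the tablespace with the least progress, writes interleave at roughly four A-buffers per B-buffer instead of draining the sorted list one tablespace at a time.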

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inlinestatic

Definition at line 5742 of file bufmgr.c.

5743 {
5744  int ret;
5745  RelFileLocator rlocatora;
5746  RelFileLocator rlocatorb;
5747 
5748  rlocatora = BufTagGetRelFileLocator(ba);
5749  rlocatorb = BufTagGetRelFileLocator(bb);
5750 
5751  ret = rlocator_comparator(&rlocatora, &rlocatorb);
5752 
5753  if (ret != 0)
5754  return ret;
5755 
5756  if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
5757  return -1;
5758  if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
5759  return 1;
5760 
5761  if (ba->blockNum < bb->blockNum)
5762  return -1;
5763  if (ba->blockNum > bb->blockNum)
5764  return 1;
5765 
5766  return 0;
5767 }
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:5661

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().
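
The ordering is total: relation locator first, then fork, then block number. That is what lets IssuePendingWritebacks() walk a sorted run of pending writebacks and merge neighboring blocks into single ranges. Below is a minimal standalone sketch of the same ordering for use with qsort(); the wrapper name is hypothetical (buffertag_comparator() itself is static to bufmgr.c), and for brevity it orders by relation number only rather than the full RelFileLocator:

    #include "postgres.h"
    #include "storage/buf_internals.h"

    /* hypothetical qsort-compatible analogue of buffertag_comparator() */
    static int
    buffertag_qsort_cb(const void *pa, const void *pb)
    {
        const BufferTag *ba = (const BufferTag *) pa;
        const BufferTag *bb = (const BufferTag *) pb;

        if (BufTagGetRelNumber(ba) != BufTagGetRelNumber(bb))
            return (BufTagGetRelNumber(ba) < BufTagGetRelNumber(bb)) ? -1 : 1;
        if (BufTagGetForkNum(ba) != BufTagGetForkNum(bb))
            return (BufTagGetForkNum(ba) < BufTagGetForkNum(bb)) ? -1 : 1;
        if (ba->blockNum != bb->blockNum)
            return (ba->blockNum < bb->blockNum) ? -1 : 1;
        return 0;
    }

    /* usage: qsort(tags, ntags, sizeof(BufferTag), buffertag_qsort_cb); */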

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5132 of file bufmgr.c.

5133 {
5134  if (BufferIsLocal(buffer))
5135  {
5136  if (LocalRefCount[-buffer - 1] != 1)
5137  elog(ERROR, "incorrect local pin count: %d",
5138  LocalRefCount[-buffer - 1]);
5139  }
5140  else
5141  {
5142  if (GetPrivateRefCount(buffer) != 1)
5143  elog(ERROR, "incorrect local pin count: %d",
5144  GetPrivateRefCount(buffer));
5145  }
5146 }
#define ERROR
Definition: elog.h:39

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), and LockBufferForCleanup().
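
The function is purely a sanity check: it raises ERROR unless the calling backend holds exactly one pin on the buffer, which is the precondition for cleanup-strength locking. A minimal sketch of the calling pattern; the surrounding function is hypothetical:

    /* hypothetical caller that requires single-pin semantics */
    static void
    my_cleanup_step(Buffer buffer)
    {
        CheckBufferIsPinnedOnce(buffer);    /* elog(ERROR) if pin count != 1 */
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        /* ... modify the page knowing no one else has it pinned ... */
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    }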

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 3562 of file bufmgr.c.

3563 {
3564 #ifdef USE_ASSERT_CHECKING
3565  int RefCountErrors = 0;
3566  PrivateRefCountEntry *res;
3567  int i;
3568  char *s;
3569 
3570  /* check the array */
3571  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
3572  {
3573  res = &PrivateRefCountArray[i];
3574 
3575  if (res->buffer != InvalidBuffer)
3576  {
3577  s = DebugPrintBufferRefcount(res->buffer);
3578  elog(WARNING, "buffer refcount leak: %s", s);
3579  pfree(s);
3580 
3581  RefCountErrors++;
3582  }
3583  }
3584 
3585  /* if necessary search the hash */
3586  if (PrivateRefCountOverflowed)
3587  {
3588  HASH_SEQ_STATUS hstat;
3589 
3590  hash_seq_init(&hstat, PrivateRefCountHash);
3591  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
3592  {
3593  s = DebugPrintBufferRefcount(res->buffer);
3594  elog(WARNING, "buffer refcount leak: %s", s);
3595  pfree(s);
3596  RefCountErrors++;
3597  }
3598  }
3599 
3600  Assert(RefCountErrors == 0);
3601 #endif
3602 }
#define InvalidBuffer
Definition: buf.h:25
char * DebugPrintBufferRefcount(Buffer buffer)
Definition: bufmgr.c:3608
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:95
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:207
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:208
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1395
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385

References Assert, DebugPrintBufferRefcount(), elog, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 3653 of file bufmgr.c.

3654 {
3655  BufferSync(flags);
3656 }
static void BufferSync(int flags)
Definition: bufmgr.c:2855

References BufferSync().

Referenced by CheckPointGuts().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem *  a,
const CkptSortItem *  b 
)
inline static

Definition at line 5776 of file bufmgr.c.

5777 {
5778  /* compare tablespace */
5779  if (a->tsId < b->tsId)
5780  return -1;
5781  else if (a->tsId > b->tsId)
5782  return 1;
5783  /* compare relation */
5784  if (a->relNumber < b->relNumber)
5785  return -1;
5786  else if (a->relNumber > b->relNumber)
5787  return 1;
5788  /* compare fork */
5789  else if (a->forkNum < b->forkNum)
5790  return -1;
5791  else if (a->forkNum > b->forkNum)
5792  return 1;
5793  /* compare block number */
5794  else if (a->blockNum < b->blockNum)
5795  return -1;
5796  else if (a->blockNum > b->blockNum)
5797  return 1;
5798  /* equal page IDs are unlikely, but not impossible */
5799  return 0;
5800 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, and b.

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 5111 of file bufmgr.c.

5112 {
5113  BufferDesc *buf;
5114 
5115  Assert(BufferIsPinned(buffer));
5116  if (BufferIsLocal(buffer))
5117  return true; /* act as though we got it */
5118 
5119  buf = GetBufferDescriptor(buffer - 1);
5120 
5121  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
5122  LW_EXCLUSIVE);
5123 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1341

References Assert, buf, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().
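
Because the function never blocks, it suits opportunistic code paths: take the exclusive content lock if it is free, otherwise skip the work instead of stalling. A sketch of that pattern, assuming the buffer is already pinned; do_optional_work() is hypothetical:

    if (ConditionalLockBuffer(buffer))
    {
        /* got the exclusive content lock without waiting */
        do_optional_work(buffer);
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    }
    else
    {
        /* lock was busy; leave the optional work for another time */
    }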

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5326 of file bufmgr.c.

5327 {
5328  BufferDesc *bufHdr;
5329  uint32 buf_state,
5330  refcount;
5331 
5332  Assert(BufferIsValid(buffer));
5333 
5334  if (BufferIsLocal(buffer))
5335  {
5336  refcount = LocalRefCount[-buffer - 1];
5337  /* There should be exactly one pin */
5338  Assert(refcount > 0);
5339  if (refcount != 1)
5340  return false;
5341  /* Nobody else to wait for */
5342  return true;
5343  }
5344 
5345  /* There should be exactly one local pin */
5346  refcount = GetPrivateRefCount(buffer);
5347  Assert(refcount);
5348  if (refcount != 1)
5349  return false;
5350 
5351  /* Try to acquire lock */
5352  if (!ConditionalLockBuffer(buffer))
5353  return false;
5354 
5355  bufHdr = GetBufferDescriptor(buffer - 1);
5356  buf_state = LockBufHdr(bufHdr);
5357  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5358 
5359  Assert(refcount > 0);
5360  if (refcount == 1)
5361  {
5362  /* Successfully acquired exclusive lock with pincount 1 */
5363  UnlockBufHdr(bufHdr, buf_state);
5364  return true;
5365  }
5366 
5367  /* Failed, so release the lock */
5368  UnlockBufHdr(bufHdr, buf_state);
5369  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5370  return false;
5371 }
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5111
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5085
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:197

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
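
The usual caller pattern, modeled on heap_page_prune_opt(): pin the page, try for a cleanup lock, and simply move on if another backend holds a pin or the content lock. prune_page() is hypothetical; rel and blkno are assumed to be in scope:

    Buffer      buffer = ReadBuffer(rel, blkno);

    if (ConditionalLockBufferForCleanup(buffer))
    {
        /* sole pin plus exclusive lock: safe to prune/defragment */
        prune_page(buffer);
        UnlockReleaseBuffer(buffer);
    }
    else
        ReleaseBuffer(buffer);      /* contended; try again some other time */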

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 4724 of file bufmgr.c.

4726 {
4727  char relpersistence;
4728  SMgrRelation src_rel;
4729  SMgrRelation dst_rel;
4730 
4731  /* Set the relpersistence. */
4732  relpersistence = permanent ?
4733  RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
4734 
4735  src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
4736  dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
4737 
4738  /*
4739  * Create and copy all forks of the relation. During create database we
4740  * have a separate cleanup mechanism which deletes complete database
4741  * directory. Therefore, each individual relation doesn't need to be
4742  * registered for cleanup.
4743  */
4744  RelationCreateStorage(dst_rlocator, relpersistence, false);
4745 
4746  /* copy main fork. */
4747  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
4748  permanent);
4749 
4750  /* copy those extra forks that exist */
4751  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
4752  forkNum <= MAX_FORKNUM; forkNum++)
4753  {
4754  if (smgrexists(src_rel, forkNum))
4755  {
4756  smgrcreate(dst_rel, forkNum, false);
4757 
4758  /*
4759  * WAL log creation if the relation is persistent, or this is the
4760  * init fork of an unlogged relation.
4761  */
4762  if (permanent || forkNum == INIT_FORKNUM)
4763  log_smgrcreate(&dst_rlocator, forkNum);
4764 
4765  /* Copy a fork's data, block by block. */
4766  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
4767  permanent);
4768  }
4769  }
4770 }
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:4633
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:48
@ MAIN_FORKNUM
Definition: relpath.h:50
#define MAX_FORKNUM
Definition: relpath.h:62
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:198
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:411
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:398
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char* DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 3608 of file bufmgr.c.

3609 {
3610  BufferDesc *buf;
3611  int32 loccount;
3612  char *path;
3613  char *result;
3614  ProcNumber backend;
3615  uint32 buf_state;
3616 
3617  Assert(BufferIsValid(buffer));
3618  if (BufferIsLocal(buffer))
3619  {
3620  buf = GetLocalBufferDescriptor(-buffer - 1);
3621  loccount = LocalRefCount[-buffer - 1];
3622  backend = MyProcNumber;
3623  }
3624  else
3625  {
3626  buf = GetBufferDescriptor(buffer - 1);
3627  loccount = GetPrivateRefCount(buffer);
3628  backend = INVALID_PROC_NUMBER;
3629  }
3630 
3631  /* theoretically we should lock the bufhdr here */
3632  path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
3633  BufTagGetForkNum(&buf->tag));
3634  buf_state = pg_atomic_read_u32(&buf->state);
3635 
3636  result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3637  buffer, path,
3638  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
3639  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
3640  pfree(path);
3641  return result;
3642 }
#define BUF_FLAG_MASK
Definition: buf_internals.h:48
ProcNumber MyProcNumber
Definition: globals.c:87
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:85

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pfree(), pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().
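
The returned string is palloc'd in the caller's memory context, so callers are expected to pfree() it once logged, as CheckForBufferLeaks() does above. A short usage sketch, assuming a valid buffer in scope:

    char       *s = DebugPrintBufferRefcount(buffer);

    elog(WARNING, "buffer state: %s", s);
    pfree(s);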

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4329 of file bufmgr.c.

4330 {
4331  int i;
4332 
4333  /*
4334  * We needn't consider local buffers, since by assumption the target
4335  * database isn't our own.
4336  */
4337 
4338  for (i = 0; i < NBuffers; i++)
4339  {
4340  BufferDesc *bufHdr = GetBufferDescriptor(i);
4341  uint32 buf_state;
4342 
4343  /*
4344  * As in DropRelationBuffers, an unlocked precheck should be safe and
4345  * saves some cycles.
4346  */
4347  if (bufHdr->tag.dbOid != dbid)
4348  continue;
4349 
4350  buf_state = LockBufHdr(bufHdr);
4351  if (bufHdr->tag.dbOid == dbid)
4352  InvalidateBuffer(bufHdr); /* releases spinlock */
4353  else
4354  UnlockBufHdr(bufHdr, buf_state);
4355  }
4356 }
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1726
Oid dbOid
Definition: buf_internals.h:95

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber *  forkNum,
int  nforks,
BlockNumber *  firstDelBlock 
)

Definition at line 3974 of file bufmgr.c.

3976 {
3977  int i;
3978  int j;
3979  RelFileLocatorBackend rlocator;
3980  BlockNumber nForkBlock[MAX_FORKNUM];
3981  uint64 nBlocksToInvalidate = 0;
3982 
3983  rlocator = smgr_reln->smgr_rlocator;
3984 
3985  /* If it's a local relation, it's localbuf.c's problem. */
3986  if (RelFileLocatorBackendIsTemp(rlocator))
3987  {
3988  if (rlocator.backend == MyProcNumber)
3989  {
3990  for (j = 0; j < nforks; j++)
3991  DropRelationLocalBuffers(rlocator.locator, forkNum[j],
3992  firstDelBlock[j]);
3993  }
3994  return;
3995  }
3996 
3997  /*
3998  * To remove all the pages of the specified relation forks from the buffer
3999  * pool, we need to scan the entire buffer pool but we can optimize it by
4000  * finding the buffers from BufMapping table provided we know the exact
4001  * size of each fork of the relation. The exact size is required to ensure
4002  * that we don't leave any buffer for the relation being dropped as
4003  * otherwise the background writer or checkpointer can lead to a PANIC
4004  * error while flushing buffers corresponding to files that don't exist.
4005  *
4006  * To know the exact size, we rely on the size cached for each fork by us
4007  * during recovery, which limits the optimization to recovery and on
4008  * standbys, but we can easily extend it once we have a shared cache for
4009  * relation size.
4010  *
4011  * In recovery, we cache the value returned by the first lseek(SEEK_END)
4012  * and future writes keep the cached value up-to-date. See
4013  * smgrextend. It is possible that the value of the first lseek is smaller
4014  * than the actual number of existing blocks in the file due to buggy
4015  * Linux kernels that might not have accounted for the recent write. But
4016  * that should be fine because there must not be any buffers after that
4017  * file size.
4018  */
4019  for (i = 0; i < nforks; i++)
4020  {
4021  /* Get the number of blocks for a relation's fork */
4022  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4023 
4024  if (nForkBlock[i] == InvalidBlockNumber)
4025  {
4026  nBlocksToInvalidate = InvalidBlockNumber;
4027  break;
4028  }
4029 
4030  /* calculate the number of blocks to be invalidated */
4031  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4032  }
4033 
4034  /*
4035  * We apply the optimization iff the total number of blocks to invalidate
4036  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4037  */
4038  if (BlockNumberIsValid(nBlocksToInvalidate) &&
4039  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4040  {
4041  for (j = 0; j < nforks; j++)
4042  FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4043  nForkBlock[j], firstDelBlock[j]);
4044  return;
4045  }
4046 
4047  for (i = 0; i < NBuffers; i++)
4048  {
4049  BufferDesc *bufHdr = GetBufferDescriptor(i);
4050  uint32 buf_state;
4051 
4052  /*
4053  * We can make this a tad faster by prechecking the buffer tag before
4054  * we attempt to lock the buffer; this saves a lot of lock
4055  * acquisitions in typical cases. It should be safe because the
4056  * caller must have AccessExclusiveLock on the relation, or some other
4057  * reason to be certain that no one is loading new pages of the rel
4058  * into the buffer pool. (Otherwise we might well miss such pages
4059  * entirely.) Therefore, while the tag might be changing while we
4060  * look at it, it can't be changing *to* a value we care about, only
4061  * *away* from such a value. So false negatives are impossible, and
4062  * false positives are safe because we'll recheck after getting the
4063  * buffer lock.
4064  *
4065  * We could check forkNum and blockNum as well as the rlocator, but
4066  * the incremental win from doing so seems small.
4067  */
4068  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4069  continue;
4070 
4071  buf_state = LockBufHdr(bufHdr);
4072 
4073  for (j = 0; j < nforks; j++)
4074  {
4075  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4076  BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4077  bufHdr->tag.blockNum >= firstDelBlock[j])
4078  {
4079  InvalidateBuffer(bufHdr); /* releases spinlock */
4080  break;
4081  }
4082  }
4083  if (j >= nforks)
4084  UnlockBufHdr(bufHdr, buf_state);
4085  }
4086 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:86
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4268
int j
Definition: isn.c:74
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:489
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:679

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation *  smgr_reln,
int  nlocators 
)

Definition at line 4097 of file bufmgr.c.

4098 {
4099  int i;
4100  int n = 0;
4101  SMgrRelation *rels;
4102  BlockNumber (*block)[MAX_FORKNUM + 1];
4103  uint64 nBlocksToInvalidate = 0;
4104  RelFileLocator *locators;
4105  bool cached = true;
4106  bool use_bsearch;
4107 
4108  if (nlocators == 0)
4109  return;
4110 
4111  rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
4112 
4113  /* If it's a local relation, it's localbuf.c's problem. */
4114  for (i = 0; i < nlocators; i++)
4115  {
4116  if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4117  {
4118  if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4119  DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4120  }
4121  else
4122  rels[n++] = smgr_reln[i];
4123  }
4124 
4125  /*
4126  * If there are no non-local relations, then we're done. Release the
4127  * memory and return.
4128  */
4129  if (n == 0)
4130  {
4131  pfree(rels);
4132  return;
4133  }
4134 
4135  /*
4136  * This is used to remember the number of blocks for all the relations
4137  * forks.
4138  */
4139  block = (BlockNumber (*)[MAX_FORKNUM + 1])
4140  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4141 
4142  /*
4143  * We can avoid scanning the entire buffer pool if we know the exact size
4144  * of each of the given relation forks. See DropRelationBuffers.
4145  */
4146  for (i = 0; i < n && cached; i++)
4147  {
4148  for (int j = 0; j <= MAX_FORKNUM; j++)
4149  {
4150  /* Get the number of blocks for a relation's fork. */
4151  block[i][j] = smgrnblocks_cached(rels[i], j);
4152 
4153  /* We need only consider the relation forks that exist. */
4154  if (block[i][j] == InvalidBlockNumber)
4155  {
4156  if (!smgrexists(rels[i], j))
4157  continue;
4158  cached = false;
4159  break;
4160  }
4161 
4162  /* calculate the total number of blocks to be invalidated */
4163  nBlocksToInvalidate += block[i][j];
4164  }
4165  }
4166 
4167  /*
4168  * We apply the optimization iff the total number of blocks to invalidate
4169  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4170  */
4171  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4172  {
4173  for (i = 0; i < n; i++)
4174  {
4175  for (int j = 0; j <= MAX_FORKNUM; j++)
4176  {
4177  /* ignore relation forks that don't exist */
4178  if (!BlockNumberIsValid(block[i][j]))
4179  continue;
4180 
4181  /* drop all the buffers for a particular relation fork */
4182  FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4183  j, block[i][j], 0);
4184  }
4185  }
4186 
4187  pfree(block);
4188  pfree(rels);
4189  return;
4190  }
4191 
4192  pfree(block);
4193  locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
4194  for (i = 0; i < n; i++)
4195  locators[i] = rels[i]->smgr_rlocator.locator;
4196 
4197  /*
4198  * For a low number of relations to drop, just use a simple walk-through to
4199  * save the bsearch overhead. The threshold to use is rather a guess than
4200  * an exactly determined value, as it depends on many factors (CPU and RAM
4201  * speeds, amount of shared buffers etc.).
4202  */
4203  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4204 
4205  /* sort the list of rlocators if necessary */
4206  if (use_bsearch)
4207  qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4208 
4209  for (i = 0; i < NBuffers; i++)
4210  {
4211  RelFileLocator *rlocator = NULL;
4212  BufferDesc *bufHdr = GetBufferDescriptor(i);
4213  uint32 buf_state;
4214 
4215  /*
4216  * As in DropRelationBuffers, an unlocked precheck should be safe and
4217  * saves some cycles.
4218  */
4219 
4220  if (!use_bsearch)
4221  {
4222  int j;
4223 
4224  for (j = 0; j < n; j++)
4225  {
4226  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4227  {
4228  rlocator = &locators[j];
4229  break;
4230  }
4231  }
4232  }
4233  else
4234  {
4235  RelFileLocator locator;
4236 
4237  locator = BufTagGetRelFileLocator(&bufHdr->tag);
4238  rlocator = bsearch((const void *) &(locator),
4239  locators, n, sizeof(RelFileLocator),
4240  rlocator_comparator);
4241  }
4242 
4243  /* buffer doesn't belong to any of the given relfilelocators; skip it */
4244  if (rlocator == NULL)
4245  continue;
4246 
4247  buf_state = LockBufHdr(bufHdr);
4248  if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4249  InvalidateBuffer(bufHdr); /* releases spinlock */
4250  else
4251  UnlockBufHdr(bufHdr, buf_state);
4252  }
4253 
4254  pfree(locators);
4255  pfree(rels);
4256 }
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:78
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:537
#define qsort(a, b, c, d)
Definition: port.h:449

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf)

Definition at line 6023 of file bufmgr.c.

6024 {
6025  BufferDesc *desc;
6026  uint32 buf_state;
6027  bool result;
6028 
6029  /* Make sure we can pin the buffer. */
6030  ResourceOwnerEnlarge(CurrentResourceOwner);
6031  ReservePrivateRefCountEntry();
6032 
6033  Assert(!BufferIsLocal(buf));
6034  desc = GetBufferDescriptor(buf - 1);
6035 
6036  /* Lock the header and check if it's valid. */
6037  buf_state = LockBufHdr(desc);
6038  if ((buf_state & BM_VALID) == 0)
6039  {
6040  UnlockBufHdr(desc, buf_state);
6041  return false;
6042  }
6043 
6044  /* Check that it's not pinned already. */
6045  if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
6046  {
6047  UnlockBufHdr(desc, buf_state);
6048  return false;
6049  }
6050 
6051  PinBuffer_Locked(desc); /* releases spinlock */
6052 
6053  /* If it was dirty, try to clean it once. */
6054  if (buf_state & BM_DIRTY)
6055  {
6056  LWLockAcquire(BufferDescriptorGetContentLock(desc), LW_SHARED);
6057  FlushBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
6058  LWLockRelease(BufferDescriptorGetContentLock(desc));
6059  }
6060 
6061  /* This will return false if it becomes dirty or someone else pins it. */
6062  result = InvalidateVictimBuffer(desc);
6063 
6064  UnpinBuffer(desc);
6065 
6066  return result;
6067 }
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:3727
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:2706
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition: bufmgr.c:1824
@ IOOBJECT_RELATION
Definition: pgstat.h:280

References Assert, BM_DIRTY, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), BufferIsLocal, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by pg_buffercache_evict().
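
A sketch modeled on pg_buffercache_evict(): the result is best-effort, since false is returned both for invalid or pinned buffers and when the buffer is re-dirtied or re-pinned during the attempt. The counter is hypothetical:

    int         evicted = 0;

    /* best-effort sweep over the whole shared buffer pool */
    for (Buffer buf = 1; buf <= NBuffers; buf++)
    {
        if (EvictUnpinnedBuffer(buf))
            evicted++;
    }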

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 845 of file bufmgr.c.

849 {
850  Buffer buf;
851  uint32 extend_by = 1;
852 
853  ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
854  &buf, &extend_by);
855 
856  return buf;
857 }
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:877

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
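
The common one-page extension pattern used by the index AMs above: with EB_LOCK_FIRST the returned buffer comes back pinned, exclusively locked, and zero-filled, ready to initialize. A sketch assuming an open Relation rel (WAL logging omitted):

    Buffer      buf;

    buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL, EB_LOCK_FIRST);

    /* the new page is zeroed; lay down a standard page header */
    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);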

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer *  buffers,
uint32 *  extended_by 
)

Definition at line 877 of file bufmgr.c.

884 {
885  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
886  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
887  Assert(extend_by > 0);
888 
889  if (bmr.smgr == NULL)
890  {
891  bmr.smgr = RelationGetSmgr(bmr.rel);
892  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
893  }
894 
895  return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
896  extend_by, InvalidBlockNumber,
897  buffers, extended_by);
898 }
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2089
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
struct SMgrRelationData * smgr
Definition: bufmgr.h:103
Form_pg_class rd_rel
Definition: rel.h:111

References Assert, ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, RelationGetSmgr(), BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), and RelationAddBlocks().
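
Bulk extension amortizes the relation-extension lock and victim-buffer acquisition across many pages, which is how RelationAddBlocks() grows heaps under concurrency. A sketch assuming an open Relation rel; *extended_by reports how many buffers were actually returned, which can be fewer than requested:

    Buffer      victim_buffers[8];
    uint32      extended_by = 0;
    BlockNumber first_block;

    first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                      NULL, 0,
                                      lengthof(victim_buffers),
                                      victim_buffers, &extended_by);

    /* buffers come back pinned but unlocked (no EB_LOCK_FIRST given) */
    for (uint32 i = 0; i < extended_by; i++)
        ReleaseBuffer(victim_buffers[i]);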

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 2089 of file bufmgr.c.

2097 {
2098  BlockNumber first_block;
2099 
2100  TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2101  bmr.smgr->smgr_rlocator.locator.spcOid,
2102  bmr.smgr->smgr_rlocator.locator.dbOid,
2103  bmr.smgr->smgr_rlocator.locator.relNumber,
2104  bmr.smgr->smgr_rlocator.backend,
2105  extend_by);
2106 
2107  if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2108  first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2109  extend_by, extend_upto,
2110  buffers, &extend_by);
2111  else
2112  first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2113  extend_by, extend_upto,
2114  buffers, &extend_by);
2115  *extended_by = extend_by;
2116 
2117  TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2118  bmr.smgr->smgr_rlocator.locator.spcOid,
2119  bmr.smgr->smgr_rlocator.locator.dbOid,
2120  bmr.smgr->smgr_rlocator.locator.relNumber,
2121  bmr.smgr->smgr_rlocator.backend,
2122  *extended_by,
2123  first_block);
2124 
2125  return first_block;
2126 }
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2133
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: localbuf.c:313
RelFileNumber relNumber

References RelFileLocatorBackend::backend, RelFileLocator::dbOid, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), RelFileLocatorBackend::locator, RelFileLocator::relNumber, BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_rlocator, and RelFileLocator::spcOid.

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 2133 of file bufmgr.c.

2141 {
2142  BlockNumber first_block;
2143  IOContext io_context = IOContextForStrategy(strategy);
2144  instr_time io_start;
2145 
2146  LimitAdditionalPins(&extend_by);
2147 
2148  /*
2149  * Acquire victim buffers for extension without holding extension lock.
2150  * Writing out victim buffers is the most expensive part of extending the
2151  * relation, particularly when doing so requires WAL flushes. Zeroing out
2152  * the buffers is also quite expensive, so do that before holding the
2153  * extension lock as well.
2154  *
2155  * These pages are pinned by us and not valid. While we hold the pin they
2156  * can't be acquired as victim buffers by another backend.
2157  */
2158  for (uint32 i = 0; i < extend_by; i++)
2159  {
2160  Block buf_block;
2161 
2162  buffers[i] = GetVictimBuffer(strategy, io_context);
2163  buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2164 
2165  /* new buffers are zero-filled */
2166  MemSet((char *) buf_block, 0, BLCKSZ);
2167  }
2168 
2169  /*
2170  * Lock relation against concurrent extensions, unless requested not to.
2171  *
2172  * We use the same extension lock for all forks. That's unnecessarily
2173  * restrictive, but currently extensions for forks don't happen often
2174  * enough to make it worth locking more granularly.
2175  *
2176  * Note that another backend might have extended the relation by the time
2177  * we get the lock.
2178  */
2179  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2180  LockRelationForExtension(bmr.rel, ExclusiveLock);
2181 
2182  /*
2183  * If requested, invalidate size cache, so that smgrnblocks asks the
2184  * kernel.
2185  */
2186  if (flags & EB_CLEAR_SIZE_CACHE)
2187  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2188 
2189  first_block = smgrnblocks(bmr.smgr, fork);
2190 
2191  /*
2192  * Now that we have the accurate relation size, check if the caller wants
2193  * us to extend only up to a specific size. If there were concurrent
2194  * extensions, we might have acquired too many buffers and need to release
2195  * them.
2196  */
2197  if (extend_upto != InvalidBlockNumber)
2198  {
2199  uint32 orig_extend_by = extend_by;
2200 
2201  if (first_block > extend_upto)
2202  extend_by = 0;
2203  else if ((uint64) first_block + extend_by > extend_upto)
2204  extend_by = extend_upto - first_block;
2205 
2206  for (uint32 i = extend_by; i < orig_extend_by; i++)
2207  {
2208  BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2209 
2210  /*
2211  * The victim buffer we acquired previously is clean and unused;
2212  * let it be found again quickly
2213  */
2214  StrategyFreeBuffer(buf_hdr);
2215  UnpinBuffer(buf_hdr);
2216  }
2217 
2218  if (extend_by == 0)
2219  {
2220  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2221  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2222  *extended_by = extend_by;
2223  return first_block;
2224  }
2225  }
2226 
2227  /* Fail if relation is already at maximum possible length */
2228  if ((uint64) first_block + extend_by >= MaxBlockNumber)
2229  ereport(ERROR,
2230  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2231  errmsg("cannot extend relation %s beyond %u blocks",
2232  relpath(bmr.smgr->smgr_rlocator, fork),
2233  MaxBlockNumber)));
2234 
2235  /*
2236  * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2237  *
2238  * This needs to happen before we extend the relation, because as soon as
2239  * we do, other backends can start to read in those pages.
2240  */
2241  for (uint32 i = 0; i < extend_by; i++)
2242  {
2243  Buffer victim_buf = buffers[i];
2244  BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2245  BufferTag tag;
2246  uint32 hash;
2247  LWLock *partition_lock;
2248  int existing_id;
2249 
2250  /* in case we need to pin an existing buffer below */
2251  ResourceOwnerEnlarge(CurrentResourceOwner);
2252  ReservePrivateRefCountEntry();
2253 
2254  InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i);
2255  hash = BufTableHashCode(&tag);
2256  partition_lock = BufMappingPartitionLock(hash);
2257 
2258  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2259 
2260  existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2261 
2262  /*
2263  * We get here only in the corner case where we are trying to extend
2264  * the relation but we found a pre-existing buffer. This can happen
2265  * because a prior attempt at extending the relation failed, and
2266  * because mdread doesn't complain about reads beyond EOF (when
2267  * zero_damaged_pages is ON) and so a previous attempt to read a block
2268  * beyond EOF could have left a "valid" zero-filled buffer.
2269  * Unfortunately, we have also seen this case occurring because of
2270  * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
2271  * that doesn't account for a recent write. In that situation, the
2272  * pre-existing buffer would contain valid data that we don't want to
2273  * overwrite. Since the legitimate cases should always have left a
2274  * zero-filled buffer, complain if not PageIsNew.
2275  */
2276  if (existing_id >= 0)
2277  {
2278  BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2279  Block buf_block;
2280  bool valid;
2281 
2282  /*
2283  * Pin the existing buffer before releasing the partition lock,
2284  * preventing it from being evicted.
2285  */
2286  valid = PinBuffer(existing_hdr, strategy);
2287 
2288  LWLockRelease(partition_lock);
2289 
2290  /*
2291  * The victim buffer we acquired previously is clean and unused;
2292  * let it be found again quickly
2293  */
2294  StrategyFreeBuffer(victim_buf_hdr);
2295  UnpinBuffer(victim_buf_hdr);
2296 
2297  buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
2298  buf_block = BufHdrGetBlock(existing_hdr);
2299 
2300  if (valid && !PageIsNew((Page) buf_block))
2301  ereport(ERROR,
2302  (errmsg("unexpected data beyond EOF in block %u of relation %s",
2303  existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork)),
2304  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2305 
2306  /*
2307  * We *must* do smgr[zero]extend before succeeding, else the page
2308  * will not be reserved by the kernel, and the next P_NEW call
2309  * will decide to return the same page. Clear the BM_VALID bit,
2310  * do StartBufferIO() and proceed.
2311  *
2312  * Loop to handle the very small possibility that someone re-sets
2313  * BM_VALID between our clearing it and StartBufferIO inspecting
2314  * it.
2315  */
2316  do
2317  {
2318  uint32 buf_state = LockBufHdr(existing_hdr);
2319 
2320  buf_state &= ~BM_VALID;
2321  UnlockBufHdr(existing_hdr, buf_state);
2322  } while (!StartBufferIO(existing_hdr, true, false));
2323  }
2324  else
2325  {
2326  uint32 buf_state;
2327 
2328  buf_state = LockBufHdr(victim_buf_hdr);
2329 
2330  /* some sanity checks while we hold the buffer header lock */
2331  Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
2332  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2333 
2334  victim_buf_hdr->tag = tag;
2335 
2336  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2337  if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2338  buf_state |= BM_PERMANENT;
2339 
2340  UnlockBufHdr(victim_buf_hdr, buf_state);
2341 
2342  LWLockRelease(partition_lock);
2343 
2344  /* XXX: could combine the locked operations in it with the above */
2345  StartBufferIO(victim_buf_hdr, true, false);
2346  }
2347  }
2348 
2349  io_start = pgstat_prepare_io_time(track_io_timing);
2350 
2351  /*
2352  * Note: if smgrzeroextend fails, we will end up with buffers that are
2353  * allocated but not marked BM_VALID. The next relation extension will
2354  * still select the same block number (because the relation didn't get any
2355  * longer on disk) and so future attempts to extend the relation will find
2356  * the same buffers (if they have not been recycled) but come right back
2357  * here to try smgrzeroextend again.
2358  *
2359  * We don't need to set checksum for all-zero pages.
2360  */
2361  smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
2362 
2363  /*
2364  * Release the file-extension lock; it's now OK for someone else to extend
2365  * the relation some more.
2366  *
2367  * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2368  * take noticeable time.
2369  */
2370  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2371  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2372 
2373  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2374  io_start, extend_by);
2375 
2376  /* Set BM_VALID, terminate IO, and wake up any waiters */
2377  for (uint32 i = 0; i < extend_by; i++)
2378  {
2379  Buffer buf = buffers[i];
2380  BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2381  bool lock = false;
2382 
2383  if (flags & EB_LOCK_FIRST && i == 0)
2384  lock = true;
2385  else if (flags & EB_LOCK_TARGET)
2386  {
2387  Assert(extend_upto != InvalidBlockNumber);
2388  if (first_block + i + 1 == extend_upto)
2389  lock = true;
2390  }
2391 
2392  if (lock)
2393  LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
2394 
2395  TerminateBufferIO(buf_hdr, false, BM_VALID, true);
2396  }
2397 
2398  pgBufferUsage.shared_blks_written += extend_by;
2399 
2400  *extended_by = extend_by;
2401 
2402  return first_block;
2403 }
#define MaxBlockNumber
Definition: block.h:35
#define BM_JUST_DIRTIED
Definition: buf_internals.h:66
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
bool track_io_timing
Definition: bufmgr.c:142
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:67
void LimitAdditionalPins(uint32 *additional_pins)
Definition: bufmgr.c:2058
static bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition: bufmgr.c:5485
void * Block
Definition: bufmgr.h:25
@ EB_LOCK_TARGET
Definition: bufmgr.h:92
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:89
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:74
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
Pointer Page
Definition: bufpage.h:78
static bool PageIsNew(Page page)
Definition: bufpage.h:230
#define MemSet(start, val, len)
Definition: c.h:1020
int errhint(const char *fmt,...)
Definition: elog.c:1319
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:758
BufferUsage pgBufferUsage
Definition: instrument.c:20
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:430
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:480
#define ExclusiveLock
Definition: lockdefs.h:42
IOContext
Definition: pgstat.h:287
@ IOOP_EXTEND
Definition: pgstat.h:299
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition: relpath.h:94
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:655
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:560
int64 shared_blks_written
Definition: instrument.h:29
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46

References Assert, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errhint(), errmsg(), ERROR, ExclusiveLock, GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), RelFileLocatorBackend::locator, LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), BufferManagerRelation::rel, relpath, BufferManagerRelation::relpersistence, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrzeroextend(), StartBufferIO(), StrategyFreeBuffer(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 909 of file bufmgr.c.

915 {
916  BlockNumber current_size;
917  uint32 extended_by = 0;
918  Buffer buffer = InvalidBuffer;
919  Buffer buffers[64];
920 
921  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
922  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
923  Assert(extend_to != InvalidBlockNumber && extend_to > 0);
924 
925  if (bmr.smgr == NULL)
926  {
927  bmr.smgr = RelationGetSmgr(bmr.rel);
928  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
929  }
930 
931  /*
932  * If desired, create the file if it doesn't exist. If
933  * smgr_cached_nblocks[fork] is positive then it must exist, no need for
934  * an smgrexists call.
935  */
936  if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
937  (bmr.smgr->smgr_cached_nblocks[fork] == 0 ||
938  bmr.smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
939  !smgrexists(bmr.smgr, fork))
940  {
941  LockRelationForExtension(bmr.rel, ExclusiveLock);
942 
943  /* recheck, fork might have been created concurrently */
944  if (!smgrexists(bmr.smgr, fork))
945  smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY);
946 
947  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
948  }
949 
950  /*
951  * If requested, invalidate size cache, so that smgrnblocks asks the
952  * kernel.
953  */
954  if (flags & EB_CLEAR_SIZE_CACHE)
955  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
956 
957  /*
958  * Estimate how many pages we'll need to extend by. This avoids acquiring
959  * unnecessarily many victim buffers.
960  */
961  current_size = smgrnblocks(bmr.smgr, fork);
962 
963  /*
964  * Since no-one else can be looking at the page contents yet, there is no
965  * difference between an exclusive lock and a cleanup-strength lock. Note
966  * that we pass the original mode to ReadBuffer_common() below, when
967  * falling back to reading the buffer to a concurrent relation extension.
968  */
969  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
970  flags |= EB_LOCK_TARGET;
971 
972  while (current_size < extend_to)
973  {
974  uint32 num_pages = lengthof(buffers);
975  BlockNumber first_block;
976 
977  if ((uint64) current_size + num_pages > extend_to)
978  num_pages = extend_to - current_size;
979 
980  first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
981  num_pages, extend_to,
982  buffers, &extended_by);
983 
984  current_size = first_block + extended_by;
985  Assert(num_pages != 0 || current_size >= extend_to);
986 
987  for (uint32 i = 0; i < extended_by; i++)
988  {
989  if (first_block + i != extend_to - 1)
990  ReleaseBuffer(buffers[i]);
991  else
992  buffer = buffers[i];
993  }
994  }
995 
996  /*
997  * It's possible that another backend concurrently extended the relation.
998  * In that case read the buffer.
999  *
1000  * XXX: Should we control this via a flag?
1001  */
1002  if (buffer == InvalidBuffer)
1003  {
1004  Assert(extended_by == 0);
1005  buffer = ReadBuffer_common(bmr.rel, bmr.smgr, 0,
1006  fork, extend_to - 1, mode, strategy);
1007  }
1008 
1009  return buffer;
1010 }
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1152
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4850
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:77
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:83
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:48
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:46
#define lengthof(array)
Definition: c.h:788
static PgChecksumMode mode
Definition: pg_checksums.c:56
int64 current_size
Definition: pg_checksums.c:64

References Assert, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, RelationGetSmgr(), ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
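
A sketch of the recovery-side pattern from XLogReadBufferExtended(): guarantee that block blkno exists (creating the fork if necessary, extending as needed) and come back with that block locked. smgr, forknum, and blkno are assumed to be in scope; note that extend_to is a block count, hence blkno + 1:

    Buffer      buffer;

    buffer = ExtendBufferedRelTo(BMR_SMGR(smgr, RELPERSISTENCE_PERMANENT),
                                 forknum, NULL,
                                 EB_PERFORMING_RECOVERY |
                                 EB_CREATE_FORK_IF_NEEDED,
                                 blkno + 1,
                                 RBM_ZERO_AND_LOCK);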

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4268 of file bufmgr.c.

4271 {
4272  BlockNumber curBlock;
4273 
4274  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4275  {
4276  uint32 bufHash; /* hash value for tag */
4277  BufferTag bufTag; /* identity of requested block */
4278  LWLock *bufPartitionLock; /* buffer partition lock for it */
4279  int buf_id;
4280  BufferDesc *bufHdr;
4281  uint32 buf_state;
4282 
4283  /* create a tag so we can lookup the buffer */
4284  InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4285 
4286  /* determine its hash code and partition lock ID */
4287  bufHash = BufTableHashCode(&bufTag);
4288  bufPartitionLock = BufMappingPartitionLock(bufHash);
4289 
4290  /* Check that it is in the buffer pool. If not, do nothing. */
4291  LWLockAcquire(bufPartitionLock, LW_SHARED);
4292  buf_id = BufTableLookup(&bufTag, bufHash);
4293  LWLockRelease(bufPartitionLock);
4294 
4295  if (buf_id < 0)
4296  continue;
4297 
4298  bufHdr = GetBufferDescriptor(buf_id);
4299 
4300  /*
4301  * We need to lock the buffer header and recheck if the buffer is
4302  * still associated with the same block because the buffer could be
4303  * evicted by some other backend loading blocks for a different
4304  * relation after we release lock on the BufMapping table.
4305  */
4306  buf_state = LockBufHdr(bufHdr);
4307 
4308  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
4309  BufTagGetForkNum(&bufHdr->tag) == forkNum &&
4310  bufHdr->tag.blockNum >= firstDelBlock)
4311  InvalidateBuffer(bufHdr); /* releases spinlock */
4312  else
4313  UnlockBufHdr(bufHdr, buf_state);
4314  }
4315 }

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc *  buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 3727 of file bufmgr.c.

3729 {
3730  XLogRecPtr recptr;
3731  ErrorContextCallback errcallback;
3732  instr_time io_start;
3733  Block bufBlock;
3734  char *bufToWrite;
3735  uint32 buf_state;
3736 
3737  /*
3738  * Try to start an I/O operation. If StartBufferIO returns false, then
3739  * someone else flushed the buffer before we could, so we need not do
3740  * anything.
3741  */
3742  if (!StartBufferIO(buf, false, false))
3743  return;
3744 
3745  /* Setup error traceback support for ereport() */
3746  errcallback.callback = shared_buffer_write_error_callback;
3747  errcallback.arg = (void *) buf;
3748  errcallback.previous = error_context_stack;
3749  error_context_stack = &errcallback;
3750 
3751  /* Find smgr relation for buffer */
3752  if (reln == NULL)
3753  reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
3754 
3755  TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
3756  buf->tag.blockNum,
3757  reln->smgr_rlocator.locator.spcOid,
3758  reln->smgr_rlocator.locator.dbOid,
3759  reln->smgr_rlocator.locator.relNumber);
3760 
3761  buf_state = LockBufHdr(buf);
3762 
3763  /*
3764  * Run PageGetLSN while holding header lock, since we don't have the
3765  * buffer locked exclusively in all cases.
3766  */
3767  recptr = BufferGetLSN(buf);
3768 
3769  /* To check if block content changes while flushing. - vadim 01/17/97 */
3770  buf_state &= ~BM_JUST_DIRTIED;
3771  UnlockBufHdr(buf, buf_state);
3772 
3773  /*
3774  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
3775  * rule that log updates must hit disk before any of the data-file changes
3776  * they describe do.
3777  *
3778  * However, this rule does not apply to unlogged relations, which will be
3779  * lost after a crash anyway. Most unlogged relation pages do not bear
3780  * LSNs since we never emit WAL records for them, and therefore flushing
3781  * up through the buffer LSN would be useless, but harmless. However,
3782  * GiST indexes use LSNs internally to track page-splits, and therefore
3783  * unlogged GiST pages bear "fake" LSNs generated by
3784  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
3785  * LSN counter could advance past the WAL insertion point; and if it did
3786  * happen, attempting to flush WAL through that location would fail, with
3787  * disastrous system-wide consequences. To make sure that can't happen,
3788  * skip the flush if the buffer isn't permanent.
3789  */
3790  if (buf_state & BM_PERMANENT)
3791  XLogFlush(recptr);
3792 
3793  /*
3794  * Now it's safe to write buffer to disk. Note that no one else should
3795  * have been able to write it while we were busy with log flushing because
3796  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
3797  */
3798  bufBlock = BufHdrGetBlock(buf);
3799 
3800  /*
3801  * Update page checksum if desired. Since we have only shared lock on the
3802  * buffer, other processes might be updating hint bits in it, so we must
3803  * copy the page to private storage if we do checksumming.
3804  */
3805  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
3806 
3807  io_start = pgstat_prepare_io_time(track_io_timing);
3808 
3809  /*
3810  * bufToWrite is either the shared buffer or a copy, as appropriate.
3811  */
3812  smgrwrite(reln,
3813  BufTagGetForkNum(&buf->tag),
3814  buf->tag.blockNum,
3815  bufToWrite,
3816  false);
3817 
3818  /*
3819  * When a strategy is in use, only flushes of dirty buffers already in the
3820  * strategy ring are counted as strategy writes (IOCONTEXT
3821  * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
3822  * statistics tracking.
3823  *
3824  * If a shared buffer initially added to the ring must be flushed before
3825  * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
3826  *
3827  * If a shared buffer which was added to the ring later because the
3828  * current strategy buffer is pinned or in use or because all strategy
3829  * buffers were dirty and rejected (for BAS_BULKREAD operations only)
3830  * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
3831  * (from_ring will be false).
3832  *
3833  * When a strategy is not in use, the write can only be a "regular" write
3834  * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
3835  */
3836  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
3837  IOOP_WRITE, io_start, 1);
3838 
3839  pgBufferUsage.shared_blks_written++;
3840 
3841  /*
3842  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
3843  * end the BM_IO_IN_PROGRESS state.
3844  */
3845  TerminateBufferIO(buf, true, 0, true);
3846 
3847  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
3848  buf->tag.blockNum,
3849  reln->smgr_rlocator.locator.spcOid,
3850  reln->smgr_rlocator.locator.dbOid,
3851  reln->smgr_rlocator.locator.relNumber);
3852 
3853  /* Pop the error context stack */
3854  error_context_stack = errcallback.previous;
3855 }
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:68
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5621
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
ErrorContextCallback * error_context_stack
Definition: elog.c:94
@ IOOP_WRITE
Definition: pgstat.h:304
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.h:121
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2791

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdr(), and XLogFlush().

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), and SyncOneBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 4788 of file bufmgr.c.

4789 {
4790  int i;
4791  BufferDesc *bufHdr;
4792 
4793  for (i = 0; i < NBuffers; i++)
4794  {
4795  uint32 buf_state;
4796 
4797  bufHdr = GetBufferDescriptor(i);
4798 
4799  /*
4800  * As in DropRelationBuffers, an unlocked precheck should be safe and
4801  * saves some cycles.
4802  */
4803  if (bufHdr->tag.dbOid != dbid)
4804  continue;
4805 
4806  /* Make sure we can handle the pin */
4807  ReservePrivateRefCountEntry();
4808  ResourceOwnerEnlarge(CurrentResourceOwner);
4809 
4810  buf_state = LockBufHdr(bufHdr);
4811  if (bufHdr->tag.dbOid == dbid &&
4812  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4813  {
4814  PinBuffer_Locked(bufHdr);
4815  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4816  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4817  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4818  UnpinBuffer(bufHdr);
4819  }
4820  else
4821  UnlockBufHdr(bufHdr, buf_state);
4822  }
4823 }

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), CurrentResourceOwner, buftag::dbOid, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 4830 of file bufmgr.c.

4831 {
4832  BufferDesc *bufHdr;
4833 
4834  /* currently not needed, but no fundamental reason not to support */
4835  Assert(!BufferIsLocal(buffer));
4836 
4837  Assert(BufferIsPinned(buffer));
4838 
4839  bufHdr = GetBufferDescriptor(buffer - 1);
4840 
4841  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4842 
4843  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4844 }
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1895

References Assert, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().
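
As the assertions above imply, the caller must already hold a pin and the buffer's content lock; FlushOneBuffer() merely pushes the page through FlushBuffer() without unpinning or unlocking it. A sketch of the redo-side usage, assuming a pinned buffer:

    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    FlushOneBuffer(buffer);     /* write the page out via smgr */
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);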

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 4435 of file bufmgr.c.

4436 {
4437  int i;
4438  BufferDesc *bufHdr;
4439  SMgrRelation srel = RelationGetSmgr(rel);
4440 
4441  if (RelationUsesLocalBuffers(rel))
4442  {
4443  for (i = 0; i < NLocBuffer; i++)
4444  {
4445  uint32 buf_state;
4446  instr_time io_start;
4447 
4448  bufHdr = GetLocalBufferDescriptor(i);
4449  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4450  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
4451  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4452  {
4453  ErrorContextCallback errcallback;
4454  Page localpage;
4455 
4456  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
4457 
4458  /* Setup error traceback support for ereport() */
4459  errcallback.callback = local_buffer_write_error_callback;
4460  errcallback.arg = (void *) bufHdr;
4461  errcallback.previous = error_context_stack;
4462  error_context_stack = &errcallback;
4463 
4464  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
4465 
4466  io_start = pgstat_prepare_io_time(track_io_timing);
4467 
4468  smgrwrite(srel,
4469  BufTagGetForkNum(&bufHdr->tag),
4470  bufHdr->tag.blockNum,
4471  localpage,
4472  false);
4473 
4474  pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
4475  IOCONTEXT_NORMAL, IOOP_WRITE,
4476  io_start, 1);
4477 
4478  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
4479  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
4480 
4481  pgBufferUsage.local_blks_written++;
4482 
4483  /* Pop the error context stack */
4484  error_context_stack = errcallback.previous;
4485  }
4486  }
4487 
4488  return;
4489  }
4490 
4491  for (i = 0; i < NBuffers; i++)
4492  {
4493  uint32 buf_state;
4494 
4495  bufHdr = GetBufferDescriptor(i);
4496 
4497  /*
4498  * As in DropRelationBuffers, an unlocked precheck should be safe and
4499  * saves some cycles.
4500  */
4501  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
4502  continue;
4503 
4504  /* Make sure we can handle the pin */
4505  ReservePrivateRefCountEntry();
4506  ResourceOwnerEnlarge(CurrentResourceOwner);
4507 
4508  buf_state = LockBufHdr(bufHdr);
4509  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4510  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4511  {
4512  PinBuffer_Locked(bufHdr);
4513  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4514  FlushBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4515  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4516  UnpinBuffer(bufHdr);
4517  }
4518  else
4519  UnlockBufHdr(bufHdr, buf_state);
4520  }
4521 }
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:290
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:71
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5641
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
int NLocBuffer
Definition: localbuf.c:42
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:281
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
int64 local_blks_written
Definition: instrument.h:33
RelFileLocator rd_locator
Definition: rel.h:57

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_WRITE, BufferUsage::local_blks_written, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), smgrwrite(), BufferDesc::state, BufferDesc::tag, track_io_timing, UnlockBufHdr(), and UnpinBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
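
The local-buffer branch brackets each smgrwrite() with the pgstat I/O-timing pair, as the listing shows. A minimal sketch of that bracket, with srel, forknum, blkno and page as hypothetical stand-ins:

 instr_time  io_start;

 io_start = pgstat_prepare_io_time(track_io_timing);
 smgrwrite(srel, forknum, blkno, page, false);
 pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
                         IOOP_WRITE, io_start, 1);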

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 4533 of file bufmgr.c.

4534 {
4535  int i;
4536  SMgrSortArray *srels;
4537  bool use_bsearch;
4538 
4539  if (nrels == 0)
4540  return;
4541 
4542  /* fill-in array for qsort */
4543  srels = palloc(sizeof(SMgrSortArray) * nrels);
4544 
4545  for (i = 0; i < nrels; i++)
4546  {
4547  Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
4548 
4549  srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
4550  srels[i].srel = smgrs[i];
4551  }
4552 
4553  /*
4554  * Save the bsearch overhead for low number of relations to sync. See
4555  * DropRelationsAllBuffers for details.
4556  */
4557  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
4558 
4559  /* sort the list of SMgrRelations if necessary */
4560  if (use_bsearch)
4561  qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
4562 
4563  for (i = 0; i < NBuffers; i++)
4564  {
4565  SMgrSortArray *srelent = NULL;
4566  BufferDesc *bufHdr = GetBufferDescriptor(i);
4567  uint32 buf_state;
4568 
4569  /*
4570  * As in DropRelationBuffers, an unlocked precheck should be safe and
4571  * saves some cycles.
4572  */
4573 
4574  if (!use_bsearch)
4575  {
4576  int j;
4577 
4578  for (j = 0; j < nrels; j++)
4579  {
4580  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
4581  {
4582  srelent = &srels[j];
4583  break;
4584  }
4585  }
4586  }
4587  else
4588  {
4589  RelFileLocator rlocator;
4590 
4591  rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4592  srelent = bsearch((const void *) &(rlocator),
4593  srels, nrels, sizeof(SMgrSortArray),
4594  rlocator_comparator);
4595  }
4596 
4597  /* buffer doesn't belong to any of the given relfilelocators; skip it */
4598  if (srelent == NULL)
4599  continue;
4600 
4601  /* Make sure we can handle the pin */
4602  ReservePrivateRefCountEntry();
4603  ResourceOwnerEnlarge(CurrentResourceOwner);
4604 
4605  buf_state = LockBufHdr(bufHdr);
4606  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
4607  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4608  {
4609  PinBuffer_Locked(bufHdr);
4610  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4611  FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4612  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4613  UnpinBuffer(bufHdr);
4614  }
4615  else
4616  UnlockBufHdr(bufHdr, buf_state);
4617  }
4618 
4619  pfree(srels);
4620 }
SMgrRelation srel
Definition: bufmgr.c:135
RelFileLocator rlocator
Definition: bufmgr.c:134

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, palloc(), pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().
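
A hedged caller sketch in the style of smgrdosyncall(): open each relation at the smgr level, then flush the whole batch in a single pass over shared buffers rather than one pass per relation (loc1 and loc2 are hypothetical relfilelocators):

 SMgrRelation srels[2];

 srels[0] = smgropen(loc1, INVALID_PROC_NUMBER);
 srels[1] = smgropen(loc2, INVALID_PROC_NUMBER);
 FlushRelationsAllBuffers(srels, 2);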

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 438 of file bufmgr.c.

439 {
440  Assert(ref->refcount == 0);
441 
442  if (ref >= &PrivateRefCountArray[0] &&
443  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
444  {
445  ref->buffer = InvalidBuffer;
446 
447  /*
448  * Mark the just used entry as reserved - in many scenarios that
449  * allows us to avoid ever having to search the array/hash for free
450  * entries.
451  */
452  ReservedRefCountEntry = ref;
453  }
454  else
455  {
456  bool found;
457  Buffer buffer = ref->buffer;
458 
459  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
460  Assert(found);
461  Assert(PrivateRefCountOverflowed > 0);
462  PrivateRefCountOverflowed--;
463  }
464 }
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:211
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_REMOVE
Definition: hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountEntry.

Referenced by UnpinBufferNoOwner().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inline static

Definition at line 415 of file bufmgr.c.

416 {
417  PrivateRefCountEntry *ref;
418 
419  Assert(BufferIsValid(buffer));
420  Assert(!BufferIsLocal(buffer));
421 
422  /*
423  * Not moving the entry - that's ok for the current users, but we might
424  * want to change this one day.
425  */
426  ref = GetPrivateRefCountEntry(buffer, false);
427 
428  if (ref == NULL)
429  return 0;
430  return ref->refcount;
431 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:341

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), MarkBufferDirtyHint(), and ReadRecentBuffer().
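
Because the count is tracked per backend, GetPrivateRefCount() answers "how many pins does this backend hold?", not how many exist globally. A minimal sketch of the typical check, usable only within bufmgr.c since the function is static:

 if (GetPrivateRefCount(buffer) == 1)
 {
     /* our only pin; other backends may still hold their own pins */
 }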

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 341 of file bufmgr.c.

342 {
343  PrivateRefCountEntry *res;
344  int i;
345 
346  Assert(BufferIsValid(buffer));
347  Assert(!BufferIsLocal(buffer));
348 
349  /*
350  * First search for references in the array, that'll be sufficient in the
351  * majority of cases.
352  */
353  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
354  {
355  res = &PrivateRefCountArray[i];
356 
357  if (res->buffer == buffer)
358  return res;
359  }
360 
361  /*
362  * By here we know that the buffer, if already pinned, isn't residing in
363  * the array.
364  *
365  * Only look up the buffer in the hashtable if we've previously overflowed
366  * into it.
367  */
368  if (PrivateRefCountOverflowed == 0)
369  return NULL;
370 
371  res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL);
372 
373  if (res == NULL)
374  return NULL;
375  else if (!do_move)
376  {
377  /* caller doesn't want us to move the hash entry into the array */
378  return res;
379  }
380  else
381  {
382  /* move buffer from hashtable into the free array slot */
383  bool found;
384  PrivateRefCountEntry *free;
385 
386  /* Ensure there's a free array slot */
387  ReservePrivateRefCountEntry();
388 
389  /* Use up the reserved slot */
390  Assert(ReservedRefCountEntry != NULL);
391  free = ReservedRefCountEntry;
392  ReservedRefCountEntry = NULL;
393  Assert(free->buffer == InvalidBuffer);
394 
395  /* and fill it */
396  free->buffer = buffer;
397  free->refcount = res->refcount;
398 
399  /* delete from hashtable */
400  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
401  Assert(found);
402  Assert(PrivateRefCountOverflowed > 0);
403  PrivateRefCountOverflowed--;
404 
405  return free;
406  }
407 }
@ HASH_FIND
Definition: hsearch.h:113

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBufferNoOwner().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 1892 of file bufmgr.c.

1893 {
1894  BufferDesc *buf_hdr;
1895  Buffer buf;
1896  uint32 buf_state;
1897  bool from_ring;
1898 
1899  /*
1900  * Ensure, while the spinlock's not yet held, that there's a free refcount
1901  * entry, and a resource owner slot for the pin.
1902  */
1903  ReservePrivateRefCountEntry();
1904  ResourceOwnerEnlarge(CurrentResourceOwner);
1905 
1906  /* we return here if a prospective victim buffer gets used concurrently */
1907 again:
1908 
1909  /*
1910  * Select a victim buffer. The buffer is returned with its header
1911  * spinlock still held!
1912  */
1913  buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
1914  buf = BufferDescriptorGetBuffer(buf_hdr);
1915 
1916  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1917 
1918  /* Pin the buffer and then release the buffer spinlock */
1919  PinBuffer_Locked(buf_hdr);
1920 
1921  /*
1922  * We shouldn't have any other pins for this buffer.
1923  */
1924  CheckBufferIsPinnedOnce(buf);
1925 
1926  /*
1927  * If the buffer was dirty, try to write it out. There is a race
1928  * condition here, in that someone might dirty it after we released the
1929  * buffer header lock above, or even while we are writing it out (since
1930  * our share-lock won't prevent hint-bit updates). We will recheck the
1931  * dirty bit after re-locking the buffer header.
1932  */
1933  if (buf_state & BM_DIRTY)
1934  {
1935  LWLock *content_lock;
1936 
1937  Assert(buf_state & BM_TAG_VALID);
1938  Assert(buf_state & BM_VALID);
1939 
1940  /*
1941  * We need a share-lock on the buffer contents to write it out (else
1942  * we might write invalid data, eg because someone else is compacting
1943  * the page contents while we write). We must use a conditional lock
1944  * acquisition here to avoid deadlock. Even though the buffer was not
1945  * pinned (and therefore surely not locked) when StrategyGetBuffer
1946  * returned it, someone else could have pinned and exclusive-locked it
1947  * by the time we get here. If we try to get the lock unconditionally,
1948  * we'd block waiting for them; if they later block waiting for us,
1949  * deadlock ensues. (This has been observed to happen when two
1950  * backends are both trying to split btree index pages, and the second
1951  * one just happens to be trying to split the page the first one got
1952  * from StrategyGetBuffer.)
1953  */
1954  content_lock = BufferDescriptorGetContentLock(buf_hdr);
1955  if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
1956  {
1957  /*
1958  * Someone else has locked the buffer, so give it up and loop back
1959  * to get another one.
1960  */
1961  UnpinBuffer(buf_hdr);
1962  goto again;
1963  }
1964 
1965  /*
1966  * If using a nondefault strategy, and writing the buffer would
1967  * require a WAL flush, let the strategy decide whether to go ahead
1968  * and write/reuse the buffer or to choose another victim. We need a
1969  * lock to inspect the page LSN, so this can't be done inside
1970  * StrategyGetBuffer.
1971  */
1972  if (strategy != NULL)
1973  {
1974  XLogRecPtr lsn;
1975 
1976  /* Read the LSN while holding buffer header lock */
1977  buf_state = LockBufHdr(buf_hdr);
1978  lsn = BufferGetLSN(buf_hdr);
1979  UnlockBufHdr(buf_hdr, buf_state);
1980 
1981  if (XLogNeedsFlush(lsn)
1982  && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
1983  {
1984  LWLockRelease(content_lock);
1985  UnpinBuffer(buf_hdr);
1986  goto again;
1987  }
1988  }
1989 
1990  /* OK, do the I/O */
1991  FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
1992  LWLockRelease(content_lock);
1993 
1994  ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
1995  &buf_hdr->tag);
1996  }
1997 
1998 
1999  if (buf_state & BM_VALID)
2000  {
2001  /*
2002  * When a BufferAccessStrategy is in use, blocks evicted from shared
2003  * buffers are counted as IOOP_EVICT in the corresponding context
2004  * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2005  * strategy in two cases: 1) while initially claiming buffers for the
2006  * strategy ring 2) to replace an existing strategy ring buffer
2007  * because it is pinned or in use and cannot be reused.
2008  *
2009  * Blocks evicted from buffers already in the strategy ring are
2010  * counted as IOOP_REUSE in the corresponding strategy context.
2011  *
2012  * At this point, we can accurately count evictions and reuses,
2013  * because we have successfully claimed the valid buffer. Previously,
2014  * we may have been forced to release the buffer due to concurrent
2015  * pinners or erroring out.
2016  */
2017  pgstat_count_io_op(IOOBJECT_RELATION, io_context,
2018  from_ring ? IOOP_REUSE : IOOP_EVICT);
2019  }
2020 
2021  /*
2022  * If the buffer has an entry in the buffer mapping table, delete it. This
2023  * can fail because another backend could have pinned or dirtied the
2024  * buffer.
2025  */
2026  if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
2027  {
2028  UnpinBuffer(buf_hdr);
2029  goto again;
2030  }
2031 
2032  /* a final set of sanity checks */
2033 #ifdef USE_ASSERT_CHECKING
2034  buf_state = pg_atomic_read_u32(&buf_hdr->state);
2035 
2036  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2037  Assert(!(buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY)));
2038 
2039  CheckBufferIsPinnedOnce(buf);
2040 #endif
2041 
2042  return buf;
2043 }
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5132
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition: bufmgr.c:5842
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
Definition: freelist.c:196
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition: freelist.c:798
@ IOOP_EVICT
Definition: pgstat.h:298
@ IOOP_REUSE
Definition: pgstat.h:303
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3122

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferGetLSN, CheckBufferIsPinnedOnce(), CurrentResourceOwner, FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBufHdr(), LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), pg_atomic_read_u32(), pgstat_count_io_op(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::state, StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().
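
The control flow reduces to a retry loop in which every failure path unpins the candidate and starts over, never blocking while holding the victim. A condensed, hedged sketch (dirty and content_lock abbreviate the checks in the listing):

 for (;;)
 {
     buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
     PinBuffer_Locked(buf_hdr);                 /* drops the header spinlock */

     if (dirty && !LWLockConditionalAcquire(content_lock, LW_SHARED))
     {
         UnpinBuffer(buf_hdr);                  /* contended: pick another */
         continue;
     }
     /* ... flush if dirty, release content_lock ... */
     if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
     {
         UnpinBuffer(buf_hdr);                  /* repinned or redirtied */
         continue;
     }
     break;                                     /* clean, unmapped, pinned once */
 }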

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5300 of file bufmgr.c.

5301 {
5302  int bufid = GetStartupBufferPinWaitBufId();
5303 
5304  /*
5305  * If we get woken slowly then it's possible that the Startup process was
5306  * already woken by other backends before we got here. Also possible that
5307  * we get here by multiple interrupts or interrupts at inappropriate
5308  * times, so make sure we do nothing if the bufid is not set.
5309  */
5310  if (bufid < 0)
5311  return false;
5312 
5313  if (GetPrivateRefCount(bufid + 1) > 0)
5314  return true;
5315 
5316  return false;
5317 }
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:671

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 4882 of file bufmgr.c.

4883 {
4884  Assert(BufferIsPinned(buffer));
4885  ResourceOwnerEnlarge(CurrentResourceOwner);
4886  if (BufferIsLocal(buffer))
4887  LocalRefCount[-buffer - 1]++;
4888  else
4889  {
4890  PrivateRefCountEntry *ref;
4891 
4892  ref = GetPrivateRefCountEntry(buffer, true);
4893  Assert(ref != NULL);
4894  ref->refcount++;
4895  }
4896  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
4897 }
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().
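
A hedged usage sketch (cf. tts_buffer_heap_store_tuple()): take an extra pin on an already-pinned buffer so that two code paths can drop their references independently:

 IncrBufferRefCount(buffer);     /* cheap: only backend-local counters */
 /* ... hand the second reference to another owner ... */
 ReleaseBuffer(buffer);          /* each reference is released exactly once */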

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 3519 of file bufmgr.c.

3520 {
3521  HASHCTL hash_ctl;
3522 
3523  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
3524 
3525  hash_ctl.keysize = sizeof(int32);
3526  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
3527 
3528  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
3529  HASH_ELEM | HASH_BLOBS);
3530 
3531  /*
3532  * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
3533  * the corresponding phase of backend shutdown.
3534  */
3535  Assert(MyProc != NULL);
3536  on_shmem_exit(AtProcExit_Buffers, 0);
3537 }
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:3544
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
PGPROC * MyProc
Definition: proc.c:66
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert, AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1726 of file bufmgr.c.

1727 {
1728  BufferTag oldTag;
1729  uint32 oldHash; /* hash value for oldTag */
1730  LWLock *oldPartitionLock; /* buffer partition lock for it */
1731  uint32 oldFlags;
1732  uint32 buf_state;
1733 
1734  /* Save the original buffer tag before dropping the spinlock */
1735  oldTag = buf->tag;
1736 
1737  buf_state = pg_atomic_read_u32(&buf->state);
1738  Assert(buf_state & BM_LOCKED);
1739  UnlockBufHdr(buf, buf_state);
1740 
1741  /*
1742  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1743  * worth storing the hashcode in BufferDesc so we need not recompute it
1744  * here? Probably not.
1745  */
1746  oldHash = BufTableHashCode(&oldTag);
1747  oldPartitionLock = BufMappingPartitionLock(oldHash);
1748 
1749 retry:
1750 
1751  /*
1752  * Acquire exclusive mapping lock in preparation for changing the buffer's
1753  * association.
1754  */
1755  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1756 
1757  /* Re-lock the buffer header */
1758  buf_state = LockBufHdr(buf);
1759 
1760  /* If it's changed while we were waiting for lock, do nothing */
1761  if (!BufferTagsEqual(&buf->tag, &oldTag))
1762  {
1763  UnlockBufHdr(buf, buf_state);
1764  LWLockRelease(oldPartitionLock);
1765  return;
1766  }
1767 
1768  /*
1769  * We assume the only reason for it to be pinned is that someone else is
1770  * flushing the page out. Wait for them to finish. (This could be an
1771  * infinite loop if the refcount is messed up... it would be nice to time
1772  * out after awhile, but there seems no way to be sure how many loops may
1773  * be needed. Note that if the other guy has pinned the buffer but not
1774  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1775  * be busy-looping here.)
1776  */
1777  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1778  {
1779  UnlockBufHdr(buf, buf_state);
1780  LWLockRelease(oldPartitionLock);
1781  /* safety check: should definitely not be our *own* pin */
1782  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1783  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1784  WaitIO(buf);
1785  goto retry;
1786  }
1787 
1788  /*
1789  * Clear out the buffer's tag and flags. We must do this to ensure that
1790  * linear scans of the buffer array don't think the buffer is valid.
1791  */
1792  oldFlags = buf_state & BUF_FLAG_MASK;
1793  ClearBufferTag(&buf->tag);
1794  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1795  UnlockBufHdr(buf, buf_state);
1796 
1797  /*
1798  * Remove the buffer from the lookup hashtable, if it was in there.
1799  */
1800  if (oldFlags & BM_TAG_VALID)
1801  BufTableDelete(&oldTag, oldHash);
1802 
1803  /*
1804  * Done with mapping lock.
1805  */
1806  LWLockRelease(oldPartitionLock);
1807 
1808  /*
1809  * Insert the buffer at the head of the list of free buffers.
1810  */
1811  StrategyFreeBuffer(buf);
1812 }
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:45
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
#define BM_LOCKED
Definition: buf_internals.h:60
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:5436

References Assert, BM_LOCKED, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), StrategyFreeBuffer(), UnlockBufHdr(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 1824 of file bufmgr.c.

1825 {
1826  uint32 buf_state;
1827  uint32 hash;
1828  LWLock *partition_lock;
1829  BufferTag tag;
1830 
1831  Assert(GetPrivateRefCount(BufferDescriptorGetBuffer(buf_hdr)) == 1);
1832 
1833  /* have buffer pinned, so it's safe to read tag without lock */
1834  tag = buf_hdr->tag;
1835 
1836  hash = BufTableHashCode(&tag);
1837  partition_lock = BufMappingPartitionLock(hash);
1838 
1839  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
1840 
1841  /* lock the buffer header */
1842  buf_state = LockBufHdr(buf_hdr);
1843 
1844  /*
1845  * We have the buffer pinned nobody else should have been able to unset
1846  * this concurrently.
1847  */
1848  Assert(buf_state & BM_TAG_VALID);
1849  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1850  Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
1851 
1852  /*
1853  * If somebody else pinned the buffer since, or even worse, dirtied it,
1854  * give up on this buffer: It's clearly in use.
1855  */
1856  if (BUF_STATE_GET_REFCOUNT(buf_state) != 1 || (buf_state & BM_DIRTY))
1857  {
1858  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1859 
1860  UnlockBufHdr(buf_hdr, buf_state);
1861  LWLockRelease(partition_lock);
1862 
1863  return false;
1864  }
1865 
1866  /*
1867  * Clear out the buffer's tag and flags and usagecount. This is not
1868  * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
1869  * doing anything with the buffer. But currently it's beneficial, as the
1870  * cheaper pre-check for several linear scans of shared buffers use the
1871  * tag (see e.g. FlushDatabaseBuffers()).
1872  */
1873  ClearBufferTag(&buf_hdr->tag);
1874  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1875  UnlockBufHdr(buf_hdr, buf_state);
1876 
1877  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1878 
1879  /* finally delete buffer from the buffer mapping table */
1880  BufTableDelete(&tag, hash);
1881 
1882  LWLockRelease(partition_lock);
1883 
1884  Assert(!(buf_state & (BM_DIRTY | BM_VALID | BM_TAG_VALID)));
1885  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1886  Assert(BUF_STATE_GET_REFCOUNT(pg_atomic_read_u32(&buf_hdr->state)) > 0);
1887 
1888  return true;
1889 }

References Assert, BM_DIRTY, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by EvictUnpinnedBuffer(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 5382 of file bufmgr.c.

5383 {
5384  BufferDesc *bufHdr;
5385  uint32 buf_state;
5386 
5387  Assert(BufferIsValid(buffer));
5388 
5389  if (BufferIsLocal(buffer))
5390  {
5391  /* There should be exactly one pin */
5392  if (LocalRefCount[-buffer - 1] != 1)
5393  return false;
5394  /* Nobody else to wait for */
5395  return true;
5396  }
5397 
5398  /* There should be exactly one local pin */
5399  if (GetPrivateRefCount(buffer) != 1)
5400  return false;
5401 
5402  bufHdr = GetBufferDescriptor(buffer - 1);
5403 
5404  /* caller must hold exclusive lock on buffer */
5405  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
5406  LW_EXCLUSIVE));
5407 
5408  buf_state = LockBufHdr(bufHdr);
5409 
5410  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5411  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5412  {
5413  /* pincount is OK. */
5414  UnlockBufHdr(bufHdr, buf_state);
5415  return true;
5416  }
5417 
5418  UnlockBufHdr(bufHdr, buf_state);
5419  return false;
5420 }

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
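
A hedged usage sketch in the style of the hash-AM callers: attempt cleanup only when no other backend holds a pin, where buf is a hypothetical Buffer that is pinned once and exclusively locked:

 if (IsBufferCleanupOK(buf))
 {
     /* sole pin + exclusive lock: safe to compact or prune the page */
 }
 else
 {
     /* another backend still has the page pinned; skip or retry later */
 }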

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 5887 of file bufmgr.c.

5888 {
5889  instr_time io_start;
5890  int i;
5891 
5892  if (wb_context->nr_pending == 0)
5893  return;
5894 
5895  /*
5896  * Executing the writes in-order can make them a lot faster, and allows to
5897  * merge writeback requests to consecutive blocks into larger writebacks.
5898  */
5899  sort_pending_writebacks(wb_context->pending_writebacks,
5900  wb_context->nr_pending);
5901 
5902  io_start = pgstat_prepare_io_time(track_io_timing);
5903 
5904  /*
5905  * Coalesce neighbouring writes, but nothing else. For that we iterate
5906  * through the, now sorted, array of pending flushes, and look forward to
5907  * find all neighbouring (or identical) writes.
5908  */
5909  for (i = 0; i < wb_context->nr_pending; i++)
5910  {
5911  PendingWriteback *cur;
5912  PendingWriteback *next;
5913  SMgrRelation reln;
5914  int ahead;
5915  BufferTag tag;
5916  RelFileLocator currlocator;
5917  Size nblocks = 1;
5918 
5919  cur = &wb_context->pending_writebacks[i];
5920  tag = cur->tag;
5921  currlocator = BufTagGetRelFileLocator(&tag);
5922 
5923  /*
5924  * Peek ahead, into following writeback requests, to see if they can
5925  * be combined with the current one.
5926  */
5927  for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
5928  {
5929 
5930  next = &wb_context->pending_writebacks[i + ahead + 1];
5931 
5932  /* different file, stop */
5933  if (!RelFileLocatorEquals(currlocator,
5934  BufTagGetRelFileLocator(&next->tag)) ||
5935  BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
5936  break;
5937 
5938  /* ok, block queued twice, skip */
5939  if (cur->tag.blockNum == next->tag.blockNum)
5940  continue;
5941 
5942  /* only merge consecutive writes */
5943  if (cur->tag.blockNum + 1 != next->tag.blockNum)
5944  break;
5945 
5946  nblocks++;
5947  cur = next;
5948  }
5949 
5950  i += ahead;
5951 
5952  /* and finally tell the kernel to write the data to storage */
5953  reln = smgropen(currlocator, INVALID_PROC_NUMBER);
5954  smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
5955  }
5956 
5957  /*
5958  * Assume that writeback requests are only issued for buffers containing
5959  * blocks of permanent relations.
5960  */
5961  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
5962  IOOP_WRITEBACK, io_start, wb_context->nr_pending);
5963 
5964  wb_context->nr_pending = 0;
5965 }
@ IOOP_WRITEBACK
Definition: pgstat.h:305
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:643
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
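
The merge step depends on the prior sort making combinable requests adjacent: only runs within the same relation and fork, on strictly consecutive (or duplicate) block numbers, are folded into one smgrwriteback() call. A hedged sketch of the per-neighbour decision, with same_file() abbreviating the locator/fork comparison:

 if (!same_file(cur, next))
     break;                                    /* different file: stop merging */
 if (cur->tag.blockNum == next->tag.blockNum)
     continue;                                 /* queued twice: fold silently */
 if (cur->tag.blockNum + 1 != next->tag.blockNum)
     break;                                    /* gap: not mergeable */
 nblocks++;                                    /* extend the consecutive run */
 cur = next;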

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2058 of file bufmgr.c.

2059 {
2060  uint32 max_backends;
2061  int max_proportional_pins;
2062 
2063  if (*additional_pins <= 1)
2064  return;
2065 
2066  max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
2067  max_proportional_pins = NBuffers / max_backends;
2068 
2069  /*
2070  * Subtract the approximate number of buffers already pinned by this
2071  * backend. We get the number of "overflowed" pins for free, but don't
2072  * know the number of pins in PrivateRefCountArray. The cost of
2073  * calculating that exactly doesn't seem worth it, so just assume the max.
2074  */
2075  max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2076 
2077  if (max_proportional_pins <= 0)
2078  max_proportional_pins = 1;
2079 
2080  if (*additional_pins > max_proportional_pins)
2081  *additional_pins = max_proportional_pins;
2082 }
int MaxBackends
Definition: globals.c:143
#define NUM_AUXILIARY_PROCS
Definition: proc.h:440

References MaxBackends, NBuffers, NUM_AUXILIARY_PROCS, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by ExtendBufferedRelShared(), and read_stream_begin_relation().
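
A hedged caller sketch: clamp a planned batch of pins to this backend's proportional share before acquiring them (the batch size 64 is arbitrary):

 uint32  extend_by = 64;          /* buffers we would like to pin at once */

 LimitAdditionalPins(&extend_by); /* clamps to a fair share, never below 1 */
 /* ... now pin/extend by at most extend_by buffers ... */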

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 5641 of file bufmgr.c.

5642 {
5643  BufferDesc *bufHdr = (BufferDesc *) arg;
5644 
5645  if (bufHdr != NULL)
5646  {
5647  char *path = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
5648  MyProcNumber,
5649  BufTagGetForkNum(&bufHdr->tag));
5650 
5651  errcontext("writing block %u of relation %s",
5652  bufHdr->tag.blockNum, path);
5653  pfree(path);
5654  }
5655 }
#define errcontext
Definition: elog.h:196
void * arg

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyProcNumber, pfree(), relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 5085 of file bufmgr.c.

5086 {
5087  BufferDesc *buf;
5088 
5089  Assert(BufferIsPinned(buffer));
5090  if (BufferIsLocal(buffer))
5091  return; /* local buffers need no lock */
5092 
5093  buf = GetBufferDescriptor(buffer - 1);
5094 
5095  if (mode == BUFFER_LOCK_UNLOCK)
5096  LWLockRelease(BufferDescriptorGetContentLock(buf));
5097  else if (mode == BUFFER_LOCK_SHARE)
5098  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
5099  else if (mode == BUFFER_LOCK_EXCLUSIVE)
5100  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
5101  else
5102  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5103 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:198
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:199

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroBuffer().
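
A hedged sketch of the standard access pattern, where rel and blkno are hypothetical: pin first, then take the content lock only around the page access.

 Buffer  buf = ReadBuffer(rel, blkno);

 LockBuffer(buf, BUFFER_LOCK_SHARE);   /* shared content lock for reading */
 /* ... examine the page ... */
 UnlockReleaseBuffer(buf);             /* BUFFER_LOCK_UNLOCK plus unpin */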

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5165 of file bufmgr.c.

5166 {
5167  BufferDesc *bufHdr;
5168  TimestampTz waitStart = 0;
5169  bool waiting = false;
5170  bool logged_recovery_conflict = false;
5171 
5172  Assert(BufferIsPinned(buffer));
5173  Assert(PinCountWaitBuf == NULL);
5174 
5175  CheckBufferIsPinnedOnce(buffer);
5176 
5177  /* Nobody else to wait for */
5178  if (BufferIsLocal(buffer))
5179  return;
5180 
5181  bufHdr = GetBufferDescriptor(buffer - 1);
5182 
5183  for (;;)
5184  {
5185  uint32 buf_state;
5186 
5187  /* Try to acquire lock */
5188  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5189  buf_state = LockBufHdr(bufHdr);
5190 
5191  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5192  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5193  {
5194  /* Successfully acquired exclusive lock with pincount 1 */
5195  UnlockBufHdr(bufHdr, buf_state);
5196 
5197  /*
5198  * Emit the log message if recovery conflict on buffer pin was
5199  * resolved but the startup process waited longer than
5200  * deadlock_timeout for it.
5201  */
5202  if (logged_recovery_conflict)
5203  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5204  waitStart, GetCurrentTimestamp(),
5205  NULL, false);
5206 
5207  if (waiting)
5208  {
5209  /* reset ps display to remove the suffix if we added one */
5210  set_ps_display_remove_suffix();
5211  waiting = false;
5212  }
5213  return;
5214  }
5215  /* Failed, so mark myself as waiting for pincount 1 */
5216  if (buf_state & BM_PIN_COUNT_WAITER)
5217  {
5218  UnlockBufHdr(bufHdr, buf_state);
5219  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5220  elog(ERROR, "multiple backends attempting to wait for pincount 1");
5221  }
5222  bufHdr->wait_backend_pgprocno = MyProcNumber;
5223  PinCountWaitBuf = bufHdr;
5224  buf_state |= BM_PIN_COUNT_WAITER;
5225  UnlockBufHdr(bufHdr, buf_state);
5226  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5227 
5228  /* Wait to be signaled by UnpinBuffer() */
5229  if (InHotStandby)
5230  {
5231  if (!waiting)
5232  {
5233  /* adjust the process title to indicate that it's waiting */
5234  set_ps_display_suffix("waiting");
5235  waiting = true;
5236  }
5237 
5238  /*
5239  * Emit the log message if the startup process is waiting longer
5240  * than deadlock_timeout for recovery conflict on buffer pin.
5241  *
5242  * Skip this if first time through because the startup process has
5243  * not started waiting yet in this case. So, the wait start
5244  * timestamp is set after this logic.
5245  */
5246  if (waitStart != 0 && !logged_recovery_conflict)
5247  {
5248  TimestampTz now = GetCurrentTimestamp();
5249 
5250  if (TimestampDifferenceExceeds(waitStart, now,
5251  DeadlockTimeout))
5252  {
5253  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5254  waitStart, now, NULL, true);
5255  logged_recovery_conflict = true;
5256  }
5257  }
5258 
5259  /*
5260  * Set the wait start timestamp if logging is enabled and first
5261  * time through.
5262  */
5263  if (log_recovery_conflict_waits && waitStart == 0)
5264  waitStart = GetCurrentTimestamp();
5265 
5266  /* Publish the bufid that Startup process waits on */
5267  SetStartupBufferPinWaitBufId(buffer - 1);
5268  /* Set alarm and then wait to be signaled by UnpinBuffer() */
5269  ResolveRecoveryConflictWithBufferPin();
5270  /* Reset the published bufid */
5271  SetStartupBufferPinWaitBufId(-1);
5272  }
5273  else
5274  ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
5275 
5276  /*
5277  * Remove flag marking us as waiter. Normally this will not be set
5278  * anymore, but ProcWaitForSignal() can return for other signals as
5279  * well. We take care to only reset the flag if we're the waiter, as
5280  * theoretically another backend could have started waiting. That's
5281  * impossible with the current usages due to table level locking, but
5282  * better be safe.
5283  */
5284  buf_state = LockBufHdr(bufHdr);
5285  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5286  bufHdr->wait_backend_pgprocno == MyProcNumber)
5287  buf_state &= ~BM_PIN_COUNT_WAITER;
5288  UnlockBufHdr(bufHdr, buf_state);
5289 
5290  PinCountWaitBuf = NULL;
5291  /* Loop back and try again */
5292  }
5293 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:67
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:175
int64 TimestampTz
Definition: timestamp.h:39
static volatile sig_atomic_t waiting
Definition: latch.c:162
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:421
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:369
int DeadlockTimeout
Definition: proc.c:57
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:659
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1866
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:792
bool log_recovery_conflict_waits
Definition: standby.c:41
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:273
int wait_backend_pgprocno
#define InHotStandby
Definition: xlogutils.h:57

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroBuffer().
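
A hedged usage sketch in the VACUUM style: the caller must hold exactly one pin (per CheckBufferIsPinnedOnce()) and no content lock when calling in.

 LockBufferForCleanup(buf);   /* exclusive lock, waits until we are sole pinner */
 /* ... remove dead tuples; no concurrent backend holds a page pointer ... */
 UnlockReleaseBuffer(buf);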

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 5688 of file bufmgr.c.

5689 {
5690  SpinDelayStatus delayStatus;
5691  uint32 old_buf_state;
5692 
5693  Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
5694 
5695  init_local_spin_delay(&delayStatus);
5696 
5697  while (true)
5698  {
5699  /* set BM_LOCKED flag */
5700  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
5701  /* if it wasn't set before we're OK */
5702  if (!(old_buf_state & BM_LOCKED))
5703  break;
5704  perform_spin_delay(&delayStatus);
5705  }
5706  finish_spin_delay(&delayStatus);
5707  return old_buf_state | BM_LOCKED;
5708 }
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:405
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:132
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:192
#define init_local_spin_delay(status)
Definition: s_lock.h:843

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBufferNoOwner(), WaitIO(), and ZeroBuffer().
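
Every caller follows the same tiny bracket: take the spinlock, inspect or adjust the state word without doing I/O or taking other locks, then hand the (possibly modified) word back. A minimal sketch:

 uint32  buf_state = LockBufHdr(bufHdr);

 if (buf_state & BM_DIRTY)
 {
     /* flag checks and updates only; keep the critical section short */
 }
 UnlockBufHdr(bufHdr, buf_state);   /* stores buf_state and clears BM_LOCKED */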

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 2474 of file bufmgr.c.

2475 {
2476  BufferDesc *bufHdr;
2477  uint32 buf_state;
2478  uint32 old_buf_state;
2479 
2480  if (!BufferIsValid(buffer))
2481  elog(ERROR, "bad buffer ID: %d", buffer);
2482 
2483  if (BufferIsLocal(buffer))
2484  {
2485  MarkLocalBufferDirty(buffer);
2486  return;
2487  }
2488 
2489  bufHdr = GetBufferDescriptor(buffer - 1);
2490 
2491  Assert(BufferIsPinned(buffer));
2492  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2493  LW_EXCLUSIVE));
2494 
2495  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
2496  for (;;)
2497  {
2498  if (old_buf_state & BM_LOCKED)
2499  old_buf_state = WaitBufHdrUnlocked(bufHdr);
2500 
2501  buf_state = old_buf_state;
2502 
2503  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2504  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
2505 
2506  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
2507  buf_state))
2508  break;
2509  }
2510 
2511  /*
2512  * If the buffer was not dirty already, do vacuum accounting.
2513  */
2514  if (!(old_buf_state & BM_DIRTY))
2515  {
2516  VacuumPageDirty++;
2517  pgBufferUsage.shared_blks_dirtied++;
2518  if (VacuumCostActive)
2519  VacuumCostBalance += VacuumCostPageDirty;
2520  }
2521 }
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:344
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:5718
bool VacuumCostActive
Definition: globals.c:159
int64 VacuumPageDirty
Definition: globals.c:156
int VacuumCostBalance
Definition: globals.c:158
int VacuumCostPageDirty
Definition: globals.c:150
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:449
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), do_setval(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().
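
A hedged sketch of the canonical modify-and-log sequence most of the callers above follow; the page change and WAL record are elided, and buf is a hypothetical Buffer:

 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 START_CRIT_SECTION();
 /* ... apply the page modification ... */
 MarkBufferDirty(buf);             /* must precede writing WAL for the change */
 /* recptr = XLogInsert(...); PageSetLSN(BufferGetPage(buf), recptr); */
 END_CRIT_SECTION();
 UnlockReleaseBuffer(buf);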

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 4914 of file bufmgr.c.

4915 {
4916  BufferDesc *bufHdr;
4917  Page page = BufferGetPage(buffer);
4918 
4919  if (!BufferIsValid(buffer))
4920  elog(ERROR, "bad buffer ID: %d", buffer);
4921 
4922  if (BufferIsLocal(buffer))
4923  {
4924  MarkLocalBufferDirty(buffer);
4925  return;
4926  }
4927 
4928  bufHdr = GetBufferDescriptor(buffer - 1);
4929 
4930  Assert(GetPrivateRefCount(buffer) > 0);
4931  /* here, either share or exclusive lock is OK */
4932  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4933 
4934  /*
4935  * This routine might get called many times on the same page, if we are
4936  * making the first scan after commit of an xact that added/deleted many
4937  * tuples. So, be as quick as we can if the buffer is already dirty. We
4938  * do this by not acquiring spinlock if it looks like the status bits are
4939  * already set. Since we make this test unlocked, there's a chance we
4940  * might fail to notice that the flags have just been cleared, and failed
4941  * to reset them, due to memory-ordering issues. But since this function
4942  * is only intended to be used in cases where failing to write out the
4943  * data would be harmless anyway, it doesn't really matter.
4944  */
4945  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
4946  (BM_DIRTY | BM_JUST_DIRTIED))
4947  {
4948  XLogRecPtr lsn = InvalidXLogRecPtr;
4949  bool dirtied = false;
4950  bool delayChkptFlags = false;
4951  uint32 buf_state;
4952 
4953  /*
4954  * If we need to protect hint bit updates from torn writes, WAL-log a
4955  * full page image of the page. This full page image is only necessary
4956  * if the hint bit update is the first change to the page since the
4957  * last checkpoint.
4958  *
4959  * We don't check full_page_writes here because that logic is included
4960  * when we call XLogInsert() since the value changes dynamically.
4961  */
4962  if (XLogHintBitIsNeeded() &&
4963  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
4964  {
4965  /*
4966  * If we must not write WAL, due to a relfilelocator-specific
4967  * condition or being in recovery, don't dirty the page. We can
4968  * set the hint, just not dirty the page as a result so the hint
4969  * is lost when we evict the page or shutdown.
4970  *
4971  * See src/backend/storage/page/README for longer discussion.
4972  */
4973  if (RecoveryInProgress() ||
4974  RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
4975  return;
4976 
4977  /*
4978  * If the block is already dirty because we either made a change
4979  * or set a hint already, then we don't need to write a full page
4980  * image. Note that aggressive cleaning of blocks dirtied by hint
4981  * bit setting would increase the call rate. Bulk setting of hint
4982  * bits would reduce the call rate...
4983  *
4984  * We must issue the WAL record before we mark the buffer dirty.
4985  * Otherwise we might write the page before we write the WAL. That
4986  * causes a race condition, since a checkpoint might occur between
4987  * writing the WAL record and marking the buffer dirty. We solve
4988  * that with a kluge, but one that is already in use during
4989  * transaction commit to prevent race conditions. Basically, we
4990  * simply prevent the checkpoint WAL record from being written
4991  * until we have marked the buffer dirty. We don't start the
4992  * checkpoint flush until we have marked dirty, so our checkpoint
4993  * must flush the change to disk successfully or the checkpoint
4994  * never gets written, so crash recovery will fix.
4995  *
4996  * It's possible we may enter here without an xid, so it is
4997  * essential that CreateCheckPoint waits for virtual transactions
4998  * rather than full transactionids.
4999  */
5000  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
5001  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
5002  delayChkptFlags = true;
5003  lsn = XLogSaveBufferForHint(buffer, buffer_std);
5004  }
5005 
5006  buf_state = LockBufHdr(bufHdr);
5007 
5008  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5009 
5010  if (!(buf_state & BM_DIRTY))
5011  {
5012  dirtied = true; /* Means "will be dirtied by this action" */
5013 
5014  /*
5015  * Set the page LSN if we wrote a backup block. We aren't supposed
5016  * to set this when only holding a share lock but as long as we
5017  * serialise it somehow we're OK. We choose to set LSN while
5018  * holding the buffer header lock, which causes any reader of an
5019  * LSN who holds only a share lock to also obtain a buffer header
5020  * lock before using PageGetLSN(), which is enforced in
5021  * BufferGetLSNAtomic().
5022  *
5023  * If checksums are enabled, you might think we should reset the
5024  * checksum here. That will happen when the page is written
5025  * sometime later in this checkpoint cycle.
5026  */
5027  if (!XLogRecPtrIsInvalid(lsn))
5028  PageSetLSN(page, lsn);
5029  }
5030 
5031  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
5032  UnlockBufHdr(bufHdr, buf_state);
5033 
5034  if (delayChkptFlags)
5035  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5036 
5037  if (dirtied)
5038  {
5039  VacuumPageDirty++;
5040  pgBufferUsage.shared_blks_dirtied++;
5041  if (VacuumCostActive)
5042  VacuumCostBalance += VacuumCostPageDirty;
5043  }
5044  }
5045 }
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define DELAY_CHKPT_START
Definition: proc.h:114
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:532
int delayChkptFlags
Definition: proc.h:236
bool RecoveryInProgress(void)
Definition: xlog.c:6290
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1065

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
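
A hedged illustration of the caller's side (not part of bufmgr.c; it is loosely modeled on SetHintBits() in heapam.c, which additionally checks XLogHintBitIsNeeded() and the buffer LSN, and it assumes the usual heapam/bufmgr headers): a backend holding a pin and share lock sets a hint bit and then lets bufmgr decide whether a WAL-logged full-page image must precede dirtying the page.

static void
set_tuple_hint_sketch(Buffer buffer, HeapTupleHeader tuple, uint16 infomask)
{
    tuple->t_infomask |= infomask;      /* the hint itself */

    /* true: page has a standard layout, so a compact FPI can be logged */
    MarkBufferDirtyHint(buffer, true);
}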

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 315 of file bufmgr.c.

316 {
317  PrivateRefCountEntry *res;
318 
319  /* only allowed to be called when a reservation has been made */
320  Assert(ReservedRefCountEntry != NULL);
321 
322  /* use up the reserved entry */
323  res = ReservedRefCountEntry;
324  ReservedRefCountEntry = NULL;
325 
326  /* and fill it */
327  res->buffer = buffer;
328  res->refcount = 0;
329 
330  return res;
331 }

References Assert, PrivateRefCountEntry::buffer, res, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 2595 of file bufmgr.c.

2596 {
2597  Buffer b = BufferDescriptorGetBuffer(buf);
2598  bool result;
2599  PrivateRefCountEntry *ref;
2600 
2601  Assert(!BufferIsLocal(b));
2602  Assert(ReservedRefCountEntry != NULL);
2603 
2604  ref = GetPrivateRefCountEntry(b, true);
2605 
2606  if (ref == NULL)
2607  {
2608  uint32 buf_state;
2609  uint32 old_buf_state;
2610 
2611  ref = NewPrivateRefCountEntry(b);
2612 
2613  old_buf_state = pg_atomic_read_u32(&buf->state);
2614  for (;;)
2615  {
2616  if (old_buf_state & BM_LOCKED)
2617  old_buf_state = WaitBufHdrUnlocked(buf);
2618 
2619  buf_state = old_buf_state;
2620 
2621  /* increase refcount */
2622  buf_state += BUF_REFCOUNT_ONE;
2623 
2624  if (strategy == NULL)
2625  {
2626  /* Default case: increase usagecount unless already max. */
2627  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
2628  buf_state += BUF_USAGECOUNT_ONE;
2629  }
2630  else
2631  {
2632  /*
2633  * Ring buffers shouldn't evict others from pool. Thus we
2634  * don't make usagecount more than 1.
2635  */
2636  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
2637  buf_state += BUF_USAGECOUNT_ONE;
2638  }
2639 
2640  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2641  buf_state))
2642  {
2643  result = (buf_state & BM_VALID) != 0;
2644 
2645  /*
2646  * Assume that we acquired a buffer pin for the purposes of
2647  * Valgrind buffer client checks (even in !result case) to
2648  * keep things simple. Buffers that are unsafe to access are
2649  * not generally guaranteed to be marked undefined or
2650  * non-accessible in any case.
2651  */
2652  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2653  break;
2654  }
2655  }
2656  }
2657  else
2658  {
2659  /*
2660  * If we previously pinned the buffer, it is likely to be valid, but
2661  * it may not be if StartReadBuffers() was called and
2662  * WaitReadBuffers() hasn't been called yet. We'll check by loading
2663  * the flags without locking. This is racy, but it's OK to return
2664  * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
2665  * it'll see that it's now valid.
2666  *
2667  * Note: We deliberately avoid a Valgrind client request here.
2668  * Individual access methods can optionally superimpose buffer page
2669  * client requests on top of our client requests to enforce that
2670  * buffers are only accessed while locked (and pinned). It's possible
2671  * that the buffer page is legitimately non-accessible here. We
2672  * cannot meddle with that.
2673  */
2674  result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
2675  }
2676 
2677  ref->refcount++;
2678  Assert(ref->refcount > 0);
2679  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2680  return result;
2681 }
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:78
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:43
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:52
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:315
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservedRefCountEntry, ResourceOwnerRememberBuffer(), VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 2706 of file bufmgr.c.

2707 {
2708  Buffer b;
2709  PrivateRefCountEntry *ref;
2710  uint32 buf_state;
2711 
2712  /*
2713  * As explained, we don't expect any preexisting pins. That allows us to
2714  * manipulate the PrivateRefCount after releasing the spinlock.
2715  */
2716  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
2717 
2718  /*
2719  * Buffer can't have a preexisting pin, so mark its page as defined to
2720  * Valgrind (this is similar to the PinBuffer() case where the backend
2721  * doesn't already have a buffer pin)
2722  */
2723  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2724 
2725  /*
2726  * Since we hold the buffer spinlock, we can update the buffer state and
2727  * release the lock in one operation.
2728  */
2729  buf_state = pg_atomic_read_u32(&buf->state);
2730  Assert(buf_state & BM_LOCKED);
2731  buf_state += BUF_REFCOUNT_ONE;
2732  UnlockBufHdr(buf, buf_state);
2733 
2734  b = BufferDescriptorGetBuffer(buf);
2735 
2736  ref = NewPrivateRefCountEntry(b);
2737  ref->refcount++;
2738 
2739  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2740 }

References Assert, b, BM_LOCKED, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), UnlockBufHdr(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr 
)
static

Definition at line 1059 of file bufmgr.c.

1066 {
1067  BufferDesc *bufHdr;
1068  IOContext io_context;
1069  IOObject io_object;
1070  char persistence;
1071 
1072  Assert(blockNum != P_NEW);
1073 
1074  /*
1075  * If there is no Relation it usually implies recovery and thus permanent,
1076  * but we take an argument because CreateAndCopyRelationData can reach us
1077  * with only an SMgrRelation for an unlogged relation that we don't want
1078  * to flag with BM_PERMANENT.
1079  */
1080  if (rel)
1081  persistence = rel->rd_rel->relpersistence;
1082  else if (smgr_persistence == 0)
1083  persistence = RELPERSISTENCE_PERMANENT;
1084  else
1085  persistence = smgr_persistence;
1086 
1087  if (persistence == RELPERSISTENCE_TEMP)
1088  {
1089  io_context = IOCONTEXT_NORMAL;
1090  io_object = IOOBJECT_TEMP_RELATION;
1091  }
1092  else
1093  {
1094  io_context = IOContextForStrategy(strategy);
1095  io_object = IOOBJECT_RELATION;
1096  }
1097 
1098  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1099  smgr->smgr_rlocator.locator.spcOid,
1100  smgr->smgr_rlocator.locator.dbOid,
1101  smgr->smgr_rlocator.locator.relNumber,
1102  smgr->smgr_rlocator.backend);
1103 
1104  if (persistence == RELPERSISTENCE_TEMP)
1105  {
1106  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1107  if (*foundPtr)
1108  pgBufferUsage.local_blks_hit++;
1109  }
1110  else
1111  {
1112  bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1113  strategy, foundPtr, io_context);
1114  if (*foundPtr)
1115  pgBufferUsage.shared_blks_hit++;
1116  }
1117  if (rel)
1118  {
1119  /*
1120  * While pgBufferUsage's "read" counter isn't bumped unless we reach
1121  * WaitReadBuffers() (so, not for hits, and not for buffers that are
1122  * zeroed instead), the per-relation stats always count them.
1123  */
1124  pgstat_count_buffer_read(rel);
1125  if (*foundPtr)
1126  pgstat_count_buffer_hit(rel);
1127  }
1128  if (*foundPtr)
1129  {
1130  VacuumPageHit++;
1131  pgstat_count_io_op(io_object, io_context, IOOP_HIT);
1132  if (VacuumCostActive)
1133  VacuumCostBalance += VacuumCostPageHit;
1134 
1135  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1136  smgr->smgr_rlocator.locator.spcOid,
1137  smgr->smgr_rlocator.locator.dbOid,
1138  smgr->smgr_rlocator.locator.relNumber,
1139  smgr->smgr_rlocator.backend,
1140  true);
1141  }
1142 
1143  return BufferDescriptorGetBuffer(bufHdr);
1144 }
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition: bufmgr.c:1548
#define P_NEW
Definition: bufmgr.h:192
int64 VacuumPageHit
Definition: globals.c:154
int VacuumCostPageHit
Definition: globals.c:148
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:116
IOObject
Definition: pgstat.h:279
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:635
@ IOOP_HIT
Definition: pgstat.h:301
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:640
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_hit
Definition: instrument.h:26

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelationData::rd_rel, RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, and VacuumPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 638 of file bufmgr.c.

639 {
640  Assert(RelationIsValid(reln));
641  Assert(BlockNumberIsValid(blockNum));
642 
643  if (RelationUsesLocalBuffers(reln))
644  {
645  /* see comments in ReadBufferExtended */
646  if (RELATION_IS_OTHER_TEMP(reln))
647  ereport(ERROR,
648  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
649  errmsg("cannot access temporary tables of other sessions")));
650 
651  /* pass it off to localbuf.c */
652  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
653  }
654  else
655  {
656  /* pass it to the shared buffer version */
657  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
658  }
659 }
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:548
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:69
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:658
#define RelationIsValid(relation)
Definition: rel.h:478

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().
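
A minimal usage sketch (not from bufmgr.c; the helper name is hypothetical and an already-open Relation is assumed), in the spirit of pg_prewarm's prefetch mode: hint a range of blocks first, then read them, letting the kernel overlap I/O with processing.

static void
prefetch_then_read_sketch(Relation rel, BlockNumber start, BlockNumber end)
{
    BlockNumber blkno;

    for (blkno = start; blkno < end; blkno++)
        (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

    for (blkno = start; blkno < end; blkno++)
    {
        Buffer  buf = ReadBuffer(rel, blkno);

        /* ... lock and examine the page here ... */
        ReleaseBuffer(buf);
    }
}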

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 548 of file bufmgr.c.

551 {
552  PrefetchBufferResult result = {InvalidBuffer, false};
553  BufferTag newTag; /* identity of requested block */
554  uint32 newHash; /* hash value for newTag */
555  LWLock *newPartitionLock; /* buffer partition lock for it */
556  int buf_id;
557 
558  Assert(BlockNumberIsValid(blockNum));
559 
560  /* create a tag so we can lookup the buffer */
561  InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
562  forkNum, blockNum);
563 
564  /* determine its hash code and partition lock ID */
565  newHash = BufTableHashCode(&newTag);
566  newPartitionLock = BufMappingPartitionLock(newHash);
567 
568  /* see if the block is in the buffer pool already */
569  LWLockAcquire(newPartitionLock, LW_SHARED);
570  buf_id = BufTableLookup(&newTag, newHash);
571  LWLockRelease(newPartitionLock);
572 
573  /* If not in buffers, initiate prefetch */
574  if (buf_id < 0)
575  {
576 #ifdef USE_PREFETCH
577  /*
578  * Try to initiate an asynchronous read. This returns false in
579  * recovery if the relation file doesn't exist.
580  */
581  if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
582  smgrprefetch(smgr_reln, forkNum, blockNum, 1))
583  {
584  result.initiated_io = true;
585  }
586 #endif /* USE_PREFETCH */
587  }
588  else
589  {
590  /*
591  * Report the buffer it was in at that time. The caller may be able
592  * to avoid a buffer table lookup, but it's not pinned and it must be
593  * rechecked!
594  */
595  result.recent_buffer = buf_id + 1;
596  }
597 
598  /*
599  * If the block *is* in buffers, we do nothing. This is not really ideal:
600  * the block might be just about to be evicted, which would be stupid
601  * since we know we are going to need it soon. But the only easy answer
602  * is to bump the usage_count, which does not seem like a great solution:
603  * when the caller does ultimately touch the block, usage_count would get
604  * bumped again, resulting in too much favoritism for blocks that are
605  * involved in a prefetch sequence. A real fix would involve some
606  * additional per-buffer state, and it's not clear that there's enough of
607  * a problem to justify that.
608  */
609 
610  return result;
611 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:585
Buffer recent_buffer
Definition: bufmgr.h:60

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 745 of file bufmgr.c.

746 {
747  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
748 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
@ RBM_NORMAL
Definition: bufmgr.h:45

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
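
The canonical pin/lock/use/release cycle around ReadBuffer(), shown as a sketch (rel and blkno are assumed valid): ReadBuffer() only pins the buffer, so a content lock must still be taken before the page is examined.

Buffer  buf = ReadBuffer(rel, blkno);   /* pins the buffer */
Page    page;

LockBuffer(buf, BUFFER_LOCK_SHARE);     /* content lock for reading */
page = BufferGetPage(buf);
/* ... read tuples from the page ... */
UnlockReleaseBuffer(buf);               /* drops lock and pin together */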

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1152 of file bufmgr.c.

1156 {
1157  ReadBuffersOperation operation;
1158  Buffer buffer;
1159  int flags;
1160 
1161  /*
1162  * Backward compatibility path, most code should use ExtendBufferedRel()
1163  * instead, as acquiring the extension lock inside ExtendBufferedRel()
1164  * scales a lot better.
1165  */
1166  if (unlikely(blockNum == P_NEW))
1167  {
1168  uint32 flags = EB_SKIP_EXTENSION_LOCK;
1169 
1170  /*
1171  * Since no-one else can be looking at the page contents yet, there is
1172  * no difference between an exclusive lock and a cleanup-strength
1173  * lock.
1174  */
1175  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1176  flags |= EB_LOCK_FIRST;
1177 
1178  return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1179  }
1180 
1181  if (unlikely(mode == RBM_ZERO_AND_CLEANUP_LOCK ||
1182  mode == RBM_ZERO_AND_LOCK))
1183  {
1184  bool found;
1185 
1186  buffer = PinBufferForBlock(rel, smgr, smgr_persistence,
1187  forkNum, blockNum, strategy, &found);
1188  ZeroBuffer(buffer, mode);
1189  return buffer;
1190  }
1191 
1192  if (mode == RBM_ZERO_ON_ERROR)
1193  flags = READ_BUFFERS_ZERO_ON_ERROR;
1194  else
1195  flags = 0;
1196  operation.smgr = smgr;
1197  operation.rel = rel;
1198  operation.smgr_persistence = smgr_persistence;
1199  operation.forknum = forkNum;
1200  operation.strategy = strategy;
1201  if (StartReadBuffer(&operation,
1202  &buffer,
1203  blockNum,
1204  flags))
1205  WaitReadBuffers(&operation);
1206 
1207  return buffer;
1208 }
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:1059
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:845
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition: bufmgr.c:1349
static void ZeroBuffer(Buffer buffer, ReadBufferMode mode)
Definition: bufmgr.c:1019
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition: bufmgr.c:1321
@ READ_BUFFERS_ZERO_ON_ERROR
Definition: bufmgr.h:113
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:50
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
#define unlikely(x)
Definition: c.h:311
ForkNumber forknum
Definition: bufmgr.h:129
BufferAccessStrategy strategy
Definition: bufmgr.h:130
struct SMgrRelationData * smgr
Definition: bufmgr.h:127

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), ReadBuffersOperation::forknum, mode, P_NEW, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, ReadBuffersOperation::smgr_persistence, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 792 of file bufmgr.c.

794 {
795  Buffer buf;
796 
797  /*
798  * Reject attempts to read non-local temporary relations; we would be
799  * likely to get wrong data since we have no visibility into the owning
800  * session's local buffers.
801  */
802  if (RELATION_IS_OTHER_TEMP(reln))
803  ereport(ERROR,
804  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
805  errmsg("cannot access temporary tables of other sessions")));
806 
807  /*
808  * Read the buffer, and update pgstat counters to reflect a cache hit or
809  * miss.
810  */
811  buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
812  forkNum, blockNum, mode, strategy);
813 
814  return buf;
815 }

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), autoprewarm_database_main(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), brin_vacuum_scan(), bt_recheck_sibling_links(), btvacuumpage(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_newpage_range(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), verify_heapam(), and vm_readbuf().
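
A sketch of a bulk scan using a ring strategy (illustrative only; assumes rel is open and nblocks was obtained beforehand), which is how vacuum-style callers keep a large scan from evicting the whole buffer pool:

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);

for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
{
    Buffer  buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                     RBM_NORMAL, strategy);

    /* ... process the page ... */
    ReleaseBuffer(buf);
}
FreeAccessStrategy(strategy);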

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 829 of file bufmgr.c.

832 {
833  SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
834 
835  return ReadBuffer_common(NULL, smgr,
836  permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
837  forkNum, blockNum,
838  mode, strategy);
839 }

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 669 of file bufmgr.c.

671 {
672  BufferDesc *bufHdr;
673  BufferTag tag;
674  uint32 buf_state;
675  bool have_private_ref;
676 
677  Assert(BufferIsValid(recent_buffer));
678 
679  ResourceOwnerEnlarge(CurrentResourceOwner);
680  ReservePrivateRefCountEntry();
681  InitBufferTag(&tag, &rlocator, forkNum, blockNum);
682 
683  if (BufferIsLocal(recent_buffer))
684  {
685  int b = -recent_buffer - 1;
686 
687  bufHdr = GetLocalBufferDescriptor(b);
688  buf_state = pg_atomic_read_u32(&bufHdr->state);
689 
690  /* Is it still valid and holding the right tag? */
691  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
692  {
693  PinLocalBuffer(bufHdr, true);
694 
695  pgBufferUsage.local_blks_hit++;
696 
697  return true;
698  }
699  }
700  else
701  {
702  bufHdr = GetBufferDescriptor(recent_buffer - 1);
703  have_private_ref = GetPrivateRefCount(recent_buffer) > 0;
704 
705  /*
706  * Do we already have this buffer pinned with a private reference? If
707  * so, it must be valid and it is safe to check the tag without
708  * locking. If not, we have to lock the header first and then check.
709  */
710  if (have_private_ref)
711  buf_state = pg_atomic_read_u32(&bufHdr->state);
712  else
713  buf_state = LockBufHdr(bufHdr);
714 
715  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
716  {
717  /*
718  * It's now safe to pin the buffer. We can't pin first and ask
719  * questions later, because it might confuse code paths like
720  * InvalidateBuffer() if we pinned a random non-matching buffer.
721  */
722  if (have_private_ref)
723  PinBuffer(bufHdr, NULL); /* bump pin count */
724  else
725  PinBuffer_Locked(bufHdr); /* pin for first time */
726 
727  pgBufferUsage.shared_blks_hit++;
728 
729  return true;
730  }
731 
732  /* If we locked the header above, now unlock. */
733  if (!have_private_ref)
734  UnlockBufHdr(bufHdr, buf_state);
735  }
736 
737  return false;
738 }
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:655

References Assert, b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), InitBufferTag(), BufferUsage::local_blks_hit, LockBufHdr(), pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinBuffer_Locked(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by XLogReadBufferExtended().
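
A sketch of the remembered-buffer optimization as XLogReadBufferExtended()-style callers apply it (variables are illustrative; recent_buffer would have been saved from an earlier lookup): try the old buffer first, and fall back to a normal read only if the tag no longer matches.

Buffer  buf;

if (recent_buffer != InvalidBuffer &&
    ReadRecentBuffer(rlocator, MAIN_FORKNUM, blkno, recent_buffer))
    buf = recent_buffer;        /* already pinned; tag re-verified */
else
    buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, NULL, true);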

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 4633 of file bufmgr.c.

4636 {
4637  Buffer srcBuf;
4638  Buffer dstBuf;
4639  Page srcPage;
4640  Page dstPage;
4641  bool use_wal;
4642  BlockNumber nblocks;
4643  BlockNumber blkno;
4644  PGIOAlignedBlock buf;
4645  BufferAccessStrategy bstrategy_src;
4646  BufferAccessStrategy bstrategy_dst;
4647 
4648  /*
4649  * In general, we want to write WAL whenever wal_level > 'minimal', but we
4650  * can skip it when copying any fork of an unlogged relation other than
4651  * the init fork.
4652  */
4653  use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
4654 
4655  /* Get number of blocks in the source relation. */
4656  nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
4657  forkNum);
4658 
4659  /* Nothing to copy; just return. */
4660  if (nblocks == 0)
4661  return;
4662 
4663  /*
4664  * Bulk extend the destination relation of the same size as the source
4665  * relation before starting to copy block by block.
4666  */
4667  memset(buf.data, 0, BLCKSZ);
4668  smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
4669  buf.data, true);
4670 
4671  /* This is a bulk operation, so use buffer access strategies. */
4672  bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
4673  bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
4674 
4675  /* Iterate over each block of the source relation file. */
4676  for (blkno = 0; blkno < nblocks; blkno++)
4677  {
4678  CHECK_FOR_INTERRUPTS();
4679 
4680  /* Read block from source relation. */
4681  srcBuf = ReadBufferWithoutRelcache(srclocator, forkNum, blkno,
4682  RBM_NORMAL, bstrategy_src,
4683  permanent);
4684  LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
4685  srcPage = BufferGetPage(srcBuf);
4686 
4687  dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum, blkno,
4688  RBM_ZERO_AND_LOCK, bstrategy_dst,
4689  permanent);
4690  dstPage = BufferGetPage(dstBuf);
4691 
4692  START_CRIT_SECTION();
4693 
4694  /* Copy page data from the source to the destination. */
4695  memcpy(dstPage, srcPage, BLCKSZ);
4696  MarkBufferDirty(dstBuf);
4697 
4698  /* WAL-log the copied page. */
4699  if (use_wal)
4700  log_newpage_buffer(dstBuf, true);
4701 
4702  END_CRIT_SECTION();
4703 
4704  UnlockReleaseBuffer(dstBuf);
4705  UnlockReleaseBuffer(srcBuf);
4706  }
4707 
4708  FreeAccessStrategy(bstrategy_src);
4709  FreeAccessStrategy(bstrategy_dst);
4710 }
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4867
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2474
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition: bufmgr.c:829
@ BAS_BULKREAD
Definition: bufmgr.h:36
@ BAS_BULKWRITE
Definition: bufmgr.h:38
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:681
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:535
#define XLogIsNeeded()
Definition: xlog.h:107
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237

References BAS_BULKREAD, BAS_BULKWRITE, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, END_CRIT_SECTION, FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_NORMAL, RBM_ZERO_AND_LOCK, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 3866 of file bufmgr.c.

3867 {
3868  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3869  {
3870  /*
3871  * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
3872  * tableam returns the size in bytes - but for the purpose of this
3873  * routine, we want the number of blocks. Therefore divide, rounding
3874  * up.
3875  */
3876  uint64 szbytes;
3877 
3878  szbytes = table_relation_size(relation, forkNum);
3879 
3880  return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
3881  }
3882  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
3883  {
3884  return smgrnblocks(RelationGetSmgr(relation), forkNum);
3885  }
3886  else
3887  Assert(false);
3888 
3889  return 0; /* keep compiler quiet */
3890 }
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1878

References Assert, RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
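
For example, with BLCKSZ = 8192, a table AM reporting szbytes = 16385 yields (16385 + 8191) / 8192 = 3 blocks: a single byte spilling past two full blocks still counts as a whole third block.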

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 2537 of file bufmgr.c.

2540 {
2541  ForkNumber forkNum = MAIN_FORKNUM;
2542  BufferDesc *bufHdr;
2543 
2544  if (BufferIsValid(buffer))
2545  {
2546  Assert(BufferIsPinned(buffer));
2547  if (BufferIsLocal(buffer))
2548  {
2549  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2550  if (bufHdr->tag.blockNum == blockNum &&
2551  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2552  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2553  return buffer;
2554  UnpinLocalBuffer(buffer);
2555  }
2556  else
2557  {
2558  bufHdr = GetBufferDescriptor(buffer - 1);
2559  /* we have pin, so it's ok to examine tag without spinlock */
2560  if (bufHdr->tag.blockNum == blockNum &&
2561  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2562  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2563  return buffer;
2564  UnpinBuffer(bufHdr);
2565  }
2566  }
2567 
2568  return ReadBuffer(relation, blockNum);
2569 }
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:681

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 4850 of file bufmgr.c.

4851 {
4852  if (!BufferIsValid(buffer))
4853  elog(ERROR, "bad buffer ID: %d", buffer);
4854 
4855  if (BufferIsLocal(buffer))
4856  UnpinLocalBuffer(buffer);
4857  else
4858  UnpinBuffer(GetBufferDescriptor(buffer - 1));
4859 }

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_drop_lock_and_maybe_pin(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndBitmapHeapScan(), ExecEndIndexOnlyScan(), ExecReScanBitmapHeapScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), lazy_scan_heap(), lazy_vacuum_heap_rel(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 249 of file bufmgr.c.

250 {
251  /* Already reserved (or freed), nothing to do */
252  if (ReservedRefCountEntry != NULL)
253  return;
254 
255  /*
256  * First search for a free entry in the array; that'll be sufficient in the
257  * majority of cases.
258  */
259  {
260  int i;
261 
262  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
263  {
264  PrivateRefCountEntry *res;
265 
266  res = &PrivateRefCountArray[i];
267 
268  if (res->buffer == InvalidBuffer)
269  {
270  ReservedRefCountEntry = res;
271  return;
272  }
273  }
274  }
275 
276  /*
277  * No luck. All array entries are full. Move one array entry into the hash
278  * table.
279  */
280  {
281  /*
282  * Move entry from the current clock position in the array into the
283  * hashtable. Use that slot.
284  */
285  PrivateRefCountEntry *hashent;
286  bool found;
287 
288  /* select victim slot */
289  ReservedRefCountEntry =
290  &PrivateRefCountArray[PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
291 
292  /* Better be used, otherwise we shouldn't get here. */
293  Assert(ReservedRefCountEntry->buffer != InvalidBuffer);
294 
295  /* enter victim array entry into hashtable */
296  hashent = hash_search(PrivateRefCountHash,
297  &(ReservedRefCountEntry->buffer),
298  HASH_ENTER,
299  &found);
300  Assert(!found);
301  hashent->refcount = ReservedRefCountEntry->refcount;
302 
303  /* clear the now free array slot */
304  ReservedRefCountEntry->buffer = InvalidBuffer;
305  ReservedRefCountEntry->refcount = 0;
306 
307  PrivateRefCountOverflowed++;
308  }
309 }
static uint32 PrivateRefCountClock
Definition: bufmgr.c:210
@ HASH_ENTER
Definition: hsearch.h:114

References Assert, PrivateRefCountEntry::buffer, HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, res, and ReservedRefCountEntry.

Referenced by BufferAlloc(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntry(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().
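
The reserve-then-consume protocol, sketched in pseudocode (these helpers are static to bufmgr.c, so this is documentation rather than callable extension code): the reservation is made while no spinlock is held, because growing the hash table may allocate memory, and it is later consumed by NewPrivateRefCountEntry() once the pin is taken.

ReservePrivateRefCountEntry();      /* may touch the hash table */
ResourceOwnerEnlarge(CurrentResourceOwner);

buf_state = LockBufHdr(buf);        /* spinlock held: no allocation now */
/* ... decide that this buffer is the one we want ... */
PinBuffer_Locked(buf);              /* consumes the reserved entry */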

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 5978 of file bufmgr.c.

5979 {
5980  Buffer buffer = DatumGetInt32(res);
5981 
5982  return psprintf("lost track of buffer IO on buffer %d", buffer);
5983 }
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202

References PrivateRefCountEntry::buffer, DatumGetInt32(), psprintf(), and res.

◆ ResOwnerPrintBufferPin()

static char * ResOwnerPrintBufferPin ( Datum  res)
static

Definition at line 6001 of file bufmgr.c.

6002 {
6003  return DebugPrintBufferRefcount(DatumGetInt32(res));
6004 }

References DatumGetInt32(), DebugPrintBufferRefcount(), and res.

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 5970 of file bufmgr.c.

5971 {
5972  Buffer buffer = DatumGetInt32(res);
5973 
5974  AbortBufferIO(buffer);
5975 }
static void AbortBufferIO(Buffer buffer)
Definition: bufmgr.c:5579

References AbortBufferIO(), PrivateRefCountEntry::buffer, DatumGetInt32(), and res.

◆ ResOwnerReleaseBufferPin()

static void ResOwnerReleaseBufferPin ( Datum  res)
static

Definition at line 5986 of file bufmgr.c.

5987 {
5988  Buffer buffer = DatumGetInt32(res);
5989 
5990  /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
5991  if (!BufferIsValid(buffer))
5992  elog(ERROR, "bad buffer ID: %d", buffer);
5993 
5994  if (BufferIsLocal(buffer))
5995  UnpinLocalBufferNoOwner(buffer);
5996  else
5997  UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
5998 }
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition: bufmgr.c:2758
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition: localbuf.c:688

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), DatumGetInt32(), elog, ERROR, GetBufferDescriptor(), res, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ rlocator_comparator()

static int rlocator_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 5661 of file bufmgr.c.

5662 {
5663  RelFileLocator n1 = *(const RelFileLocator *) p1;
5664  RelFileLocator n2 = *(const RelFileLocator *) p2;
5665 
5666  if (n1.relNumber < n2.relNumber)
5667  return -1;
5668  else if (n1.relNumber > n2.relNumber)
5669  return 1;
5670 
5671  if (n1.dbOid < n2.dbOid)
5672  return -1;
5673  else if (n1.dbOid > n2.dbOid)
5674  return 1;
5675 
5676  if (n1.spcOid < n2.spcOid)
5677  return -1;
5678  else if (n1.spcOid > n2.spcOid)
5679  return 1;
5680  else
5681  return 0;
5682 }

References RelFileLocator::dbOid, p2, RelFileLocator::relNumber, and RelFileLocator::spcOid.

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().
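
A sketch of how its callers use it (the array and target are illustrative; the function itself is static to bufmgr.c): sort the relation locators once, then bsearch per buffer, which is why DropRelationsAllBuffers() only takes this path above RELS_BSEARCH_THRESHOLD entries.

qsort(locators, nlocators, sizeof(RelFileLocator), rlocator_comparator);

if (bsearch(&target, locators, nlocators,
            sizeof(RelFileLocator), rlocator_comparator) != NULL)
{
    /* this buffer belongs to one of the affected relations */
}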

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 5842 of file bufmgr.c.

5844 {
5845  PendingWriteback *pending;
5846 
5847  if (io_direct_flags & IO_DIRECT_DATA)
5848  return;
5849 
5850  /*
5851  * Add buffer to the pending writeback array, unless writeback control is
5852  * disabled.
5853  */
5854  if (*wb_context->max_pending > 0)
5855  {
5856  Assert(wb_context->nr_pending < WRITEBACK_MAX_PENDING_FLUSHES);
5857 
5858  pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
5859 
5860  pending->tag = *tag;
5861  }
5862 
5863  /*
5864  * Perform pending flushes if the writeback limit is exceeded. This
5865  * includes the case where previously an item has been added, but control
5866  * is now disabled.
5867  */
5868  if (wb_context->nr_pending >= *wb_context->max_pending)
5869  IssuePendingWritebacks(wb_context, io_context);
5870 }
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert, IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), WritebackContext::max_pending, WritebackContext::nr_pending, WritebackContext::pending_writebacks, PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void *  arg)
static

Definition at line 5621 of file bufmgr.c.

5622 {
5623  BufferDesc *bufHdr = (BufferDesc *) arg;
5624 
5625  /* Buffer is pinned, so we can read the tag without locking the spinlock */
5626  if (bufHdr != NULL)
5627  {
5628  char *path = relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
5629  BufTagGetForkNum(&bufHdr->tag));
5630 
5631  errcontext("writing block %u of relation %s",
5632  bufHdr->tag.blockNum, path);
5633  pfree(path);
5634  }
5635 }

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, pfree(), relpathperm, and BufferDesc::tag.

Referenced by FlushBuffer().

◆ StartBufferIO()

static bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)
static

Definition at line 5485 of file bufmgr.c.

5486 {
5487  uint32 buf_state;
5488 
5489  ResourceOwnerEnlarge(CurrentResourceOwner);
5490 
5491  for (;;)
5492  {
5493  buf_state = LockBufHdr(buf);
5494 
5495  if (!(buf_state & BM_IO_IN_PROGRESS))
5496  break;
5497  UnlockBufHdr(buf, buf_state);
5498  if (nowait)
5499  return false;
5500  WaitIO(buf);
5501  }
5502 
5503  /* Once we get here, there is definitely no I/O active on this buffer */
5504 
5505  if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
5506  {
5507  /* someone else already did the I/O */
5508  UnlockBufHdr(buf, buf_state);
5509  return false;
5510  }
5511 
5512  buf_state |= BM_IO_IN_PROGRESS;
5513  UnlockBufHdr(buf, buf_state);
5514 
5515  ResourceOwnerRememberBufferIO(CurrentResourceOwner,
5516  BufferDescriptorGetBuffer(buf));
5517 
5518  return true;
5519 }
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), and WaitIO().

Referenced by ExtendBufferedRelShared(), FlushBuffer(), and WaitReadBuffersCanStartIO().
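
The I/O handshake as FlushBuffer()-style writers use it, in sketch form (both helpers are static, so this is explanatory pseudocode): BM_IO_IN_PROGRESS acts as a per-buffer mutex, and a false return means another backend completed the work first.

if (!StartBufferIO(buf, false, false))  /* forInput=false: we are writing */
    return;                             /* someone else already flushed it */

/* ... smgrwrite() the page image ... */

TerminateBufferIO(buf, true, 0, true);  /* clear BM_DIRTY, forget owner */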

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1321 of file bufmgr.c.

1325 {
1326  int nblocks = 1;
1327  bool result;
1328 
1329  result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags);
1330  Assert(nblocks == 1); /* single block can't be short */
1331 
1332  return result;
1333 }
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
Definition: bufmgr.c:1211

References Assert, PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)

Definition at line 1306 of file bufmgr.c.

1311 {
1312  return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags);
1313 }

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().
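
A sketch of a combined multi-block read with the vectored API (illustrative; assumes rel is open and blocknum is valid, and note that the operation struct must stay allocated until WaitReadBuffers() returns):

Buffer               buffers[MAX_IO_COMBINE_LIMIT];
int                  nblocks = 8;       /* ask for up to 8 contiguous blocks */
ReadBuffersOperation op = {0};

op.rel = rel;
op.smgr = RelationGetSmgr(rel);
op.smgr_persistence = 0;                /* infer persistence from rel */
op.forknum = MAIN_FORKNUM;
op.strategy = NULL;

if (StartReadBuffers(&op, buffers, blocknum, &nblocks, 0))
    WaitReadBuffers(&op);               /* performs the physical read(s) */

/* nblocks now reports how many buffers were actually pinned */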

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)
static

Definition at line 1211 of file bufmgr.c.

1216 {
1217  int actual_nblocks = *nblocks;
1218  int io_buffers_len = 0;
1219 
1220  Assert(*nblocks > 0);
1221  Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1222 
1223  for (int i = 0; i < actual_nblocks; ++i)
1224  {
1225  bool found;
1226 
1227  buffers[i] = PinBufferForBlock(operation->rel,
1228  operation->smgr,
1229  operation->smgr_persistence,
1230  operation->forknum,
1231  blockNum + i,
1232  operation->strategy,
1233  &found);
1234 
1235  if (found)
1236  {
1237  /*
1238  * Terminate the read as soon as we get a hit. It could be a
1239  * single buffer hit, or it could be a hit that follows a readable
1240  * range. We don't want to create more than one readable range,
1241  * so we stop here.
1242  */
1243  actual_nblocks = i + 1;
1244  break;
1245  }
1246  else
1247  {
1248  /* Extend the readable range to cover this block. */
1249  io_buffers_len++;
1250  }
1251  }
1252  *nblocks = actual_nblocks;
1253 
1254  if (likely(io_buffers_len == 0))
1255  return false;
1256 
1257  /* Populate information needed for I/O. */
1258  operation->buffers = buffers;
1259  operation->blocknum = blockNum;
1260  operation->flags = flags;
1261  operation->nblocks = actual_nblocks;
1262  operation->io_buffers_len = io_buffers_len;
1263 
1264  if (flags & READ_BUFFERS_ISSUE_ADVICE)
1265  {
1266  /*
1267  * In theory we should only do this if PinBufferForBlock() had to
1268  * allocate new buffers above. That way, if two calls to
1269  * StartReadBuffers() were made for the same blocks before
1270  * WaitReadBuffers(), only the first would issue the advice. That'd be
1271  * a better simulation of true asynchronous I/O, which would only
1272  * start the I/O once, but isn't done here for simplicity. Note also
1273  * that the following call might actually issue two advice calls if we
1274  * cross a segment boundary; in a true asynchronous version we might
1275  * choose to process only one real I/O at a time in that case.
1276  */
1277  smgrprefetch(operation->smgr,
1278  operation->forknum,
1279  blockNum,
1280  operation->io_buffers_len);
1281  }
1282 
1283  /* Indicate that WaitReadBuffers() should be called. */
1284  return true;
1285 }
#define MAX_IO_COMBINE_LIMIT
Definition: bufmgr.h:172
@ READ_BUFFERS_ISSUE_ADVICE
Definition: bufmgr.h:116
#define likely(x)
Definition: c.h:310
int16 io_buffers_len
Definition: bufmgr.h:141
Buffer * buffers
Definition: bufmgr.h:137
BlockNumber blocknum
Definition: bufmgr.h:138

References Assert, ReadBuffersOperation::blocknum, ReadBuffersOperation::buffers, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ReadBuffersOperation::io_buffers_len, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, ReadBuffersOperation::smgr_persistence, smgrprefetch(), and ReadBuffersOperation::strategy.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 3429 of file bufmgr.c.

3430 {
3431  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
3432  int result = 0;
3433  uint32 buf_state;
3434  BufferTag tag;
3435 
3436  /* Make sure we can handle the pin */
3437  ReservePrivateRefCountEntry();
3438  ResourceOwnerEnlarge(CurrentResourceOwner);
3439 
3440  /*
3441  * Check whether buffer needs writing.
3442  *
3443  * We can make this check without taking the buffer content lock so long
3444  * as we mark pages dirty in access methods *before* logging changes with
3445  * XLogInsert(): if someone marks the buffer dirty just after our check we
3446  * don't worry because our checkpoint.redo points before log record for
3447  * upcoming changes and so we are not required to write such dirty buffer.
3448  */
3449  buf_state = LockBufHdr(bufHdr);
3450 
3451  if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
3452  BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
3453  {
3454  result |= BUF_REUSABLE;
3455  }
3456  else if (skip_recently_used)
3457  {
3458  /* Caller told us not to write recently-used buffers */
3459  UnlockBufHdr(bufHdr, buf_state);
3460  return result;
3461  }
3462 
3463  if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
3464  {
3465  /* It's clean, so nothing to do */
3466  UnlockBufHdr(bufHdr, buf_state);
3467  return result;
3468  }
3469 
3470  /*
3471  * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
3472  * buffer is clean by the time we've locked it.)
3473  */
3474  PinBuffer_Locked(bufHdr);
3475  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3476 
3477  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
3478 
3479  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3480 
3481  tag = bufHdr->tag;
3482 
3483  UnpinBuffer(bufHdr);
3484 
3485  /*
3486  * SyncOneBuffer() is only called by checkpointer and bgwriter, so
3487  * IOContext will always be IOCONTEXT_NORMAL.
3488  */
3489  ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
3490 
3491  return result | BUF_WRITTEN;
3492 }

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, BufferDescriptorGetContentLock(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().
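
How a bgwriter-style caller interprets the result bits, sketched (SyncOneBuffer() is static to bufmgr.c; wb_context is assumed to have been set up with WritebackContextInit()):

int     res = SyncOneBuffer(buf_id, true, &wb_context);

if (res & BUF_WRITTEN)
    num_written++;      /* a dirty page was flushed */
if (res & BUF_REUSABLE)
    reusable++;         /* refcount and usage count were both zero */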

◆ TerminateBufferIO()

static void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint32  set_flag_bits,
bool  forget_owner 
)
static

Definition at line 5542 of file bufmgr.c.

5544 {
5545  uint32 buf_state;
5546 
5547  buf_state = LockBufHdr(buf);
5548 
5549  Assert(buf_state & BM_IO_IN_PROGRESS);
5550 
5551  buf_state &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
5552  if (clear_dirty && !(buf_state & BM_JUST_DIRTIED))
5553  buf_state &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
5554 
5555  buf_state |= set_flag_bits;
5556  UnlockBufHdr(buf, buf_state);
5557 
5558  if (forget_owner)
5559  ResourceOwnerForgetBufferIO(CurrentResourceOwner,
5560  BufferDescriptorGetBuffer(buf));
5561 
5562  ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
5563 }
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, buf, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerForgetBufferIO(), and UnlockBufHdr().

Referenced by AbortBufferIO(), ExtendBufferedRelShared(), FlushBuffer(), and WaitReadBuffers().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void *  arg 
)
static

Definition at line 5807 of file bufmgr.c.

5808 {
5809  CkptTsStatus *sa = (CkptTsStatus *) a;
5810  CkptTsStatus *sb = (CkptTsStatus *) b;
5811 
5812  /* we want a min-heap, so return 1 when a < b */
5813  if (sa->progress < sb->progress)
5814  return 1;
5815  else if (sa->progress == sb->progress)
5816  return 0;
5817  else
5818  return -1;
5819 }

References a, b, and CkptTsStatus::progress.

Referenced by BufferSync().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5057 of file bufmgr.c.

5058 {
5059  BufferDesc *buf = PinCountWaitBuf;
5060 
5061  if (buf)
5062  {
5063  uint32 buf_state;
5064 
5065  buf_state = LockBufHdr(buf);
5066 
5067  /*
5068  * Don't complain if flag bit not set; it could have been reset but we
5069  * got a cancel/die interrupt before getting the signal.
5070  */
5071  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5072  buf->wait_backend_pgprocno == MyProcNumber)
5073  buf_state &= ~BM_PIN_COUNT_WAITER;
5074 
5075  UnlockBufHdr(buf, buf_state);
5076 
5077  PinCountWaitBuf = NULL;
5078  }
5079 }

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdr().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 4867 of file bufmgr.c.

4868 {
4869  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4870  ReleaseBuffer(buffer);
4871 }

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), do_setval(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), 
spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

static void UnpinBuffer ( BufferDesc buf)
static

Definition at line 2749 of file bufmgr.c.

2750 {
2751  Buffer b = BufferDescriptorGetBuffer(buf);
2752 
2753  ResourceOwnerForgetBuffer(CurrentResourceOwner, b);
2754  UnpinBufferNoOwner(buf);
2755 }

References b, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, ResourceOwnerForgetBuffer(), and UnpinBufferNoOwner().

Referenced by ReleaseAndReadBuffer(), ReleaseBuffer(), and SyncOneBuffer().

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 2758 of file bufmgr.c.

2759 {
2760  PrivateRefCountEntry *ref;
2761  Buffer b = BufferDescriptorGetBuffer(buf);
2762 
2763  Assert(!BufferIsLocal(b));
2764 
2765  /* not moving as we're likely deleting it soon anyway */
2766  ref = GetPrivateRefCountEntry(b, false);
2767  Assert(ref != NULL);
2768  Assert(ref->refcount > 0);
2769  ref->refcount--;
2770  if (ref->refcount == 0)
2771  {
2772  uint32 buf_state;
2773  uint32 old_buf_state;
2774 
2775  /*
2776  * Mark buffer non-accessible to Valgrind.
2777  *
2778  * Note that the buffer may have already been marked non-accessible
2779  * within access method code that enforces that buffers are only
2780  * accessed while a buffer lock is held.
2781  */
2782  VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
2783 
2784  /* I'd better not still hold the buffer content lock */
2785  Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
2786 
2787  /*
2788  * Decrement the shared reference count.
2789  *
2790  * Since buffer spinlock holder can update status using just write,
2791  * it's not safe to use atomic decrement here; thus use a CAS loop.
2792  */
2793  old_buf_state = pg_atomic_read_u32(&buf->state);
2794  for (;;)
2795  {
2796  if (old_buf_state & BM_LOCKED)
2797  old_buf_state = WaitBufHdrUnlocked(buf);
2798 
2799  buf_state = old_buf_state;
2800 
2801  buf_state -= BUF_REFCOUNT_ONE;
2802 
2803  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2804  buf_state))
2805  break;
2806  }
2807 
2808  /* Support LockBufferForCleanup() */
2809  if (buf_state & BM_PIN_COUNT_WAITER)
2810  {
2811  /*
2812  * Acquire the buffer header lock, re-check that there's a waiter.
2813  * Another backend could have unpinned this buffer, and already
2814  * woken up the waiter. There's no danger of the buffer being
2815  * replaced after we unpinned it above, as it's pinned by the
2816  * waiter.
2817  */
2818  buf_state = LockBufHdr(buf);
2819 
2820  if ((buf_state & BM_PIN_COUNT_WAITER) &&
2821  BUF_STATE_GET_REFCOUNT(buf_state) == 1)
2822  {
2823  /* we just released the last pin other than the waiter's */
2824  int wait_backend_pgprocno = buf->wait_backend_pgprocno;
2825 
2826  buf_state &= ~BM_PIN_COUNT_WAITER;
2827  UnlockBufHdr(buf, buf_state);
2828  ProcSendSignal(wait_backend_pgprocno);
2829  }
2830  else
2831  UnlockBufHdr(buf, buf_state);
2832  }
2833  ForgetPrivateRefCountEntry(ref);
2834  }
2835 }
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition: bufmgr.c:438
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
void ProcSendSignal(ProcNumber procNumber)
Definition: proc.c:1878

References Assert, b, BM_LOCKED, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferIsLocal, BufHdrGetBlock, ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), LockBufHdr(), LWLockHeldByMe(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), ProcSendSignal(), PrivateRefCountEntry::refcount, UnlockBufHdr(), VALGRIND_MAKE_MEM_NOACCESS, and WaitBufHdrUnlocked().

Referenced by ResOwnerReleaseBufferPin(), and UnpinBuffer().
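The CAS loop in this function is the noteworthy part: a buffer-header spinlock holder may update the state word with a plain store, so a bare atomic decrement could be lost; instead the unpin waits out the lock bit and retries a compare-and-swap until it succeeds. A minimal standalone sketch of the same pattern using C11 atomics (the MY_* constants and unpin_state() are illustrative stand-ins, not PostgreSQL code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MY_BM_LOCKED    (1U << 31)  /* stand-in for the header-lock bit */
#define MY_REFCOUNT_ONE 1U          /* refcount occupies the low bits */

/* Decrement the refcount packed into *state, honoring the lock bit. */
static void
unpin_state(_Atomic uint32_t *state)
{
    uint32_t old_state = atomic_load(state);

    for (;;)
    {
        /* Reread until the lock bit is clear. */
        while (old_state & MY_BM_LOCKED)
            old_state = atomic_load(state);

        uint32_t new_state = old_state - MY_REFCOUNT_ONE;

        /*
         * CAS instead of atomic_fetch_sub: if a lock holder rewrote the
         * word in the meantime, the CAS fails, old_state is refreshed,
         * and we retry.
         */
        if (atomic_compare_exchange_weak(state, &old_state, new_state))
            break;
    }
}

int
main(void)
{
    _Atomic uint32_t state = 3;  /* three pins, lock bit clear */

    unpin_state(&state);
    printf("refcount now %u\n", (unsigned) atomic_load(&state));
    return 0;
}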

◆ WaitBufHdrUnlocked()

static uint32 WaitBufHdrUnlocked ( BufferDesc * buf )
static

Definition at line 5718 of file bufmgr.c.

5719 {
5720  SpinDelayStatus delayStatus;
5721  uint32 buf_state;
5722 
5723  init_local_spin_delay(&delayStatus);
5724 
5725  buf_state = pg_atomic_read_u32(&buf->state);
5726 
5727  while (buf_state & BM_LOCKED)
5728  {
5729  perform_spin_delay(&delayStatus);
5730  buf_state = pg_atomic_read_u32(&buf->state);
5731  }
5732 
5733  finish_spin_delay(&delayStatus);
5734 
5735  return buf_state;
5736 }

References BM_LOCKED, buf, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), and pg_atomic_read_u32().

Referenced by MarkBufferDirty(), PinBuffer(), and UnpinBufferNoOwner().
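The polling here is wrapped in the init_local_spin_delay() / perform_spin_delay() / finish_spin_delay() protocol, which escalates from busy-spinning to yielding the CPU under contention and retunes itself afterwards. A rough self-contained imitation of that shape (the my_* helpers are simplified stand-ins; the real versions adapt spins_per_delay and eventually sleep):

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
    int spins;
} my_spin_delay;

static void
my_init_spin_delay(my_spin_delay *d)
{
    d->spins = 0;
}

static void
my_perform_spin_delay(my_spin_delay *d)
{
    /* Busy-spin for a while, then start yielding the CPU. */
    if (++d->spins > 100)
        sched_yield();
}

static void
my_finish_spin_delay(my_spin_delay *d)
{
    (void) d;  /* the real version retunes spins_per_delay here */
}

/* Poll an atomic state word until the given lock bit clears. */
static uint32_t
my_wait_unlocked(_Atomic uint32_t *state, uint32_t locked_bit)
{
    my_spin_delay delay;
    uint32_t s;

    my_init_spin_delay(&delay);
    s = atomic_load(state);
    while (s & locked_bit)
    {
        my_perform_spin_delay(&delay);
        s = atomic_load(state);
    }
    my_finish_spin_delay(&delay);
    return s;
}

int
main(void)
{
    _Atomic uint32_t state = 0;  /* lock bit already clear */

    printf("state %u\n", (unsigned) my_wait_unlocked(&state, 1U << 31));
    return 0;
}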

◆ WaitIO()

static void WaitIO ( BufferDesc * buf )
static

Definition at line 5436 of file bufmgr.c.

5437 {
5438  ConditionVariable *cv = BufferDescriptorGetIOCV(buf->buf_id);
5439 
5440  ConditionVariablePrepareToSleep(cv);
5441  for (;;)
5442  {
5443  uint32 buf_state;
5444 
5445  /*
5446  * It may not be necessary to acquire the spinlock to check the flag
5447  * here, but since this test is essential for correctness, we'd better
5448  * play it safe.
5449  */
5450  buf_state = LockBufHdr(buf);
5451  UnlockBufHdr(buf, buf_state);
5452 
5453  if (!(buf_state & BM_IO_IN_PROGRESS))
5454  break;
5455  ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
5456  }
5457  ConditionVariableCancelSleep();
5458 }
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), LockBufHdr(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().
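The prepare/sleep/cancel sequence is PostgreSQL's standard condition-variable idiom: the flag must be rechecked on every iteration because a wakeup may be spurious or stale. The classic pthread rendering of the same wait-loop shape, for comparison only (this is an analogy, not the bufmgr code; compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    pthread_mutex_t lock;
    pthread_cond_t cv;
    bool io_in_progress;
} my_buf_io;

/* Block until io_in_progress is observed false. */
static void
my_wait_io(my_buf_io *b)
{
    pthread_mutex_lock(&b->lock);
    /* pthread_cond_wait can wake spuriously, so loop and recheck. */
    while (b->io_in_progress)
        pthread_cond_wait(&b->cv, &b->lock);
    pthread_mutex_unlock(&b->lock);
}

int
main(void)
{
    my_buf_io b = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false};

    my_wait_io(&b);  /* returns immediately: no I/O in flight */
    printf("no I/O in progress\n");
    return 0;
}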

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation operation)

Definition at line 1349 of file bufmgr.c.

1350 {
1351  Buffer *buffers;
1352  int nblocks;
1353  BlockNumber blocknum;
1354  ForkNumber forknum;
1355  IOContext io_context;
1356  IOObject io_object;
1357  char persistence;
1358 
1359  /*
1360  * Currently operations are only allowed to include a read of some range,
1361  * with an optional extra buffer that is already pinned at the end. So
1362  * nblocks can be at most one more than io_buffers_len.
1363  */
1364  Assert((operation->nblocks == operation->io_buffers_len) ||
1365  (operation->nblocks == operation->io_buffers_len + 1));
1366 
1367  /* Find the range of the physical read we need to perform. */
1368  nblocks = operation->io_buffers_len;
1369  if (nblocks == 0)
1370  return; /* nothing to do */
1371 
1372  buffers = &operation->buffers[0];
1373  blocknum = operation->blocknum;
1374  forknum = operation->forknum;
1375 
1376  persistence = operation->rel
1377  ? operation->rel->rd_rel->relpersistence
1378  : RELPERSISTENCE_PERMANENT;
1379  if (persistence == RELPERSISTENCE_TEMP)
1380  {
1381  io_context = IOCONTEXT_NORMAL;
1382  io_object = IOOBJECT_TEMP_RELATION;
1383  }
1384  else
1385  {
1386  io_context = IOContextForStrategy(operation->strategy);
1387  io_object = IOOBJECT_RELATION;
1388  }
1389 
1390  /*
1391  * We count all these blocks as read by this backend. This is traditional
1392  * behavior, but might turn out to be not true if we find that someone
1393  * else has beaten us and completed the read of some of these blocks. In
1394  * that case the system globally double-counts, but we traditionally don't
1395  * count this as a "hit", and we don't have a separate counter for "miss,
1396  * but another backend completed the read".
1397  */
1398  if (persistence == RELPERSISTENCE_TEMP)
1399  pgBufferUsage.local_blks_read += nblocks;
1400  else
1401  pgBufferUsage.shared_blks_read += nblocks;
1402 
1403  for (int i = 0; i < nblocks; ++i)
1404  {
1405  int io_buffers_len;
1406  Buffer io_buffers[MAX_IO_COMBINE_LIMIT];
1407  void *io_pages[MAX_IO_COMBINE_LIMIT];
1408  instr_time io_start;
1409  BlockNumber io_first_block;
1410 
1411  /*
1412  * Skip this block if someone else has already completed it. If an
1413  * I/O is already in progress in another backend, this will wait for
1414  * the outcome: either done, or something went wrong and we will
1415  * retry.
1416  */
1417  if (!WaitReadBuffersCanStartIO(buffers[i], false))
1418  {
1419  /*
1420  * Report this as a 'hit' for this backend, even though it must
1421  * have started out as a miss in PinBufferForBlock().
1422  */
1423  TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i,
1424  operation->smgr->smgr_rlocator.locator.spcOid,
1425  operation->smgr->smgr_rlocator.locator.dbOid,
1426  operation->smgr->smgr_rlocator.locator.relNumber,
1427  operation->smgr->smgr_rlocator.backend,
1428  true);
1429  continue;
1430  }
1431 
1432  /* We found a buffer that we need to read in. */
1433  io_buffers[0] = buffers[i];
1434  io_pages[0] = BufferGetBlock(buffers[i]);
1435  io_first_block = blocknum + i;
1436  io_buffers_len = 1;
1437 
1438  /*
1439  * How many neighboring-on-disk blocks can we scatter-read into
1440  * other buffers at the same time? In this case we don't wait if we
1441  * see an I/O already in progress. We already hold BM_IO_IN_PROGRESS
1442  * for the head block, so we should get on with that I/O as soon as
1443  * possible. We'll come back to this block again, above.
1444  */
1445  while ((i + 1) < nblocks &&
1446  WaitReadBuffersCanStartIO(buffers[i + 1], true))
1447  {
1448  /* Must be consecutive block numbers. */
1449  Assert(BufferGetBlockNumber(buffers[i + 1]) ==
1450  BufferGetBlockNumber(buffers[i]) + 1);
1451 
1452  io_buffers[io_buffers_len] = buffers[++i];
1453  io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
1454  }
1455 
1456  io_start = pgstat_prepare_io_time(track_io_timing);
1457  smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
1458  pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
1459  io_buffers_len);
1460 
1461  /* Verify each block we read, and terminate the I/O. */
1462  for (int j = 0; j < io_buffers_len; ++j)
1463  {
1464  BufferDesc *bufHdr;
1465  Block bufBlock;
1466 
1467  if (persistence == RELPERSISTENCE_TEMP)
1468  {
1469  bufHdr = GetLocalBufferDescriptor(-io_buffers[j] - 1);
1470  bufBlock = LocalBufHdrGetBlock(bufHdr);
1471  }
1472  else
1473  {
1474  bufHdr = GetBufferDescriptor(io_buffers[j] - 1);
1475  bufBlock = BufHdrGetBlock(bufHdr);
1476  }
1477 
1478  /* check for garbage data */
1479  if (!PageIsVerifiedExtended((Page) bufBlock, io_first_block + j,
1480  PIV_LOG_WARNING | PIV_REPORT_STAT))
1481  {
1482  if ((operation->flags & READ_BUFFERS_ZERO_ON_ERROR) || zero_damaged_pages)
1483  {
1484  ereport(WARNING,
1485  (errcode(ERRCODE_DATA_CORRUPTED),
1486  errmsg("invalid page in block %u of relation %s; zeroing out page",
1487  io_first_block + j,
1488  relpath(operation->smgr->smgr_rlocator, forknum))));
1489  memset(bufBlock, 0, BLCKSZ);
1490  }
1491  else
1492  ereport(ERROR,
1493  (errcode(ERRCODE_DATA_CORRUPTED),
1494  errmsg("invalid page in block %u of relation %s",
1495  io_first_block + j,
1496  relpath(operation->smgr->smgr_rlocator, forknum))));
1497  }
1498 
1499  /* Terminate I/O and set BM_VALID. */
1500  if (persistence == RELPERSISTENCE_TEMP)
1501  {
1502  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1503 
1504  buf_state |= BM_VALID;
1505  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1506  }
1507  else
1508  {
1509  /* Set BM_VALID, terminate IO, and wake up any waiters */
1510  TerminateBufferIO(bufHdr, false, BM_VALID, true);
1511  }
1512 
1513  /* Report I/Os as completing individually. */
1514  TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, io_first_block + j,
1515  operation->smgr->smgr_rlocator.locator.spcOid,
1516  operation->smgr->smgr_rlocator.locator.dbOid,
1517  operation->smgr->smgr_rlocator.locator.relNumber,
1518  operation->smgr->smgr_rlocator.backend,
1519  false);
1520  }
1521 
1522  VacuumPageMiss += io_buffers_len;
1523  if (VacuumCostActive)
1524  VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
1525  }
1526 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3667
static bool WaitReadBuffersCanStartIO(Buffer buffer, bool nowait)
Definition: bufmgr.c:1336
bool zero_damaged_pages
Definition: bufmgr.c:139
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:375
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
#define PIV_LOG_WARNING
Definition: bufpage.h:465
#define PIV_REPORT_STAT
Definition: bufpage.h:466
int VacuumCostPageMiss
Definition: globals.c:149
int64 VacuumPageMiss
Definition: globals.c:155
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ IOOP_READ
Definition: pgstat.h:302
void smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:600
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31

References Assert, RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BM_VALID, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, BufHdrGetBlock, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::io_buffers_len, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, j, BufferUsage::local_blks_read, LocalBufHdrGetBlock, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PIV_LOG_WARNING, PIV_REPORT_STAT, RelationData::rd_rel, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, RelFileLocator::relNumber, relpath, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrreadv(), RelFileLocator::spcOid, BufferDesc::state, ReadBuffersOperation::strategy, TerminateBufferIO(), track_io_timing, VacuumCostActive, VacuumCostBalance, VacuumCostPageMiss, VacuumPageMiss, WaitReadBuffersCanStartIO(), WARNING, and zero_damaged_pages.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().
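For orientation, here is a hedged sketch of how a caller could drive this function, inferred from the ReadBuffersOperation fields referenced above. It assumes StartReadBuffers() fills in buffers, blocknum, nblocks, and io_buffers_len and returns true when a physical read is still needed; this vectored-read interface was still evolving in this snapshot, so treat the sketch as pseudocode rather than a stable recipe:

static void
read_block_range(Relation rel, BlockNumber first, int count)
{
    ReadBuffersOperation operation = {0};
    Buffer buffers[MAX_IO_COMBINE_LIMIT];
    int nblocks = Min(count, MAX_IO_COMBINE_LIMIT);

    /* Caller-filled fields, per the References list above. */
    operation.rel = rel;
    operation.smgr = RelationGetSmgr(rel);
    operation.forknum = MAIN_FORKNUM;
    operation.strategy = NULL;

    /* Pin the buffers; true means at least one block must be read. */
    if (StartReadBuffers(&operation, buffers, first, &nblocks,
                         READ_BUFFERS_ZERO_ON_ERROR))
        WaitReadBuffers(&operation);    /* issues the combined smgrreadv() */

    /* buffers[0 .. nblocks - 1] are now pinned and valid. */
}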

◆ WaitReadBuffersCanStartIO()

static bool WaitReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1336 of file bufmgr.c.

1337 {
1338  if (BufferIsLocal(buffer))
1339  {
1340  BufferDesc *bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1341 
1342  return (pg_atomic_read_u32(&bufHdr->state) & BM_VALID) == 0;
1343  }
1344  else
1345  return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1346 }

References BM_VALID, PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), pg_atomic_read_u32(), StartBufferIO(), and BufferDesc::state.

Referenced by WaitReadBuffers().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext * context,
int *  max_pending 
)

Definition at line 5830 of file bufmgr.c.

5831 {
5832  Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
5833 
5834  context->max_pending = max_pending;
5835  context->nr_pending = 0;
5836 }

References Assert, context, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferSync(), and InitBufferPool().
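Note that max_pending is captured by pointer rather than by value, so a later change to the underlying GUC (for example checkpoint_flush_after) takes effect without re-initializing the context. A sketch of the surrounding usage, modeled on the BufferSync() call site (buf_hdr is a placeholder for the buffer just written):

WritebackContext wb_context;

WritebackContextInit(&wb_context, &checkpoint_flush_after);

/* ... after writing each dirty buffer ... */
ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL, &buf_hdr->tag);

/* once the batch is done, force out any still-pending requests */
IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);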

◆ ZeroBuffer()

static void ZeroBuffer ( Buffer  buffer,
ReadBufferMode  mode 
)
static

Definition at line 1019 of file bufmgr.c.

1020 {
1021  BufferDesc *bufHdr;
1022  uint32 buf_state;
1023 
1024  Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
1025 
1026  if (BufferIsLocal(buffer))
1027  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1028  else
1029  {
1030  bufHdr = GetBufferDescriptor(buffer - 1);
1031  if (mode == RBM_ZERO_AND_LOCK)
1032  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1033  else
1034  LockBufferForCleanup(buffer);
1035  }
1036 
1037  memset(BufferGetPage(buffer), 0, BLCKSZ);
1038 
1039  if (BufferIsLocal(buffer))
1040  {
1041  buf_state = pg_atomic_read_u32(&bufHdr->state);
1042  buf_state |= BM_VALID;
1043  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1044  }
1045  else
1046  {
1047  buf_state = LockBufHdr(bufHdr);
1048  buf_state |= BM_VALID;
1049  UnlockBufHdr(bufHdr, buf_state);
1050  }
1051 }
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5165

References Assert, BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), LockBufHdr(), mode, pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, BufferDesc::state, and UnlockBufHdr().

Referenced by ReadBuffer_common().
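ZeroBuffer() is reached through ReadBuffer_common() when the caller requests a zero-filled page instead of a read. A typical caller-side pattern that exercises this path (hedged sketch; rel and blkno are placeholders):

Buffer buf;
Page page;

/* Page comes back zero-filled and exclusively locked; no disk read. */
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK, NULL);
page = BufferGetPage(buf);

PageInit(page, BufferGetPageSize(buf), 0);  /* lay down a page header */

MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);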

Variable Documentation

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 172 of file bufmgr.c.

Referenced by InitBufferPool().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 171 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 140 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 141 of file bufmgr.c.

Referenced by BgBufferSync().

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
.name = "buffer io",
.release_priority = RELEASE_PRIO_BUFFER_IOS,
.ReleaseResource = ResOwnerReleaseBufferIO,
.DebugPrint = ResOwnerPrintBufferIO
}
static void ResOwnerReleaseBufferIO(Datum res)
Definition: bufmgr.c:5970
static char * ResOwnerPrintBufferIO(Datum res)
Definition: bufmgr.c:5978
#define RELEASE_PRIO_BUFFER_IOS
Definition: resowner.h:62
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition: resowner.h:54

Definition at line 225 of file bufmgr.c.

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_pin_resowner_desc

const ResourceOwnerDesc buffer_pin_resowner_desc
Initial value:
=
{
.name = "buffer pin",
.release_priority = RELEASE_PRIO_BUFFER_PINS,
.ReleaseResource = ResOwnerReleaseBufferPin,
.DebugPrint = ResOwnerPrintBufferPin
}
static char * ResOwnerPrintBufferPin(Datum res)
Definition: bufmgr.c:6001
static void ResOwnerReleaseBufferPin(Datum res)
Definition: bufmgr.c:5986
#define RELEASE_PRIO_BUFFER_PINS
Definition: resowner.h:63

Definition at line 234 of file bufmgr.c.

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 170 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY

◆ io_combine_limit

◆ maintenance_io_concurrency

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 175 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 210 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountHash

HTAB* PrivateRefCountHash = NULL
static

◆ PrivateRefCountOverflowed

◆ ReservedRefCountEntry

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 139 of file bufmgr.c.

Referenced by mdreadv(), and WaitReadBuffers().