PostgreSQL Source Code  git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Go to the source code of this file.

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 
struct  copy_storage_using_buffer_read_stream_private
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static BlockNumber copy_storage_using_buffer_read_stream_next_block (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 
static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBufferPin (Datum res)
 
static char * ResOwnerPrintBufferPin (Datum res)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool WaitReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsExclusiveLocked (Buffer buffer)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
bool EvictUnpinnedBuffer (Buffer buf)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_pin_resowner_desc
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 87 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 77 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 76 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 69 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:443
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
int32 * LocalRefCount
Definition: localbuf.c:46

Definition at line 501 of file bufmgr.c.

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 68 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 72 of file bufmgr.c.

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 96 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 79 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 5960 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 5960 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 5962 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 5962 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 5959 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 5959 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 5961 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 5961 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 5958 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 5958 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 5665 of file bufmgr.c.

5666 {
5667  BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
5668  uint32 buf_state;
5669 
5670  buf_state = LockBufHdr(buf_hdr);
5671  Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
5672 
5673  if (!(buf_state & BM_VALID))
5674  {
5675  Assert(!(buf_state & BM_DIRTY));
5676  UnlockBufHdr(buf_hdr, buf_state);
5677  }
5678  else
5679  {
5680  Assert(buf_state & BM_DIRTY);
5681  UnlockBufHdr(buf_hdr, buf_state);
5682 
5683  /* Issue notice if this is not the first failure... */
5684  if (buf_state & BM_IO_ERROR)
5685  {
5686  /* Buffer is pinned, so we can read tag without spinlock */
5687  char *path;
5688 
5689  path = relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
5690  BufTagGetForkNum(&buf_hdr->tag));
5691  ereport(WARNING,
5692  (errcode(ERRCODE_IO_ERROR),
5693  errmsg("could not write block %u of %s",
5694  buf_hdr->tag.blockNum, path),
5695  errdetail("Multiple failures --- write error might be permanent.")));
5696  pfree(path);
5697  }
5698  }
5699 
5700  TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false);
5701 }
#define BM_TAG_VALID
Definition: buf_internals.h:63
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static BufferDesc * GetBufferDescriptor(uint32 id)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
#define BM_DIRTY
Definition: buf_internals.h:61
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:64
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:62
#define BM_IO_ERROR
Definition: buf_internals.h:65
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
Definition: bufmgr.c:5628
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:5774
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define WARNING
Definition: elog.h:36
#define ereport(elevel,...)
Definition: elog.h:149
void pfree(void *pointer)
Definition: mcxt.c:1521
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90
BufferTag tag
BlockNumber blockNum
Definition: buf_internals.h:98

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), pfree(), relpathperm, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 3571 of file bufmgr.c.

3572 {
3573  CheckForBufferLeaks();
3574 
3575  AtEOXact_LocalBuffers(isCommit);
3576 
3577  Assert(PrivateRefCountOverflowed == 0);
3578 }
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:3631
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:237
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:819

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 3613 of file bufmgr.c.

3614 {
3615  UnlockBuffers();
3616 
3617  CheckForBufferLeaks();
3618 
3619  /* localbuf.c needs a chance too */
3620  AtProcExit_LocalBuffers();
3621 }
void UnlockBuffers(void)
Definition: bufmgr.c:5143
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:830

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext * wb_context)

Definition at line 3200 of file bufmgr.c.

3201 {
3202  /* info obtained from freelist.c */
3203  int strategy_buf_id;
3204  uint32 strategy_passes;
3205  uint32 recent_alloc;
3206 
3207  /*
3208  * Information saved between calls so we can determine the strategy
3209  * point's advance rate and avoid scanning already-cleaned buffers.
3210  */
3211  static bool saved_info_valid = false;
3212  static int prev_strategy_buf_id;
3213  static uint32 prev_strategy_passes;
3214  static int next_to_clean;
3215  static uint32 next_passes;
3216 
3217  /* Moving averages of allocation rate and clean-buffer density */
3218  static float smoothed_alloc = 0;
3219  static float smoothed_density = 10.0;
3220 
3221  /* Potentially these could be tunables, but for now, not */
3222  float smoothing_samples = 16;
3223  float scan_whole_pool_milliseconds = 120000.0;
3224 
3225  /* Used to compute how far we scan ahead */
3226  long strategy_delta;
3227  int bufs_to_lap;
3228  int bufs_ahead;
3229  float scans_per_alloc;
3230  int reusable_buffers_est;
3231  int upcoming_alloc_est;
3232  int min_scan_buffers;
3233 
3234  /* Variables for the scanning loop proper */
3235  int num_to_scan;
3236  int num_written;
3237  int reusable_buffers;
3238 
3239  /* Variables for final smoothed_density update */
3240  long new_strategy_delta;
3241  uint32 new_recent_alloc;
3242 
3243  /*
3244  * Find out where the freelist clock sweep currently is, and how many
3245  * buffer allocations have happened since our last call.
3246  */
3247  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3248 
3249  /* Report buffer alloc counts to pgstat */
3250  PendingBgWriterStats.buf_alloc += recent_alloc;
3251 
3252  /*
3253  * If we're not running the LRU scan, just stop after doing the stats
3254  * stuff. We mark the saved state invalid so that we can recover sanely
3255  * if LRU scan is turned back on later.
3256  */
3257  if (bgwriter_lru_maxpages <= 0)
3258  {
3259  saved_info_valid = false;
3260  return true;
3261  }
3262 
3263  /*
3264  * Compute strategy_delta = how many buffers have been scanned by the
3265  * clock sweep since last time. If first time through, assume none. Then
3266  * see if we are still ahead of the clock sweep, and if so, how many
3267  * buffers we could scan before we'd catch up with it and "lap" it. Note:
3268  * weird-looking coding of xxx_passes comparisons are to avoid bogus
3269  * behavior when the passes counts wrap around.
3270  */
3271  if (saved_info_valid)
3272  {
3273  int32 passes_delta = strategy_passes - prev_strategy_passes;
3274 
3275  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3276  strategy_delta += (long) passes_delta * NBuffers;
3277 
3278  Assert(strategy_delta >= 0);
3279 
3280  if ((int32) (next_passes - strategy_passes) > 0)
3281  {
3282  /* we're one pass ahead of the strategy point */
3283  bufs_to_lap = strategy_buf_id - next_to_clean;
3284 #ifdef BGW_DEBUG
3285  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3286  next_passes, next_to_clean,
3287  strategy_passes, strategy_buf_id,
3288  strategy_delta, bufs_to_lap);
3289 #endif
3290  }
3291  else if (next_passes == strategy_passes &&
3292  next_to_clean >= strategy_buf_id)
3293  {
3294  /* on same pass, but ahead or at least not behind */
3295  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3296 #ifdef BGW_DEBUG
3297  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3298  next_passes, next_to_clean,
3299  strategy_passes, strategy_buf_id,
3300  strategy_delta, bufs_to_lap);
3301 #endif
3302  }
3303  else
3304  {
3305  /*
3306  * We're behind, so skip forward to the strategy point and start
3307  * cleaning from there.
3308  */
3309 #ifdef BGW_DEBUG
3310  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3311  next_passes, next_to_clean,
3312  strategy_passes, strategy_buf_id,
3313  strategy_delta);
3314 #endif
3315  next_to_clean = strategy_buf_id;
3316  next_passes = strategy_passes;
3317  bufs_to_lap = NBuffers;
3318  }
3319  }
3320  else
3321  {
3322  /*
3323  * Initializing at startup or after LRU scanning had been off. Always
3324  * start at the strategy point.
3325  */
3326 #ifdef BGW_DEBUG
3327  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3328  strategy_passes, strategy_buf_id);
3329 #endif
3330  strategy_delta = 0;
3331  next_to_clean = strategy_buf_id;
3332  next_passes = strategy_passes;
3333  bufs_to_lap = NBuffers;
3334  }
3335 
3336  /* Update saved info for next time */
3337  prev_strategy_buf_id = strategy_buf_id;
3338  prev_strategy_passes = strategy_passes;
3339  saved_info_valid = true;
3340 
3341  /*
3342  * Compute how many buffers had to be scanned for each new allocation, ie,
3343  * 1/density of reusable buffers, and track a moving average of that.
3344  *
3345  * If the strategy point didn't move, we don't update the density estimate
3346  */
3347  if (strategy_delta > 0 && recent_alloc > 0)
3348  {
3349  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3350  smoothed_density += (scans_per_alloc - smoothed_density) /
3351  smoothing_samples;
3352  }
3353 
3354  /*
3355  * Estimate how many reusable buffers there are between the current
3356  * strategy point and where we've scanned ahead to, based on the smoothed
3357  * density estimate.
3358  */
3359  bufs_ahead = NBuffers - bufs_to_lap;
3360  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3361 
3362  /*
3363  * Track a moving average of recent buffer allocations. Here, rather than
3364  * a true average we want a fast-attack, slow-decline behavior: we
3365  * immediately follow any increase.
3366  */
3367  if (smoothed_alloc <= (float) recent_alloc)
3368  smoothed_alloc = recent_alloc;
3369  else
3370  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3371  smoothing_samples;
3372 
3373  /* Scale the estimate by a GUC to allow more aggressive tuning. */
3374  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3375 
3376  /*
3377  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3378  * eventually underflow to zero, and the underflows produce annoying
3379  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3380  * zero, there's no point in tracking smaller and smaller values of
3381  * smoothed_alloc, so just reset it to exactly zero to avoid this
3382  * syndrome. It will pop back up as soon as recent_alloc increases.
3383  */
3384  if (upcoming_alloc_est == 0)
3385  smoothed_alloc = 0;
3386 
3387  /*
3388  * Even in cases where there's been little or no buffer allocation
3389  * activity, we want to make a small amount of progress through the buffer
3390  * cache so that as many reusable buffers as possible are clean after an
3391  * idle period.
3392  *
3393  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3394  * the BGW will be called during the scan_whole_pool time; slice the
3395  * buffer pool into that many sections.
3396  */
3397  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3398 
3399  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3400  {
3401 #ifdef BGW_DEBUG
3402  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3403  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3404 #endif
3405  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3406  }
3407 
3408  /*
3409  * Now write out dirty reusable buffers, working forward from the
3410  * next_to_clean point, until we have lapped the strategy scan, or cleaned
3411  * enough buffers to match our estimate of the next cycle's allocation
3412  * requirements, or hit the bgwriter_lru_maxpages limit.
3413  */
3414 
3415  num_to_scan = bufs_to_lap;
3416  num_written = 0;
3417  reusable_buffers = reusable_buffers_est;
3418 
3419  /* Execute the LRU scan */
3420  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3421  {
3422  int sync_state = SyncOneBuffer(next_to_clean, true,
3423  wb_context);
3424 
3425  if (++next_to_clean >= NBuffers)
3426  {
3427  next_to_clean = 0;
3428  next_passes++;
3429  }
3430  num_to_scan--;
3431 
3432  if (sync_state & BUF_WRITTEN)
3433  {
3434  reusable_buffers++;
3435  if (++num_written >= bgwriter_lru_maxpages)
3436  {
3437  PendingBgWriterStats.maxwritten_clean++;
3438  break;
3439  }
3440  }
3441  else if (sync_state & BUF_REUSABLE)
3442  reusable_buffers++;
3443  }
3444 
3445  PendingBgWriterStats.buf_written_clean += num_written;
3446 
3447 #ifdef BGW_DEBUG
3448  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3449  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3450  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3451  bufs_to_lap - num_to_scan,
3452  num_written,
3453  reusable_buffers - reusable_buffers_est);
3454 #endif
3455 
3456  /*
3457  * Consider the above scan as being like a new allocation scan.
3458  * Characterize its density and update the smoothed one based on it. This
3459  * effectively halves the moving average period in cases where both the
3460  * strategy and the background writer are doing some useful scanning,
3461  * which is helpful because a long memory isn't as desirable on the
3462  * density estimates.
3463  */
3464  new_strategy_delta = bufs_to_lap - num_to_scan;
3465  new_recent_alloc = reusable_buffers - reusable_buffers_est;
3466  if (new_strategy_delta > 0 && new_recent_alloc > 0)
3467  {
3468  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3469  smoothed_density += (scans_per_alloc - smoothed_density) /
3470  smoothing_samples;
3471 
3472 #ifdef BGW_DEBUG
3473  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3474  new_recent_alloc, new_strategy_delta,
3475  scans_per_alloc, smoothed_density);
3476 #endif
3477  }
3478 
3479  /* Return true if OK to hibernate */
3480  return (bufs_to_lap == 0 && recent_alloc == 0);
3481 }
int BgWriterDelay
Definition: bgwriter.c:57
#define BUF_REUSABLE
Definition: bufmgr.c:77
double bgwriter_lru_multiplier
Definition: bufmgr.c:169
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:3498
int bgwriter_lru_maxpages
Definition: bufmgr.c:168
#define BUF_WRITTEN
Definition: bufmgr.c:76
signed int int32
Definition: c.h:494
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:394
int NBuffers
Definition: globals.c:140
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:255
PgStat_Counter maxwritten_clean
Definition: pgstat.h:256
PgStat_Counter buf_alloc
Definition: pgstat.h:257

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
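
The allocation estimate above uses a fast-attack, slow-decline filter: any increase in recent_alloc is adopted immediately, while decreases decay over roughly smoothing_samples calls. A minimal standalone sketch of that filter (illustrative only, not part of bufmgr.c):

/* Fast-attack, slow-decline smoothing, as used for smoothed_alloc above. */
static float
smooth_alloc(float smoothed, float recent, float smoothing_samples)
{
    if (smoothed <= recent)
        return recent;                          /* rise: follow immediately */
    return smoothed + (recent - smoothed) / smoothing_samples;  /* fall: decay */
}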

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool * foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 1617 of file bufmgr.c.

1621 {
1622  BufferTag newTag; /* identity of requested block */
1623  uint32 newHash; /* hash value for newTag */
1624  LWLock *newPartitionLock; /* buffer partition lock for it */
1625  int existing_buf_id;
1626  Buffer victim_buffer;
1627  BufferDesc *victim_buf_hdr;
1628  uint32 victim_buf_state;
1629 
1630  /* Make sure we will have room to remember the buffer pin */
1631  ResourceOwnerEnlarge(CurrentResourceOwner);
1632  ReservePrivateRefCountEntry();
1633 
1634  /* create a tag so we can lookup the buffer */
1635  InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
1636 
1637  /* determine its hash code and partition lock ID */
1638  newHash = BufTableHashCode(&newTag);
1639  newPartitionLock = BufMappingPartitionLock(newHash);
1640 
1641  /* see if the block is in the buffer pool already */
1642  LWLockAcquire(newPartitionLock, LW_SHARED);
1643  existing_buf_id = BufTableLookup(&newTag, newHash);
1644  if (existing_buf_id >= 0)
1645  {
1646  BufferDesc *buf;
1647  bool valid;
1648 
1649  /*
1650  * Found it. Now, pin the buffer so no one can steal it from the
1651  * buffer pool, and check to see if the correct data has been loaded
1652  * into the buffer.
1653  */
1654  buf = GetBufferDescriptor(existing_buf_id);
1655 
1656  valid = PinBuffer(buf, strategy);
1657 
1658  /* Can release the mapping lock as soon as we've pinned it */
1659  LWLockRelease(newPartitionLock);
1660 
1661  *foundPtr = true;
1662 
1663  if (!valid)
1664  {
1665  /*
1666  * We can only get here if (a) someone else is still reading in
1667  * the page, (b) a previous read attempt failed, or (c) someone
1668  * called StartReadBuffers() but not yet WaitReadBuffers().
1669  */
1670  *foundPtr = false;
1671  }
1672 
1673  return buf;
1674  }
1675 
1676  /*
1677  * Didn't find it in the buffer pool. We'll have to initialize a new
1678  * buffer. Remember to unlock the mapping lock while doing the work.
1679  */
1680  LWLockRelease(newPartitionLock);
1681 
1682  /*
1683  * Acquire a victim buffer. Somebody else might try to do the same, we
1684  * don't hold any conflicting locks. If so we'll have to undo our work
1685  * later.
1686  */
1687  victim_buffer = GetVictimBuffer(strategy, io_context);
1688  victim_buf_hdr = GetBufferDescriptor(victim_buffer - 1);
1689 
1690  /*
1691  * Try to make a hashtable entry for the buffer under its new tag. If
1692  * somebody else inserted another buffer for the tag, we'll release the
1693  * victim buffer we acquired and use the already inserted one.
1694  */
1695  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1696  existing_buf_id = BufTableInsert(&newTag, newHash, victim_buf_hdr->buf_id);
1697  if (existing_buf_id >= 0)
1698  {
1699  BufferDesc *existing_buf_hdr;
1700  bool valid;
1701 
1702  /*
1703  * Got a collision. Someone has already done what we were about to do.
1704  * We'll just handle this as if it were found in the buffer pool in
1705  * the first place. First, give up the buffer we were planning to
1706  * use.
1707  *
1708  * We could do this after releasing the partition lock, but then we'd
1709  * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
1710  * before acquiring the lock, for the rare case of such a collision.
1711  */
1712  UnpinBuffer(victim_buf_hdr);
1713 
1714  /*
1715  * The victim buffer we acquired previously is clean and unused, let
1716  * it be found again quickly
1717  */
1718  StrategyFreeBuffer(victim_buf_hdr);
1719 
1720  /* remaining code should match code at top of routine */
1721 
1722  existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
1723 
1724  valid = PinBuffer(existing_buf_hdr, strategy);
1725 
1726  /* Can release the mapping lock as soon as we've pinned it */
1727  LWLockRelease(newPartitionLock);
1728 
1729  *foundPtr = true;
1730 
1731  if (!valid)
1732  {
1733  /*
1734  * We can only get here if (a) someone else is still reading in
1735  * the page, (b) a previous read attempt failed, or (c) someone
1736  * called StartReadBuffers() but not yet WaitReadBuffers().
1737  */
1738  *foundPtr = false;
1739  }
1740 
1741  return existing_buf_hdr;
1742  }
1743 
1744  /*
1745  * Need to lock the buffer header too in order to change its tag.
1746  */
1747  victim_buf_state = LockBufHdr(victim_buf_hdr);
1748 
1749  /* some sanity checks while we hold the buffer header lock */
1750  Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
1751  Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
1752 
1753  victim_buf_hdr->tag = newTag;
1754 
1755  /*
1756  * Make sure BM_PERMANENT is set for buffers that must be written at every
1757  * checkpoint. Unlogged buffers only need to be written at shutdown
1758  * checkpoints, except for their "init" forks, which need to be treated
1759  * just like permanent relations.
1760  */
1761  victim_buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1762  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1763  victim_buf_state |= BM_PERMANENT;
1764 
1765  UnlockBufHdr(victim_buf_hdr, victim_buf_state);
1766 
1767  LWLockRelease(newPartitionLock);
1768 
1769  /*
1770  * Buffer contents are currently invalid.
1771  */
1772  *foundPtr = false;
1773 
1774  return victim_buf_hdr;
1775 }
int Buffer
Definition: buf.h:23
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
Definition: buf_internals.h:69
static LWLock * BufMappingPartitionLock(uint32 hashcode)
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:46
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:51
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:118
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:2664
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition: bufmgr.c:1961
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:277
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:2818
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:363
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
static char * buf
Definition: pg_test_fsync.c:73
@ INIT_FORKNUM
Definition: relpath.h:53
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:442
Definition: lwlock.h:42
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by PinBufferForBlock().
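
The function implements a lookup-then-insert protocol against the buffer mapping table. The outline below paraphrases the control flow shown above as a comment block (a summary for orientation, not an alternative implementation):

/*
 * Paraphrase of BufferAlloc()'s mapping-table protocol:
 *
 *   LWLockAcquire(partition, LW_SHARED);
 *   id = BufTableLookup(&tag, hash);        -- hit: pin it, release lock, done
 *   LWLockRelease(partition);
 *
 *   victim = GetVictimBuffer(...);          -- no conflicting locks held here
 *
 *   LWLockAcquire(partition, LW_EXCLUSIVE);
 *   id = BufTableInsert(&tag, hash, victim);
 *   if (id >= 0)                            -- collision: someone beat us to it;
 *       unpin victim, pin the winner        -- handle like a cache hit
 *   else
 *       set victim's tag + BM_TAG_VALID under the buffer header lock
 *   LWLockRelease(partition);
 */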

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 3736 of file bufmgr.c.

3737 {
3738  BufferDesc *bufHdr;
3739 
3740  Assert(BufferIsPinned(buffer));
3741 
3742  if (BufferIsLocal(buffer))
3743  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3744  else
3745  bufHdr = GetBufferDescriptor(buffer - 1);
3746 
3747  /* pinned, so OK to read tag without spinlock */
3748  return bufHdr->tag.blockNum;
3749 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:501

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune_and_freeze(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_noprune(), lazy_scan_prune(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and WaitReadBuffers().
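
A pin alone is enough to read the tag, so no content lock is required. A minimal caller sketch (assumed extension-style code; block_number_of is a hypothetical helper, and rel/blkno are caller-supplied):

#include "storage/bufmgr.h"

/* Hypothetical helper: pin a block and ask the buffer which block it holds. */
static BlockNumber
block_number_of(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);           /* returns a pinned buffer */
    BlockNumber result = BufferGetBlockNumber(buf);     /* == blkno here */

    ReleaseBuffer(buf);
    return result;
}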

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3997 of file bufmgr.c.

3998 {
3999  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
4000  char *page = BufferGetPage(buffer);
4001  XLogRecPtr lsn;
4002  uint32 buf_state;
4003 
4004  /*
4005  * If we don't need locking for correctness, fastpath out.
4006  */
4007  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4008  return PageGetLSN(page);
4009 
4010  /* Make sure we've got a real buffer, and that we hold a pin on it. */
4011  Assert(BufferIsValid(buffer));
4012  Assert(BufferIsPinned(buffer));
4013 
4014  buf_state = LockBufHdr(bufHdr);
4015  lsn = PageGetLSN(page);
4016  UnlockBufHdr(bufHdr, buf_state);
4017 
4018  return lsn;
4019 }
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:386
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator * rlocator,
ForkNumber * forknum,
BlockNumber * blknum 
)

Definition at line 3757 of file bufmgr.c.

3759 {
3760  BufferDesc *bufHdr;
3761 
3762  /* Do the same checks as BufferGetBlockNumber. */
3763  Assert(BufferIsPinned(buffer));
3764 
3765  if (BufferIsLocal(buffer))
3766  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3767  else
3768  bufHdr = GetBufferDescriptor(buffer - 1);
3769 
3770  /* pinned, so OK to read tag without spinlock */
3771  *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
3772  *forknum = BufTagGetForkNum(&bufHdr->tag);
3773  *blknum = bufHdr->tag.blockNum;
3774 }

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
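
Like BufferGetBlockNumber(), this only needs a pin on the buffer. A sketch of typical debugging use (hypothetical helper; "postgres.h" and other boilerplate includes elided):

/* Hypothetical helper: report which relation/fork/block a pinned buffer holds. */
static void
report_buffer_identity(Buffer buffer)
{
    RelFileLocator rlocator;
    ForkNumber  forknum;
    BlockNumber blknum;

    BufferGetTag(buffer, &rlocator, &forknum, &blknum);
    elog(DEBUG1, "buffer %d holds rel %u fork %d block %u",
         buffer, rlocator.relNumber, forknum, blknum);
}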

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 2511 of file bufmgr.c.

2512 {
2513  BufferDesc *bufHdr;
2514 
2515  if (BufferIsLocal(buffer))
2516  {
2517  int bufid = -buffer - 1;
2518 
2519  bufHdr = GetLocalBufferDescriptor(bufid);
2520  }
2521  else
2522  {
2523  bufHdr = GetBufferDescriptor(buffer - 1);
2524  }
2525 
2526  Assert(BufferIsPinned(buffer));
2527  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2528  LW_EXCLUSIVE));
2529 
2530  return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
2531 }
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:232
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1937
pg_atomic_uint32 state

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by XLogRegisterBuffer().
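
The assertion in XLogRegisterBuffer() holds under the standard page-modification pattern, sketched below (assumed extension-style code; modify_page_example is hypothetical and the WAL record construction is elided):

/* Standard pattern under which XLogRegisterBuffer()'s checks pass:
 * exclusive content lock held and, unless REGBUF_CLEAN_OK, buffer dirty. */
static void
modify_page_example(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);    /* BufferIsExclusiveLocked */

    START_CRIT_SECTION();
    /* ... modify the page contents here ... */
    MarkBufferDirty(buf);                      /* BufferIsDirty is now true */
    /* XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_STANDARD); ... */
    END_CRIT_SECTION();

    UnlockReleaseBuffer(buf);
}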

◆ BufferIsExclusiveLocked()

bool BufferIsExclusiveLocked ( Buffer  buffer)

Definition at line 2482 of file bufmgr.c.

2483 {
2484  BufferDesc *bufHdr;
2485 
2486  if (BufferIsLocal(buffer))
2487  {
2488  int bufid = -buffer - 1;
2489 
2490  bufHdr = GetLocalBufferDescriptor(bufid);
2491  }
2492  else
2493  {
2494  bufHdr = GetBufferDescriptor(buffer - 1);
2495  }
2496 
2497  Assert(BufferIsPinned(buffer));
2498  return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2499  LW_EXCLUSIVE);
2500 }

References Assert, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, and LWLockHeldByMeInMode().

Referenced by XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 3967 of file bufmgr.c.

3968 {
3969  BufferDesc *bufHdr;
3970 
3971  /* Local buffers are used only for temp relations. */
3972  if (BufferIsLocal(buffer))
3973  return false;
3974 
3975  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3976  Assert(BufferIsValid(buffer));
3977  Assert(BufferIsPinned(buffer));
3978 
3979  /*
3980  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
3981  * need not bother with the buffer header spinlock. Even if someone else
3982  * changes the buffer header state while we're doing this, the state is
3983  * changed atomically, so we'll read the old value or the new value, but
3984  * not random garbage.
3985  */
3986  bufHdr = GetBufferDescriptor(buffer - 1);
3987  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3988 }

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 2924 of file bufmgr.c.

2925 {
2926  uint32 buf_state;
2927  int buf_id;
2928  int num_to_scan;
2929  int num_spaces;
2930  int num_processed;
2931  int num_written;
2932  CkptTsStatus *per_ts_stat = NULL;
2933  Oid last_tsid;
2934  binaryheap *ts_heap;
2935  int i;
2936  int mask = BM_DIRTY;
2937  WritebackContext wb_context;
2938 
2939  /*
2940  * Unless this is a shutdown checkpoint or we have been explicitly told,
2941  * we write only permanent, dirty buffers. But at shutdown or end of
2942  * recovery, we write all dirty buffers.
2943  */
2944  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
2945  CHECKPOINT_FLUSH_ALL))))
2946  mask |= BM_PERMANENT;
2947 
2948  /*
2949  * Loop over all buffers, and mark the ones that need to be written with
2950  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
2951  * can estimate how much work needs to be done.
2952  *
2953  * This allows us to write only those pages that were dirty when the
2954  * checkpoint began, and not those that get dirtied while it proceeds.
2955  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
2956  * later in this function, or by normal backends or the bgwriter cleaning
2957  * scan, the flag is cleared. Any buffer dirtied after this point won't
2958  * have the flag set.
2959  *
2960  * Note that if we fail to write some buffer, we may leave buffers with
2961  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
2962  * certainly need to be written for the next checkpoint attempt, too.
2963  */
2964  num_to_scan = 0;
2965  for (buf_id = 0; buf_id < NBuffers; buf_id++)
2966  {
2967  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
2968 
2969  /*
2970  * Header spinlock is enough to examine BM_DIRTY, see comment in
2971  * SyncOneBuffer.
2972  */
2973  buf_state = LockBufHdr(bufHdr);
2974 
2975  if ((buf_state & mask) == mask)
2976  {
2977  CkptSortItem *item;
2978 
2979  buf_state |= BM_CHECKPOINT_NEEDED;
2980 
2981  item = &CkptBufferIds[num_to_scan++];
2982  item->buf_id = buf_id;
2983  item->tsId = bufHdr->tag.spcOid;
2984  item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
2985  item->forkNum = BufTagGetForkNum(&bufHdr->tag);
2986  item->blockNum = bufHdr->tag.blockNum;
2987  }
2988 
2989  UnlockBufHdr(bufHdr, buf_state);
2990 
2991  /* Check for barrier events in case NBuffers is large. */
2992  if (ProcSignalBarrierPending)
2993  ProcessProcSignalBarrier();
2994  }
2995 
2996  if (num_to_scan == 0)
2997  return; /* nothing to do */
2998 
2999  WritebackContextInit(&wb_context, &checkpoint_flush_after);
3000 
3001  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
3002 
3003  /*
3004  * Sort buffers that need to be written to reduce the likelihood of random
3005  * IO. The sorting is also important for the implementation of balancing
3006  * writes between tablespaces. Without balancing writes we'd potentially
3007  * end up writing to the tablespaces one-by-one; possibly overloading the
3008  * underlying system.
3009  */
3010  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
3011 
3012  num_spaces = 0;
3013 
3014  /*
3015  * Allocate progress status for each tablespace with buffers that need to
3016  * be flushed. This requires the to-be-flushed array to be sorted.
3017  */
3018  last_tsid = InvalidOid;
3019  for (i = 0; i < num_to_scan; i++)
3020  {
3021  CkptTsStatus *s;
3022  Oid cur_tsid;
3023 
3024  cur_tsid = CkptBufferIds[i].tsId;
3025 
3026  /*
3027  * Grow array of per-tablespace status structs, every time a new
3028  * tablespace is found.
3029  */
3030  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3031  {
3032  Size sz;
3033 
3034  num_spaces++;
3035 
3036  /*
3037  * Not worth adding grow-by-power-of-2 logic here - even with a
3038  * few hundred tablespaces this should be fine.
3039  */
3040  sz = sizeof(CkptTsStatus) * num_spaces;
3041 
3042  if (per_ts_stat == NULL)
3043  per_ts_stat = (CkptTsStatus *) palloc(sz);
3044  else
3045  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
3046 
3047  s = &per_ts_stat[num_spaces - 1];
3048  memset(s, 0, sizeof(*s));
3049  s->tsId = cur_tsid;
3050 
3051  /*
3052  * The first buffer in this tablespace. As CkptBufferIds is sorted
3053  * by tablespace all (s->num_to_scan) buffers in this tablespace
3054  * will follow afterwards.
3055  */
3056  s->index = i;
3057 
3058  /*
3059  * progress_slice will be determined once we know how many buffers
3060  * are in each tablespace, i.e. after this loop.
3061  */
3062 
3063  last_tsid = cur_tsid;
3064  }
3065  else
3066  {
3067  s = &per_ts_stat[num_spaces - 1];
3068  }
3069 
3070  s->num_to_scan++;
3071 
3072  /* Check for barrier events. */
3073  if (ProcSignalBarrierPending)
3074  ProcessProcSignalBarrier();
3075  }
3076 
3077  Assert(num_spaces > 0);
3078 
3079  /*
3080  * Build a min-heap over the write-progress in the individual tablespaces,
3081  * and compute how large a portion of the total progress a single
3082  * processed buffer is.
3083  */
3084  ts_heap = binaryheap_allocate(num_spaces,
3085  ts_ckpt_progress_comparator,
3086  NULL);
3087 
3088  for (i = 0; i < num_spaces; i++)
3089  {
3090  CkptTsStatus *ts_stat = &per_ts_stat[i];
3091 
3092  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3093 
3094  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
3095  }
3096 
3097  binaryheap_build(ts_heap);
3098 
3099  /*
3100  * Iterate through to-be-checkpointed buffers and write the ones (still)
3101  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3102  * tablespaces; otherwise the sorting would lead to only one tablespace
3103  * receiving writes at a time, making inefficient use of the hardware.
3104  */
3105  num_processed = 0;
3106  num_written = 0;
3107  while (!binaryheap_empty(ts_heap))
3108  {
3109  BufferDesc *bufHdr = NULL;
3110  CkptTsStatus *ts_stat = (CkptTsStatus *)
3111  DatumGetPointer(binaryheap_first(ts_heap));
3112 
3113  buf_id = CkptBufferIds[ts_stat->index].buf_id;
3114  Assert(buf_id != -1);
3115 
3116  bufHdr = GetBufferDescriptor(buf_id);
3117 
3118  num_processed++;
3119 
3120  /*
3121  * We don't need to acquire the lock here, because we're only looking
3122  * at a single bit. It's possible that someone else writes the buffer
3123  * and clears the flag right after we check, but that doesn't matter
3124  * since SyncOneBuffer will then do nothing. However, there is a
3125  * further race condition: it's conceivable that between the time we
3126  * examine the bit here and the time SyncOneBuffer acquires the lock,
3127  * someone else not only wrote the buffer but replaced it with another
3128  * page and dirtied it. In that improbable case, SyncOneBuffer will
3129  * write the buffer though we didn't need to. It doesn't seem worth
3130  * guarding against this, though.
3131  */
3132  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
3133  {
3134  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3135  {
3136  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3137  PendingCheckpointerStats.buffers_written++;
3138  num_written++;
3139  }
3140  }
3141 
3142  /*
3143  * Measure progress independent of actually having to flush the buffer
3144  * - otherwise writing become unbalanced.
3145  */
3146  ts_stat->progress += ts_stat->progress_slice;
3147  ts_stat->num_scanned++;
3148  ts_stat->index++;
3149 
3150  /* Have all the buffers from the tablespace been processed? */
3151  if (ts_stat->num_scanned == ts_stat->num_to_scan)
3152  {
3153  binaryheap_remove_first(ts_heap);
3154  }
3155  else
3156  {
3157  /* update heap with the new progress */
3158  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
3159  }
3160 
3161  /*
3162  * Sleep to throttle our I/O rate.
3163  *
3164  * (This will check for barrier events even if it doesn't sleep.)
3165  */
3166  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3167  }
3168 
3169  /*
3170  * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3171  * IOContext will always be IOCONTEXT_NORMAL.
3172  */
3173  IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
3174 
3175  pfree(per_ts_stat);
3176  per_ts_stat = NULL;
3177  binaryheap_free(ts_heap);
3178 
3179  /*
3180  * Update checkpoint statistics. As noted above, this doesn't include
3181  * buffers written by other backends or bgwriter scan.
3182  */
3183  CheckpointStats.ckpt_bufs_written += num_written;
3184 
3185  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3186 }
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:138
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:255
bh_node_type binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:177
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:192
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:39
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:75
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:116
#define binaryheap_empty(h)
Definition: binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:68
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:5893
int checkpoint_flush_after
Definition: bufmgr.c:198
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:5916
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition: bufmgr.c:5973
struct CkptTsStatus CkptTsStatus
double float8
Definition: c.h:630
size_t Size
Definition: c.h:605
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:711
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:39
int i
Definition: isn.c:73
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
@ IOCONTEXT_NORMAL
Definition: pgstat.h:290
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:464
int ckpt_bufs_written
Definition: xlog.h:167
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:115
int index
Definition: bufmgr.c:123
int num_scanned
Definition: bufmgr.c:120
float8 progress
Definition: bufmgr.c:114
int num_to_scan
Definition: bufmgr.c:118
Oid tsId
Definition: bufmgr.c:105
PgStat_Counter buffers_written
Definition: pgstat.h:270
Oid spcOid
Definition: buf_internals.h:94
CheckpointStatsData CheckpointStats
Definition: xlog.c:207
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:143
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr(), and WritebackContextInit().

Referenced by CheckPointBuffers().
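
The tablespace balancing works because progress_slice rescales each tablespace's progress into the common 0..num_to_scan range, so the min-heap always picks the tablespace that is proportionally furthest behind. A worked example with illustrative numbers:

/* num_to_scan = 300 in total; tablespace A holds 200 of the dirty buffers,
 * tablespace B holds 100. */
float slice_a = (float) 300 / 200;   /* 1.5: each write advances A by 1.5 */
float slice_b = (float) 300 / 100;   /* 3.0: each write advances B by 3.0 */

/* Both tablespaces reach progress == 300 exactly at their last buffer, so
 * the heap interleaves writes roughly 2:1 (A:B), in proportion to the
 * remaining work in each tablespace. */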

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag * ba,
const BufferTag * bb 
)
inlinestatic

Definition at line 5828 of file bufmgr.c.

5829 {
5830  int ret;
5831  RelFileLocator rlocatora;
5832  RelFileLocator rlocatorb;
5833 
5834  rlocatora = BufTagGetRelFileLocator(ba);
5835  rlocatorb = BufTagGetRelFileLocator(bb);
5836 
5837  ret = rlocator_comparator(&rlocatora, &rlocatorb);
5838 
5839  if (ret != 0)
5840  return ret;
5841 
5842  if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
5843  return -1;
5844  if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
5845  return 1;
5846 
5847  if (ba->blockNum < bb->blockNum)
5848  return -1;
5849  if (ba->blockNum > bb->blockNum)
5850  return 1;
5851 
5852  return 0;
5853 }
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:5747

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5218 of file bufmgr.c.

5219 {
5220  if (BufferIsLocal(buffer))
5221  {
5222  if (LocalRefCount[-buffer - 1] != 1)
5223  elog(ERROR, "incorrect local pin count: %d",
5224  LocalRefCount[-buffer - 1]);
5225  }
5226  else
5227  {
5228  if (GetPrivateRefCount(buffer) != 1)
5229  elog(ERROR, "incorrect local pin count: %d",
5230  GetPrivateRefCount(buffer));
5231  }
5232 }
#define ERROR
Definition: elog.h:39

References BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), and LockBufferForCleanup().
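
A minimal usage sketch (hypothetical caller, not code from bufmgr.c; rel and blkno are assumed to be a valid Relation and BlockNumber): the function is an assertion-style guard that the calling backend holds exactly one pin before an operation that requires a solo pin. Note that LockBufferForCleanup() performs this same check internally.

    Buffer      buf = ReadBuffer(rel, blkno);   /* the only pin we take */

    /* raises ERROR unless this backend's pin count on buf is exactly 1 */
    CheckBufferIsPinnedOnce(buf);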

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks(void)

Definition at line 3631 of file bufmgr.c.

3632 {
3633 #ifdef USE_ASSERT_CHECKING
3634  int RefCountErrors = 0;
3635  PrivateRefCountEntry *res;
3636  int i;
3637  char *s;
3638 
3639  /* check the array */
3640  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
3641  {
3642  res = &PrivateRefCountArray[i];
3643 
3644  if (res->buffer != InvalidBuffer)
3645  {
3646  s = DebugPrintBufferRefcount(res->buffer);
3647  elog(WARNING, "buffer refcount leak: %s", s);
3648  pfree(s);
3649 
3650  RefCountErrors++;
3651  }
3652  }
3653 
3654  /* if necessary search the hash */
3655  if (PrivateRefCountOverflowed)
3656  {
3657  HASH_SEQ_STATUS hstat;
3658 
3659  hash_seq_init(&hstat, PrivateRefCountHash);
3660  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
3661  {
3662  s = DebugPrintBufferRefcount(res->buffer);
3663  elog(WARNING, "buffer refcount leak: %s", s);
3664  pfree(s);
3665  RefCountErrors++;
3666  }
3667  }
3668 
3669  Assert(RefCountErrors == 0);
3670 #endif
3671 }
#define InvalidBuffer
Definition: buf.h:25
char * DebugPrintBufferRefcount(Buffer buffer)
Definition: bufmgr.c:3677
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:96
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:235
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:236
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1395
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385

References Assert, DebugPrintBufferRefcount(), elog, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 3722 of file bufmgr.c.

3723 {
3724  BufferSync(flags);
3725 }
static void BufferSync(int flags)
Definition: bufmgr.c:2924

References BufferSync().

Referenced by CheckPointGuts().

◆ ckpt_buforder_comparator()

static inline int ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b)

Definition at line 5862 of file bufmgr.c.

5863 {
5864  /* compare tablespace */
5865  if (a->tsId < b->tsId)
5866  return -1;
5867  else if (a->tsId > b->tsId)
5868  return 1;
5869  /* compare relation */
5870  if (a->relNumber < b->relNumber)
5871  return -1;
5872  else if (a->relNumber > b->relNumber)
5873  return 1;
5874  /* compare fork */
5875  else if (a->forkNum < b->forkNum)
5876  return -1;
5877  else if (a->forkNum > b->forkNum)
5878  return 1;
5879  /* compare block number */
5880  else if (a->blockNum < b->blockNum)
5881  return -1;
5882  else if (a->blockNum > b->blockNum)
5883  return 1;
5884  /* equal page IDs are unlikely, but not impossible */
5885  return 0;
5886 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, and b.
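
The net effect is tablespace-major ordering: items group by tablespace, then by relation, fork, and ascending block number, so checkpoint writes to each relation fork come out sequential. An illustrative check (field values invented for the example):

    CkptSortItem x = {.tsId = 1663, .relNumber = 16384,
                      .forkNum = MAIN_FORKNUM, .blockNum = 2};
    CkptSortItem y = {.tsId = 1663, .relNumber = 16384,
                      .forkNum = MAIN_FORKNUM, .blockNum = 9};

    /* same tablespace, relation, and fork, so block number decides */
    Assert(ckpt_buforder_comparator(&x, &y) < 0);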

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 5197 of file bufmgr.c.

5198 {
5199  BufferDesc *buf;
5200 
5201  Assert(BufferIsPinned(buffer));
5202  if (BufferIsLocal(buffer))
5203  return true; /* act as though we got it */
5204 
5205  buf = GetBufferDescriptor(buffer - 1);
5206 
5207  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
5208  LW_EXCLUSIVE);
5209 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References Assert, buf, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().
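
A hypothetical usage sketch (rel and blkno assumed; not from bufmgr.c): take the exclusive content lock only if it is free, and skip the page rather than wait when it is contended.

    Buffer      buf = ReadBuffer(rel, blkno);   /* pin the page */

    if (ConditionalLockBuffer(buf))
    {
        /* got BUFFER_LOCK_EXCLUSIVE without blocking; modify the page */
        UnlockReleaseBuffer(buf);
    }
    else
        ReleaseBuffer(buf);                     /* contended: try elsewhere */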

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5412 of file bufmgr.c.

5413 {
5414  BufferDesc *bufHdr;
5415  uint32 buf_state,
5416  refcount;
5417 
5418  Assert(BufferIsValid(buffer));
5419 
5420  if (BufferIsLocal(buffer))
5421  {
5422  refcount = LocalRefCount[-buffer - 1];
5423  /* There should be exactly one pin */
5424  Assert(refcount > 0);
5425  if (refcount != 1)
5426  return false;
5427  /* Nobody else to wait for */
5428  return true;
5429  }
5430 
5431  /* There should be exactly one local pin */
5432  refcount = GetPrivateRefCount(buffer);
5433  Assert(refcount);
5434  if (refcount != 1)
5435  return false;
5436 
5437  /* Try to acquire lock */
5438  if (!ConditionalLockBuffer(buffer))
5439  return false;
5440 
5441  bufHdr = GetBufferDescriptor(buffer - 1);
5442  buf_state = LockBufHdr(bufHdr);
5443  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5444 
5445  Assert(refcount > 0);
5446  if (refcount == 1)
5447  {
5448  /* Successfully acquired exclusive lock with pincount 1 */
5449  UnlockBufHdr(bufHdr, buf_state);
5450  return true;
5451  }
5452 
5453  /* Failed, so release the lock */
5454  UnlockBufHdr(bufHdr, buf_state);
5455  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5456  return false;
5457 }
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5197
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5171
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
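
A hedged sketch of the opportunistic pattern used by callers such as heap_page_prune_opt(): clean a page only when a cleanup-strength lock can be had immediately. Here prune_one_page() is a hypothetical helper, and rel and blkno are assumed.

    Buffer      buf = ReadBuffer(rel, blkno);

    if (ConditionalLockBufferForCleanup(buf))
    {
        /* exclusive lock held and ours is the only pin */
        prune_one_page(rel, buf);               /* hypothetical helper */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }
    ReleaseBuffer(buf);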

◆ copy_storage_using_buffer_read_stream_next_block()

static BlockNumber copy_storage_using_buffer_read_stream_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData(RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)

Definition at line 4810 of file bufmgr.c.

4812 {
4813  char relpersistence;
4814  SMgrRelation src_rel;
4815  SMgrRelation dst_rel;
4816 
4817  /* Set the relpersistence. */
4818  relpersistence = permanent ?
4819  RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
4820 
4821  src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
4822  dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
4823 
4824  /*
4825  * Create and copy all forks of the relation. During create database we
4826  * have a separate cleanup mechanism which deletes complete database
4827  * directory. Therefore, each individual relation doesn't need to be
4828  * registered for cleanup.
4829  */
4830  RelationCreateStorage(dst_rlocator, relpersistence, false);
4831 
4832  /* copy main fork. */
4833  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
4834  permanent);
4835 
4836  /* copy those extra forks that exist */
4837  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
4838  forkNum <= MAX_FORKNUM; forkNum++)
4839  {
4840  if (smgrexists(src_rel, forkNum))
4841  {
4842  smgrcreate(dst_rel, forkNum, false);
4843 
4844  /*
4845  * WAL log creation if the relation is persistent, or this is the
4846  * init fork of an unlogged relation.
4847  */
4848  if (permanent || forkNum == INIT_FORKNUM)
4849  log_smgrcreate(&dst_rlocator, forkNum);
4850 
4851  /* Copy a fork's data, block by block. */
4852  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
4853  permanent);
4854  }
4855  }
4856 }
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:4702
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:48
@ MAIN_FORKNUM
Definition: relpath.h:50
#define MAX_FORKNUM
Definition: relpath.h:62
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:198
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:411
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:398
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char* DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 3677 of file bufmgr.c.

3678 {
3679  BufferDesc *buf;
3680  int32 loccount;
3681  char *path;
3682  char *result;
3683  ProcNumber backend;
3684  uint32 buf_state;
3685 
3686  Assert(BufferIsValid(buffer));
3687  if (BufferIsLocal(buffer))
3688  {
3689  buf = GetLocalBufferDescriptor(-buffer - 1);
3690  loccount = LocalRefCount[-buffer - 1];
3691  backend = MyProcNumber;
3692  }
3693  else
3694  {
3695  buf = GetBufferDescriptor(buffer - 1);
3696  loccount = GetPrivateRefCount(buffer);
3697  backend = INVALID_PROC_NUMBER;
3698  }
3699 
3700  /* theoretically we should lock the bufhdr here */
3701  path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
3702  BufTagGetForkNum(&buf->tag));
3703  buf_state = pg_atomic_read_u32(&buf->state);
3704 
3705  result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3706  buffer, path,
3707  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
3708  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
3709  pfree(path);
3710  return result;
3711 }
#define BUF_FLAG_MASK
Definition: buf_internals.h:48
ProcNumber MyProcNumber
Definition: globals.c:88
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:85

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pfree(), pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().
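
The returned string is palloc'd in the current memory context, so callers are expected to pfree() it when done, as in this sketch mirroring the CheckForBufferLeaks() loop above:

    char       *s = DebugPrintBufferRefcount(buffer);

    elog(WARNING, "buffer refcount leak: %s", s);
    pfree(s);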

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4398 of file bufmgr.c.

4399 {
4400  int i;
4401 
4402  /*
4403  * We needn't consider local buffers, since by assumption the target
4404  * database isn't our own.
4405  */
4406 
4407  for (i = 0; i < NBuffers; i++)
4408  {
4409  BufferDesc *bufHdr = GetBufferDescriptor(i);
4410  uint32 buf_state;
4411 
4412  /*
4413  * As in DropRelationBuffers, an unlocked precheck should be safe and
4414  * saves some cycles.
4415  */
4416  if (bufHdr->tag.dbOid != dbid)
4417  continue;
4418 
4419  buf_state = LockBufHdr(bufHdr);
4420  if (bufHdr->tag.dbOid == dbid)
4421  InvalidateBuffer(bufHdr); /* releases spinlock */
4422  else
4423  UnlockBufHdr(bufHdr, buf_state);
4424  }
4425 }
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1795
Oid dbOid
Definition: buf_internals.h:95

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)

Definition at line 4043 of file bufmgr.c.

4045 {
4046  int i;
4047  int j;
4048  RelFileLocatorBackend rlocator;
4049  BlockNumber nForkBlock[MAX_FORKNUM];
4050  uint64 nBlocksToInvalidate = 0;
4051 
4052  rlocator = smgr_reln->smgr_rlocator;
4053 
4054  /* If it's a local relation, it's localbuf.c's problem. */
4055  if (RelFileLocatorBackendIsTemp(rlocator))
4056  {
4057  if (rlocator.backend == MyProcNumber)
4058  {
4059  for (j = 0; j < nforks; j++)
4060  DropRelationLocalBuffers(rlocator.locator, forkNum[j],
4061  firstDelBlock[j]);
4062  }
4063  return;
4064  }
4065 
4066  /*
4067  * To remove all the pages of the specified relation forks from the buffer
4068  * pool, we need to scan the entire buffer pool but we can optimize it by
4069  * finding the buffers from BufMapping table provided we know the exact
4070  * size of each fork of the relation. The exact size is required to ensure
4071  * that we don't leave any buffer for the relation being dropped as
4072  * otherwise the background writer or checkpointer can lead to a PANIC
4073  * error while flushing buffers corresponding to files that don't exist.
4074  *
4075  * To know the exact size, we rely on the size cached for each fork by us
4076  * during recovery which limits the optimization to recovery and on
4077  * standbys but we can easily extend it once we have shared cache for
4078  * relation size.
4079  *
4080  * In recovery, we cache the value returned by the first lseek(SEEK_END)
4081  * and the future writes keeps the cached value up-to-date. See
4082  * smgrextend. It is possible that the value of the first lseek is smaller
4083  * than the actual number of existing blocks in the file due to buggy
4084  * Linux kernels that might not have accounted for the recent write. But
4085  * that should be fine because there must not be any buffers after that
4086  * file size.
4087  */
4088  for (i = 0; i < nforks; i++)
4089  {
4090  /* Get the number of blocks for a relation's fork */
4091  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4092 
4093  if (nForkBlock[i] == InvalidBlockNumber)
4094  {
4095  nBlocksToInvalidate = InvalidBlockNumber;
4096  break;
4097  }
4098 
4099  /* calculate the number of blocks to be invalidated */
4100  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4101  }
4102 
4103  /*
4104  * We apply the optimization iff the total number of blocks to invalidate
4105  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4106  */
4107  if (BlockNumberIsValid(nBlocksToInvalidate) &&
4108  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4109  {
4110  for (j = 0; j < nforks; j++)
4111  FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4112  nForkBlock[j], firstDelBlock[j]);
4113  return;
4114  }
4115 
4116  for (i = 0; i < NBuffers; i++)
4117  {
4118  BufferDesc *bufHdr = GetBufferDescriptor(i);
4119  uint32 buf_state;
4120 
4121  /*
4122  * We can make this a tad faster by prechecking the buffer tag before
4123  * we attempt to lock the buffer; this saves a lot of lock
4124  * acquisitions in typical cases. It should be safe because the
4125  * caller must have AccessExclusiveLock on the relation, or some other
4126  * reason to be certain that no one is loading new pages of the rel
4127  * into the buffer pool. (Otherwise we might well miss such pages
4128  * entirely.) Therefore, while the tag might be changing while we
4129  * look at it, it can't be changing *to* a value we care about, only
4130  * *away* from such a value. So false negatives are impossible, and
4131  * false positives are safe because we'll recheck after getting the
4132  * buffer lock.
4133  *
4134  * We could check forkNum and blockNum as well as the rlocator, but
4135  * the incremental win from doing so seems small.
4136  */
4137  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4138  continue;
4139 
4140  buf_state = LockBufHdr(bufHdr);
4141 
4142  for (j = 0; j < nforks; j++)
4143  {
4144  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4145  BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4146  bufHdr->tag.blockNum >= firstDelBlock[j])
4147  {
4148  InvalidateBuffer(bufHdr); /* releases spinlock */
4149  break;
4150  }
4151  }
4152  if (j >= nforks)
4153  UnlockBufHdr(bufHdr, buf_state);
4154  }
4155 }
uint32 BlockNumber
Definition: block.h:31
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:87
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4337
int j
Definition: isn.c:74
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:489
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:679

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().
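
A hypothetical sketch in the style of the smgrtruncate() caller (rel, new_main_nblocks, and new_fsm_nblocks are assumed): discard every cached page of the listed forks from the new relation length onward.

    ForkNumber  forks[2] = {MAIN_FORKNUM, FSM_FORKNUM};
    BlockNumber firstDelBlock[2] = {new_main_nblocks, new_fsm_nblocks};

    /* invalidate buffers for blocks >= firstDelBlock[i] of each fork */
    DropRelationBuffers(RelationGetSmgr(rel), forks, 2, firstDelBlock);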

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers(SMgrRelation *smgr_reln, int nlocators)

Definition at line 4166 of file bufmgr.c.

4167 {
4168  int i;
4169  int n = 0;
4170  SMgrRelation *rels;
4171  BlockNumber (*block)[MAX_FORKNUM + 1];
4172  uint64 nBlocksToInvalidate = 0;
4173  RelFileLocator *locators;
4174  bool cached = true;
4175  bool use_bsearch;
4176 
4177  if (nlocators == 0)
4178  return;
4179 
4180  rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
4181 
4182  /* If it's a local relation, it's localbuf.c's problem. */
4183  for (i = 0; i < nlocators; i++)
4184  {
4185  if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4186  {
4187  if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4188  DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4189  }
4190  else
4191  rels[n++] = smgr_reln[i];
4192  }
4193 
4194  /*
4195  * If there are no non-local relations, then we're done. Release the
4196  * memory and return.
4197  */
4198  if (n == 0)
4199  {
4200  pfree(rels);
4201  return;
4202  }
4203 
4204  /*
4205  * This is used to remember the number of blocks for all the relations
4206  * forks.
4207  */
4208  block = (BlockNumber (*)[MAX_FORKNUM + 1])
4209  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4210 
4211  /*
4212  * We can avoid scanning the entire buffer pool if we know the exact size
4213  * of each of the given relation forks. See DropRelationBuffers.
4214  */
4215  for (i = 0; i < n && cached; i++)
4216  {
4217  for (int j = 0; j <= MAX_FORKNUM; j++)
4218  {
4219  /* Get the number of blocks for a relation's fork. */
4220  block[i][j] = smgrnblocks_cached(rels[i], j);
4221 
4222  /* We need to only consider the relation forks that exists. */
4223  if (block[i][j] == InvalidBlockNumber)
4224  {
4225  if (!smgrexists(rels[i], j))
4226  continue;
4227  cached = false;
4228  break;
4229  }
4230 
4231  /* calculate the total number of blocks to be invalidated */
4232  nBlocksToInvalidate += block[i][j];
4233  }
4234  }
4235 
4236  /*
4237  * We apply the optimization iff the total number of blocks to invalidate
4238  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4239  */
4240  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4241  {
4242  for (i = 0; i < n; i++)
4243  {
4244  for (int j = 0; j <= MAX_FORKNUM; j++)
4245  {
4246  /* ignore relation forks that doesn't exist */
4247  if (!BlockNumberIsValid(block[i][j]))
4248  continue;
4249 
4250  /* drop all the buffers for a particular relation fork */
4251  FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4252  j, block[i][j], 0);
4253  }
4254  }
4255 
4256  pfree(block);
4257  pfree(rels);
4258  return;
4259  }
4260 
4261  pfree(block);
4262  locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
4263  for (i = 0; i < n; i++)
4264  locators[i] = rels[i]->smgr_rlocator.locator;
4265 
4266  /*
4267  * For low number of relations to drop just use a simple walk through, to
4268  * save the bsearch overhead. The threshold to use is rather a guess than
4269  * an exactly determined value, as it depends on many factors (CPU and RAM
4270  * speeds, amount of shared buffers etc.).
4271  */
4272  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4273 
4274  /* sort the list of rlocators if necessary */
4275  if (use_bsearch)
4276  qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4277 
4278  for (i = 0; i < NBuffers; i++)
4279  {
4280  RelFileLocator *rlocator = NULL;
4281  BufferDesc *bufHdr = GetBufferDescriptor(i);
4282  uint32 buf_state;
4283 
4284  /*
4285  * As in DropRelationBuffers, an unlocked precheck should be safe and
4286  * saves some cycles.
4287  */
4288 
4289  if (!use_bsearch)
4290  {
4291  int j;
4292 
4293  for (j = 0; j < n; j++)
4294  {
4295  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4296  {
4297  rlocator = &locators[j];
4298  break;
4299  }
4300  }
4301  }
4302  else
4303  {
4304  RelFileLocator locator;
4305 
4306  locator = BufTagGetRelFileLocator(&bufHdr->tag);
4307  rlocator = bsearch((const void *) &(locator),
4308  locators, n, sizeof(RelFileLocator),
4309  rlocator_comparator);
4310  }
4311 
4312  /* buffer doesn't belong to any of the given relfilelocators; skip it */
4313  if (rlocator == NULL)
4314  continue;
4315 
4316  buf_state = LockBufHdr(bufHdr);
4317  if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4318  InvalidateBuffer(bufHdr); /* releases spinlock */
4319  else
4320  UnlockBufHdr(bufHdr, buf_state);
4321  }
4322 
4323  pfree(locators);
4324  pfree(rels);
4325 }
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:79
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:537
#define qsort(a, b, c, d)
Definition: port.h:453

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf)

Definition at line 6109 of file bufmgr.c.

6110 {
6111  BufferDesc *desc;
6112  uint32 buf_state;
6113  bool result;
6114 
6115  /* Make sure we can pin the buffer. */
6116  ResourceOwnerEnlarge(CurrentResourceOwner);
6117  ReservePrivateRefCountEntry();
6118 
6119  Assert(!BufferIsLocal(buf));
6120  desc = GetBufferDescriptor(buf - 1);
6121 
6122  /* Lock the header and check if it's valid. */
6123  buf_state = LockBufHdr(desc);
6124  if ((buf_state & BM_VALID) == 0)
6125  {
6126  UnlockBufHdr(desc, buf_state);
6127  return false;
6128  }
6129 
6130  /* Check that it's not pinned already. */
6131  if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
6132  {
6133  UnlockBufHdr(desc, buf_state);
6134  return false;
6135  }
6136 
6137  PinBuffer_Locked(desc); /* releases spinlock */
6138 
6139  /* If it was dirty, try to clean it once. */
6140  if (buf_state & BM_DIRTY)
6141  {
6142  LWLockAcquire(BufferDescriptorGetContentLock(desc), LW_SHARED);
6143  FlushBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
6144  LWLockRelease(BufferDescriptorGetContentLock(desc));
6145  }
6146 
6147  /* This will return false if it becomes dirty or someone else pins it. */
6148  result = InvalidateVictimBuffer(desc);
6149 
6150  UnpinBuffer(desc);
6151 
6152  return result;
6153 }
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:3796
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:2775
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition: bufmgr.c:1893
@ IOOBJECT_RELATION
Definition: pgstat.h:280

References Assert, BM_DIRTY, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), BufferIsLocal, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by pg_buffercache_evict().
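
A sketch in the spirit of pg_buffercache_evict(), here sweeping the whole pool (illustrative only; note that shared buffer IDs are 1-based):

    int         evicted = 0;

    for (Buffer buf = 1; buf <= NBuffers; buf++)
    {
        if (EvictUnpinnedBuffer(buf))
            evicted++;              /* buffer was valid, unpinned, evicted */
    }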

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)

Definition at line 873 of file bufmgr.c.

877 {
878  Buffer buf;
879  uint32 extend_by = 1;
880 
881  ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
882  &buf, &extend_by);
883 
884  return buf;
885 }
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:905

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
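
A hypothetical sketch (rel assumed open and suitably locked): add one zero-filled block to the main fork and receive it already exclusively locked via EB_LOCK_FIRST, a common pattern among the callers listed above.

    Buffer      buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM,
                                        NULL /* default strategy */,
                                        EB_LOCK_FIRST);

    /* buf is pinned, exclusively locked, and zero-filled here */
    UnlockReleaseBuffer(buf);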

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)

Definition at line 905 of file bufmgr.c.

912 {
913  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
914  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
915  Assert(extend_by > 0);
916 
917  if (bmr.smgr == NULL)
918  {
919  bmr.smgr = RelationGetSmgr(bmr.rel);
920  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
921  }
922 
923  return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
924  extend_by, InvalidBlockNumber,
925  buffers, extended_by);
926 }
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2158
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
struct SMgrRelationData * smgr
Definition: bufmgr.h:103
Form_pg_class rd_rel
Definition: rel.h:111

References Assert, ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, RelationGetSmgr(), BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), and RelationAddBlocks().
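
A hypothetical bulk-extension sketch (rel assumed): request eight new blocks at once; extended_by reports how many blocks the call actually added, which can be fewer than requested (see LimitAdditionalPins() in ExtendBufferedRelShared() below).

    Buffer      buffers[8];
    uint32      extended_by = 0;
    BlockNumber first_block;

    first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                      NULL, 0 /* flags */,
                                      lengthof(buffers),
                                      buffers, &extended_by);

    for (uint32 i = 0; i < extended_by; i++)
        ReleaseBuffer(buffers[i]);      /* block first_block + i now exists */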

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)

Definition at line 2158 of file bufmgr.c.

2166 {
2167  BlockNumber first_block;
2168 
2169  TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2170  bmr.smgr->smgr_rlocator.locator.spcOid,
2171  bmr.smgr->smgr_rlocator.locator.dbOid,
2172  bmr.smgr->smgr_rlocator.locator.relNumber,
2173  bmr.smgr->smgr_rlocator.backend,
2174  extend_by);
2175 
2176  if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2177  first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2178  extend_by, extend_upto,
2179  buffers, &extend_by);
2180  else
2181  first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2182  extend_by, extend_upto,
2183  buffers, &extend_by);
2184  *extended_by = extend_by;
2185 
2186  TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2187  bmr.smgr->smgr_rlocator.locator.spcOid,
2188  bmr.smgr->smgr_rlocator.locator.dbOid,
2189  bmr.smgr->smgr_rlocator.locator.relNumber,
2190  bmr.smgr->smgr_rlocator.backend,
2191  *extended_by,
2192  first_block);
2193 
2194  return first_block;
2195 }
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2202
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: localbuf.c:313
RelFileNumber relNumber

References RelFileLocatorBackend::backend, RelFileLocator::dbOid, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), RelFileLocatorBackend::locator, RelFileLocator::relNumber, BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_rlocator, and RelFileLocator::spcOid.

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)

Definition at line 2202 of file bufmgr.c.

2210 {
2211  BlockNumber first_block;
2212  IOContext io_context = IOContextForStrategy(strategy);
2213  instr_time io_start;
2214 
2215  LimitAdditionalPins(&extend_by);
2216 
2217  /*
2218  * Acquire victim buffers for extension without holding extension lock.
2219  * Writing out victim buffers is the most expensive part of extending the
2220  * relation, particularly when doing so requires WAL flushes. Zeroing out
2221  * the buffers is also quite expensive, so do that before holding the
2222  * extension lock as well.
2223  *
2224  * These pages are pinned by us and not valid. While we hold the pin they
2225  * can't be acquired as victim buffers by another backend.
2226  */
2227  for (uint32 i = 0; i < extend_by; i++)
2228  {
2229  Block buf_block;
2230 
2231  buffers[i] = GetVictimBuffer(strategy, io_context);
2232  buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2233 
2234  /* new buffers are zero-filled */
2235  MemSet((char *) buf_block, 0, BLCKSZ);
2236  }
2237 
2238  /*
2239  * Lock relation against concurrent extensions, unless requested not to.
2240  *
2241  * We use the same extension lock for all forks. That's unnecessarily
2242  * restrictive, but currently extensions for forks don't happen often
2243  * enough to make it worth locking more granularly.
2244  *
2245  * Note that another backend might have extended the relation by the time
2246  * we get the lock.
2247  */
2248  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2249  LockRelationForExtension(bmr.rel, ExclusiveLock);
2250 
2251  /*
2252  * If requested, invalidate size cache, so that smgrnblocks asks the
2253  * kernel.
2254  */
2255  if (flags & EB_CLEAR_SIZE_CACHE)
2256  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2257 
2258  first_block = smgrnblocks(bmr.smgr, fork);
2259 
2260  /*
2261  * Now that we have the accurate relation size, check if the caller wants
2262  * us to extend to only up to a specific size. If there were concurrent
2263  * extensions, we might have acquired too many buffers and need to release
2264  * them.
2265  */
2266  if (extend_upto != InvalidBlockNumber)
2267  {
2268  uint32 orig_extend_by = extend_by;
2269 
2270  if (first_block > extend_upto)
2271  extend_by = 0;
2272  else if ((uint64) first_block + extend_by > extend_upto)
2273  extend_by = extend_upto - first_block;
2274 
2275  for (uint32 i = extend_by; i < orig_extend_by; i++)
2276  {
2277  BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2278 
2279  /*
2280  * The victim buffer we acquired previously is clean and unused,
2281  * let it be found again quickly
2282  */
2283  StrategyFreeBuffer(buf_hdr);
2284  UnpinBuffer(buf_hdr);
2285  }
2286 
2287  if (extend_by == 0)
2288  {
2289  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2291  *extended_by = extend_by;
2292  return first_block;
2293  }
2294  }
2295 
2296  /* Fail if relation is already at maximum possible length */
2297  if ((uint64) first_block + extend_by >= MaxBlockNumber)
2298  ereport(ERROR,
2299  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2300  errmsg("cannot extend relation %s beyond %u blocks",
2301  relpath(bmr.smgr->smgr_rlocator, fork),
2302  MaxBlockNumber)));
2303 
2304  /*
2305  * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2306  *
2307  * This needs to happen before we extend the relation, because as soon as
2308  * we do, other backends can start to read in those pages.
2309  */
2310  for (uint32 i = 0; i < extend_by; i++)
2311  {
2312  Buffer victim_buf = buffers[i];
2313  BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2314  BufferTag tag;
2315  uint32 hash;
2316  LWLock *partition_lock;
2317  int existing_id;
2318 
2319  /* in case we need to pin an existing buffer below */
2320  ResourceOwnerEnlarge(CurrentResourceOwner);
2321  ReservePrivateRefCountEntry();
2322 
2323  InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i);
2324  hash = BufTableHashCode(&tag);
2325  partition_lock = BufMappingPartitionLock(hash);
2326 
2327  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2328 
2329  existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2330 
2331  /*
2332  * We get here only in the corner case where we are trying to extend
2333  * the relation but we found a pre-existing buffer. This can happen
2334  * because a prior attempt at extending the relation failed, and
2335  * because mdread doesn't complain about reads beyond EOF (when
2336  * zero_damaged_pages is ON) and so a previous attempt to read a block
2337  * beyond EOF could have left a "valid" zero-filled buffer.
2338  * Unfortunately, we have also seen this case occurring because of
2339  * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
2340  * that doesn't account for a recent write. In that situation, the
2341  * pre-existing buffer would contain valid data that we don't want to
2342  * overwrite. Since the legitimate cases should always have left a
2343  * zero-filled buffer, complain if not PageIsNew.
2344  */
2345  if (existing_id >= 0)
2346  {
2347  BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2348  Block buf_block;
2349  bool valid;
2350 
2351  /*
2352  * Pin the existing buffer before releasing the partition lock,
2353  * preventing it from being evicted.
2354  */
2355  valid = PinBuffer(existing_hdr, strategy);
2356 
2357  LWLockRelease(partition_lock);
2358 
2359  /*
2360  * The victim buffer we acquired previously is clean and unused,
2361  * let it be found again quickly
2362  */
2363  StrategyFreeBuffer(victim_buf_hdr);
2364  UnpinBuffer(victim_buf_hdr);
2365 
2366  buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
2367  buf_block = BufHdrGetBlock(existing_hdr);
2368 
2369  if (valid && !PageIsNew((Page) buf_block))
2370  ereport(ERROR,
2371  (errmsg("unexpected data beyond EOF in block %u of relation %s",
2372  existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork)),
2373  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2374 
2375  /*
2376  * We *must* do smgr[zero]extend before succeeding, else the page
2377  * will not be reserved by the kernel, and the next P_NEW call
2378  * will decide to return the same page. Clear the BM_VALID bit,
2379  * do StartBufferIO() and proceed.
2380  *
2381  * Loop to handle the very small possibility that someone re-sets
2382  * BM_VALID between our clearing it and StartBufferIO inspecting
2383  * it.
2384  */
2385  do
2386  {
2387  uint32 buf_state = LockBufHdr(existing_hdr);
2388 
2389  buf_state &= ~BM_VALID;
2390  UnlockBufHdr(existing_hdr, buf_state);
2391  } while (!StartBufferIO(existing_hdr, true, false));
2392  }
2393  else
2394  {
2395  uint32 buf_state;
2396 
2397  buf_state = LockBufHdr(victim_buf_hdr);
2398 
2399  /* some sanity checks while we hold the buffer header lock */
2400  Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
2401  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2402 
2403  victim_buf_hdr->tag = tag;
2404 
2405  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2406  if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2407  buf_state |= BM_PERMANENT;
2408 
2409  UnlockBufHdr(victim_buf_hdr, buf_state);
2410 
2411  LWLockRelease(partition_lock);
2412 
2413  /* XXX: could combine the locked operations in it with the above */
2414  StartBufferIO(victim_buf_hdr, true, false);
2415  }
2416  }
2417 
2418  io_start = pgstat_prepare_io_time(track_io_timing);
2419 
2420  /*
2421  * Note: if smgrzeroextend fails, we will end up with buffers that are
2422  * allocated but not marked BM_VALID. The next relation extension will
2423  * still select the same block number (because the relation didn't get any
2424  * longer on disk) and so future attempts to extend the relation will find
2425  * the same buffers (if they have not been recycled) but come right back
2426  * here to try smgrzeroextend again.
2427  *
2428  * We don't need to set checksum for all-zero pages.
2429  */
2430  smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
2431 
2432  /*
2433  * Release the file-extension lock; it's now OK for someone else to extend
2434  * the relation some more.
2435  *
2436  * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2437  * take noticeable time.
2438  */
2439  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2440  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2441 
2442  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2443  io_start, extend_by);
2444 
2445  /* Set BM_VALID, terminate IO, and wake up any waiters */
2446  for (uint32 i = 0; i < extend_by; i++)
2447  {
2448  Buffer buf = buffers[i];
2449  BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2450  bool lock = false;
2451 
2452  if (flags & EB_LOCK_FIRST && i == 0)
2453  lock = true;
2454  else if (flags & EB_LOCK_TARGET)
2455  {
2456  Assert(extend_upto != InvalidBlockNumber);
2457  if (first_block + i + 1 == extend_upto)
2458  lock = true;
2459  }
2460 
2461  if (lock)
2462  LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
2463 
2464  TerminateBufferIO(buf_hdr, false, BM_VALID, true);
2465  }
2466 
2467  pgBufferUsage.shared_blks_written += extend_by;
2468 
2469  *extended_by = extend_by;
2470 
2471  return first_block;
2472 }
#define MaxBlockNumber
Definition: block.h:35
#define BM_JUST_DIRTIED
Definition: buf_internals.h:66
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
bool track_io_timing
Definition: bufmgr.c:170
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:68
void LimitAdditionalPins(uint32 *additional_pins)
Definition: bufmgr.c:2127
static bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition: bufmgr.c:5571
void * Block
Definition: bufmgr.h:25
@ EB_LOCK_TARGET
Definition: bufmgr.h:92
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:89
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:74
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
Pointer Page
Definition: bufpage.h:81
static bool PageIsNew(Page page)
Definition: bufpage.h:233
#define MemSet(start, val, len)
Definition: c.h:1020
int errhint(const char *fmt,...)
Definition: elog.c:1317
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:758
BufferUsage pgBufferUsage
Definition: instrument.c:20
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:420
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:470
#define ExclusiveLock
Definition: lockdefs.h:42
IOContext
Definition: pgstat.h:287
@ IOOP_EXTEND
Definition: pgstat.h:299
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition: relpath.h:94
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:655
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:560
int64 shared_blks_written
Definition: instrument.h:29
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46

References Assert, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errhint(), errmsg(), ERROR, ExclusiveLock, GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), RelFileLocatorBackend::locator, LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), BufferManagerRelation::rel, relpath, BufferManagerRelation::relpersistence, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrzeroextend(), StartBufferIO(), StrategyFreeBuffer(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)

Definition at line 937 of file bufmgr.c.

943 {
944  BlockNumber current_size;
945  uint32 extended_by = 0;
946  Buffer buffer = InvalidBuffer;
947  Buffer buffers[64];
948 
949  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
950  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
951  Assert(extend_to != InvalidBlockNumber && extend_to > 0);
952 
953  if (bmr.smgr == NULL)
954  {
955  bmr.smgr = RelationGetSmgr(bmr.rel);
956  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
957  }
958 
959  /*
960  * If desired, create the file if it doesn't exist. If
961  * smgr_cached_nblocks[fork] is positive then it must exist, no need for
962  * an smgrexists call.
963  */
964  if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
965  (bmr.smgr->smgr_cached_nblocks[fork] == 0 ||
966  bmr.smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
967  !smgrexists(bmr.smgr, fork))
968  {
969  LockRelationForExtension(bmr.rel, ExclusiveLock);
970 
971  /* recheck, fork might have been created concurrently */
972  if (!smgrexists(bmr.smgr, fork))
973  smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY);
974 
975  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
976 
977 
978  /*
979  * If requested, invalidate size cache, so that smgrnblocks asks the
980  * kernel.
981  */
982  if (flags & EB_CLEAR_SIZE_CACHE)
983  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
984 
985  /*
986  * Estimate how many pages we'll need to extend by. This avoids acquiring
987  * unnecessarily many victim buffers.
988  */
989  current_size = smgrnblocks(bmr.smgr, fork);
990 
991  /*
992  * Since no-one else can be looking at the page contents yet, there is no
993  * difference between an exclusive lock and a cleanup-strength lock. Note
994  * that we pass the original mode to ReadBuffer_common() below, when
995  * falling back to reading the buffer to a concurrent relation extension.
996  */
997  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
998  flags |= EB_LOCK_TARGET;
999 
1000  while (current_size < extend_to)
1001  {
1002  uint32 num_pages = lengthof(buffers);
1003  BlockNumber first_block;
1004 
1005  if ((uint64) current_size + num_pages > extend_to)
1006  num_pages = extend_to - current_size;
1007 
1008  first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1009  num_pages, extend_to,
1010  buffers, &extended_by);
1011 
1012  current_size = first_block + extended_by;
1013  Assert(num_pages != 0 || current_size >= extend_to);
1014 
1015  for (uint32 i = 0; i < extended_by; i++)
1016  {
1017  if (first_block + i != extend_to - 1)
1018  ReleaseBuffer(buffers[i]);
1019  else
1020  buffer = buffers[i];
1021  }
1022  }
1023 
1024  /*
1025  * It's possible that another backend concurrently extended the relation.
1026  * In that case read the buffer.
1027  *
1028  * XXX: Should we control this via a flag?
1029  */
1030  if (buffer == InvalidBuffer)
1031  {
1032  Assert(extended_by == 0);
1033  buffer = ReadBuffer_common(bmr.rel, bmr.smgr, bmr.relpersistence,
1034  fork, extend_to - 1, mode, strategy);
1035  }
1036 
1037  return buffer;
1038 }
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1217
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4936
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:77
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:83
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:48
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:46
#define lengthof(array)
Definition: c.h:788
static PgChecksumMode mode
Definition: pg_checksums.c:56
static int64 current_size
Definition: pg_checksums.c:64

References Assert, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, RelationGetSmgr(), ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
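
A hedged sketch following the vm_extend() caller (vm_nblocks, the target fork length, is assumed): grow the visibility-map fork to a given size, creating the fork first if it does not exist yet.

    Buffer      buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM,
                                          NULL,
                                          EB_CREATE_FORK_IF_NEEDED |
                                          EB_CLEAR_SIZE_CACHE,
                                          vm_nblocks,
                                          RBM_ZERO_ON_ERROR);

    ReleaseBuffer(buf);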

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)

Definition at line 4337 of file bufmgr.c.

4340 {
4341  BlockNumber curBlock;
4342 
4343  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4344  {
4345  uint32 bufHash; /* hash value for tag */
4346  BufferTag bufTag; /* identity of requested block */
4347  LWLock *bufPartitionLock; /* buffer partition lock for it */
4348  int buf_id;
4349  BufferDesc *bufHdr;
4350  uint32 buf_state;
4351 
4352  /* create a tag so we can lookup the buffer */
4353  InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4354 
4355  /* determine its hash code and partition lock ID */
4356  bufHash = BufTableHashCode(&bufTag);
4357  bufPartitionLock = BufMappingPartitionLock(bufHash);
4358 
4359  /* Check that it is in the buffer pool. If not, do nothing. */
4360  LWLockAcquire(bufPartitionLock, LW_SHARED);
4361  buf_id = BufTableLookup(&bufTag, bufHash);
4362  LWLockRelease(bufPartitionLock);
4363 
4364  if (buf_id < 0)
4365  continue;
4366 
4367  bufHdr = GetBufferDescriptor(buf_id);
4368 
4369  /*
4370  * We need to lock the buffer header and recheck if the buffer is
4371  * still associated with the same block because the buffer could be
4372  * evicted by some other backend loading blocks for a different
4373  * relation after we release lock on the BufMapping table.
4374  */
4375  buf_state = LockBufHdr(bufHdr);
4376 
4377  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
4378  BufTagGetForkNum(&bufHdr->tag) == forkNum &&
4379  bufHdr->tag.blockNum >= firstDelBlock)
4380  InvalidateBuffer(bufHdr); /* releases spinlock */
4381  else
4382  UnlockBufHdr(bufHdr, buf_state);
4383  }
4384 }

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)

Definition at line 3796 of file bufmgr.c.

3798 {
3799  XLogRecPtr recptr;
3800  ErrorContextCallback errcallback;
3801  instr_time io_start;
3802  Block bufBlock;
3803  char *bufToWrite;
3804  uint32 buf_state;
3805 
3806  /*
3807  * Try to start an I/O operation. If StartBufferIO returns false, then
3808  * someone else flushed the buffer before we could, so we need not do
3809  * anything.
3810  */
3811  if (!StartBufferIO(buf, false, false))
3812  return;
3813 
3814  /* Setup error traceback support for ereport() */
3815  errcallback.callback = shared_buffer_write_error_callback;
3816  errcallback.arg = (void *) buf;
3817  errcallback.previous = error_context_stack;
3818  error_context_stack = &errcallback;
3819 
3820  /* Find smgr relation for buffer */
3821  if (reln == NULL)
3822  reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
3823 
3824  TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
3825  buf->tag.blockNum,
3826  reln->smgr_rlocator.locator.spcOid,
3827  reln->smgr_rlocator.locator.dbOid,
3828  reln->smgr_rlocator.locator.relNumber);
3829 
3830  buf_state = LockBufHdr(buf);
3831 
3832  /*
3833  * Run PageGetLSN while holding header lock, since we don't have the
3834  * buffer locked exclusively in all cases.
3835  */
3836  recptr = BufferGetLSN(buf);
3837 
3838  /* To check if block content changes while flushing. - vadim 01/17/97 */
3839  buf_state &= ~BM_JUST_DIRTIED;
3840  UnlockBufHdr(buf, buf_state);
3841 
3842  /*
3843  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
3844  * rule that log updates must hit disk before any of the data-file changes
3845  * they describe do.
3846  *
3847  * However, this rule does not apply to unlogged relations, which will be
3848  * lost after a crash anyway. Most unlogged relation pages do not bear
3849  * LSNs since we never emit WAL records for them, and therefore flushing
3850  * up through the buffer LSN would be useless, but harmless. However,
3851  * GiST indexes use LSNs internally to track page-splits, and therefore
3852  * unlogged GiST pages bear "fake" LSNs generated by
3853  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
3854  * LSN counter could advance past the WAL insertion point; and if it did
3855  * happen, attempting to flush WAL through that location would fail, with
3856  * disastrous system-wide consequences. To make sure that can't happen,
3857  * skip the flush if the buffer isn't permanent.
3858  */
3859  if (buf_state & BM_PERMANENT)
3860  XLogFlush(recptr);
3861 
3862  /*
3863  * Now it's safe to write buffer to disk. Note that no one else should
3864  * have been able to write it while we were busy with log flushing because
3865  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
3866  */
3867  bufBlock = BufHdrGetBlock(buf);
3868 
3869  /*
3870  * Update page checksum if desired. Since we have only shared lock on the
3871  * buffer, other processes might be updating hint bits in it, so we must
3872  * copy the page to private storage if we do checksumming.
3873  */
3874  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
3875 
3876  io_start = pgstat_prepare_io_time(track_io_timing);
3877 
3878  /*
3879  * bufToWrite is either the shared buffer or a copy, as appropriate.
3880  */
3881  smgrwrite(reln,
3882  BufTagGetForkNum(&buf->tag),
3883  buf->tag.blockNum,
3884  bufToWrite,
3885  false);
3886 
3887  /*
3888  * When a strategy is in use, only flushes of dirty buffers already in the
3889  * strategy ring are counted as strategy writes (IOCONTEXT
3890  * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
3891  * statistics tracking.
3892  *
3893  * If a shared buffer initially added to the ring must be flushed before
3894  * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
3895  *
3896  * If a shared buffer which was added to the ring later because the
3897  * current strategy buffer is pinned or in use or because all strategy
3898  * buffers were dirty and rejected (for BAS_BULKREAD operations only)
3899  * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
3900  * (from_ring will be false).
3901  *
3902  * When a strategy is not in use, the write can only be a "regular" write
3903  * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
3904  */
3905  pgstat_count_io_op_time(io_object, io_context,
3906  IOOP_WRITE, io_start, 1);
3907 
3908  pgBufferUsage.shared_blks_written++;
3909 
3910  /*
3911  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
3912  * end the BM_IO_IN_PROGRESS state.
3913  */
3914  TerminateBufferIO(buf, true, 0, true);
3915 
3916  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
3917  buf->tag.blockNum,
3918  reln->smgr_rlocator.locator.spcOid,
3919  reln->smgr_rlocator.locator.dbOid,
3920  reln->smgr_rlocator.locator.relNumber);
3921 
3922  /* Pop the error context stack */
3923  error_context_stack = errcallback.previous;
3924 }
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:69
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5707
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
ErrorContextCallback * error_context_stack
Definition: elog.c:94
@ IOOP_WRITE
Definition: pgstat.h:304
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.h:121
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2794

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdr(), and XLogFlush().

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), and SyncOneBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 4874 of file bufmgr.c.

4875 {
4876  int i;
4877  BufferDesc *bufHdr;
4878 
4879  for (i = 0; i < NBuffers; i++)
4880  {
4881  uint32 buf_state;
4882 
4883  bufHdr = GetBufferDescriptor(i);
4884 
4885  /*
4886  * As in DropRelationBuffers, an unlocked precheck should be safe and
4887  * saves some cycles.
4888  */
4889  if (bufHdr->tag.dbOid != dbid)
4890  continue;
4891 
4892  /* Make sure we can handle the pin */
4893  ReservePrivateRefCountEntry();
4894  ResourceOwnerEnlarge(CurrentResourceOwner);
4895 
4896  buf_state = LockBufHdr(bufHdr);
4897  if (bufHdr->tag.dbOid == dbid &&
4898  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4899  {
4900  PinBuffer_Locked(bufHdr);
4901  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4902  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4903  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4904  UnpinBuffer(bufHdr);
4905  }
4906  else
4907  UnlockBufHdr(bufHdr, buf_state);
4908  }
4909 }

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), CurrentResourceOwner, buftag::dbOid, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 4916 of file bufmgr.c.

4917 {
4918  BufferDesc *bufHdr;
4919 
4920  /* currently not needed, but no fundamental reason not to support */
4921  Assert(!BufferIsLocal(buffer));
4922 
4923  Assert(BufferIsPinned(buffer));
4924 
4925  bufHdr = GetBufferDescriptor(buffer - 1);
4926 
4926 
4927  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4928 
4929  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4930 }
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1893

References Assert, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().
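
A hedged redo-side sketch modeled on the callers above (buffer assumed valid): after rebuilding a page's contents under the exclusive content lock, force it straight out to storage.

    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    /* ... reinitialize the page contents here ... */
    MarkBufferDirty(buffer);
    FlushOneBuffer(buffer);             /* content lock must still be held */
    UnlockReleaseBuffer(buffer);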

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 4504 of file bufmgr.c.

4505 {
4506  int i;
4507  BufferDesc *bufHdr;
4508  SMgrRelation srel = RelationGetSmgr(rel);
4509 
4510  if (RelationUsesLocalBuffers(rel))
4511  {
4512  for (i = 0; i < NLocBuffer; i++)
4513  {
4514  uint32 buf_state;
4515  instr_time io_start;
4516 
4517  bufHdr = GetLocalBufferDescriptor(i);
4518  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4519  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
4520  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4521  {
4522  ErrorContextCallback errcallback;
4523  Page localpage;
4524 
4525  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
4526 
4527  /* Setup error traceback support for ereport() */
4528  errcallback.callback = local_buffer_write_error_callback;
4529  errcallback.arg = (void *) bufHdr;
4530  errcallback.previous = error_context_stack;
4531  error_context_stack = &errcallback;
4532 
4533  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
4534 
4535  io_start = pgstat_prepare_io_time(track_io_timing);
4536 
4537  smgrwrite(srel,
4538  BufTagGetForkNum(&bufHdr->tag),
4539  bufHdr->tag.blockNum,
4540  localpage,
4541  false);
4542 
4543  pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
4544  IOCONTEXT_NORMAL, IOOP_WRITE,
4545  io_start, 1);
4546 
4547  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
4548  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
4549 
4550  pgBufferUsage.local_blks_written++;
4551 
4552  /* Pop the error context stack */
4553  error_context_stack = errcallback.previous;
4554  }
4555  }
4556 
4557  return;
4558  }
4559 
4560  for (i = 0; i < NBuffers; i++)
4561  {
4562  uint32 buf_state;
4563 
4564  bufHdr = GetBufferDescriptor(i);
4565 
4566  /*
4567  * As in DropRelationBuffers, an unlocked precheck should be safe and
4568  * saves some cycles.
4569  */
4570  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
4571  continue;
4572 
4573  /* Make sure we can handle the pin */
4574  ReservePrivateRefCountEntry();
4575  ResourceOwnerEnlarge(CurrentResourceOwner);
4576 
4577  buf_state = LockBufHdr(bufHdr);
4578  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4579  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4580  {
4581  PinBuffer_Locked(bufHdr);
4582  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4583  FlushBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4584  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4585  UnpinBuffer(bufHdr);
4586  }
4587  else
4588  UnlockBufHdr(bufHdr, buf_state);
4589  }
4590 }
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:288
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:72
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5727
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
int NLocBuffer
Definition: localbuf.c:42
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:281
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
int64 local_blks_written
Definition: instrument.h:33
RelFileLocator rd_locator
Definition: rel.h:57

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_WRITE, BufferUsage::local_blks_written, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), smgrwrite(), BufferDesc::state, BufferDesc::tag, track_io_timing, UnlockBufHdr(), and UnpinBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
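
A sketch of the typical call site (hypothetical helper, modelled on heapam_relation_copy_data()): flush first, so that code reading the relation's storage beneath the buffer manager sees current page contents.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    prepare_relation_for_raw_copy(Relation rel)
    {
        /* push every dirty page of rel (shared or local) out to smgr */
        FlushRelationBuffers(rel);

        /* ... the relation's files may now be copied block by block ... */
    }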

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 4602 of file bufmgr.c.

4603 {
4604  int i;
4605  SMgrSortArray *srels;
4606  bool use_bsearch;
4607 
4608  if (nrels == 0)
4609  return;
4610 
4611  /* fill-in array for qsort */
4612  srels = palloc(sizeof(SMgrSortArray) * nrels);
4613 
4614  for (i = 0; i < nrels; i++)
4615  {
4616  Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
4617 
4618  srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
4619  srels[i].srel = smgrs[i];
4620  }
4621 
4622  /*
4623  * Save the bsearch overhead for low number of relations to sync. See
4624  * DropRelationsAllBuffers for details.
4625  */
4626  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
4627 
4628  /* sort the list of SMgrRelations if necessary */
4629  if (use_bsearch)
4630  qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
4631 
4632  for (i = 0; i < NBuffers; i++)
4633  {
4634  SMgrSortArray *srelent = NULL;
4635  BufferDesc *bufHdr = GetBufferDescriptor(i);
4636  uint32 buf_state;
4637 
4638  /*
4639  * As in DropRelationBuffers, an unlocked precheck should be safe and
4640  * saves some cycles.
4641  */
4642 
4643  if (!use_bsearch)
4644  {
4645  int j;
4646 
4647  for (j = 0; j < nrels; j++)
4648  {
4649  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
4650  {
4651  srelent = &srels[j];
4652  break;
4653  }
4654  }
4655  }
4656  else
4657  {
4658  RelFileLocator rlocator;
4659 
4660  rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4661  srelent = bsearch((const void *) &(rlocator),
4662  srels, nrels, sizeof(SMgrSortArray),
4663  rlocator_comparator);
4664  }
4665 
4666  /* buffer doesn't belong to any of the given relfilelocators; skip it */
4667  if (srelent == NULL)
4668  continue;
4669 
4670  /* Make sure we can handle the pin */
4671  ReservePrivateRefCountEntry();
4672  ResourceOwnerEnlarge(CurrentResourceOwner);
4673 
4674  buf_state = LockBufHdr(bufHdr);
4675  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
4676  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4677  {
4678  PinBuffer_Locked(bufHdr);
4679  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4680  FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4681  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4682  UnpinBuffer(bufHdr);
4683  }
4684  else
4685  UnlockBufHdr(bufHdr, buf_state);
4686  }
4687 
4688  pfree(srels);
4689 }
SMgrRelation srel
Definition: bufmgr.c:136
RelFileLocator rlocator
Definition: bufmgr.c:135

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, palloc(), pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().
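
A sketch of the batch pattern used by smgrdosyncall() (hypothetical wrapper): one pass over shared buffers covers all of the relations, instead of one full pass per relation.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "storage/smgr.h"

    static void
    flush_relation_batch(SMgrRelation *srels, int nrels)
    {
        FlushRelationsAllBuffers(srels, nrels);
        /* ... each relation's forks can then be synced to disk ... */
    }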

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 466 of file bufmgr.c.

467 {
468  Assert(ref->refcount == 0);
469 
470  if (ref >= &PrivateRefCountArray[0] &&
471  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
472  {
473  ref->buffer = InvalidBuffer;
474 
475  /*
476  * Mark the just used entry as reserved - in many scenarios that
477  * allows us to avoid ever having to search the array/hash for free
478  * entries.
479  */
480  ReservedRefCountEntry = ref;
481  }
482  else
483  {
484  bool found;
485  Buffer buffer = ref->buffer;
486 
487  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
488  Assert(found);
489  Assert(PrivateRefCountOverflowed > 0);
490  PrivateRefCountOverflowed--;
491  }
492 }
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:239
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_REMOVE
Definition: hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountEntry.

Referenced by UnpinBufferNoOwner().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 443 of file bufmgr.c.

444 {
445  PrivateRefCountEntry *ref;
446 
447  Assert(BufferIsValid(buffer));
448  Assert(!BufferIsLocal(buffer));
449 
450  /*
451  * Not moving the entry - that's ok for the current users, but we might
452  * want to change this one day.
453  */
454  ref = GetPrivateRefCountEntry(buffer, false);
455 
456  if (ref == NULL)
457  return 0;
458  return ref->refcount;
459 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:369

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), MarkBufferDirtyHint(), and ReadRecentBuffer().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 369 of file bufmgr.c.

370 {
371  PrivateRefCountEntry *res;
372  int i;
373 
374  Assert(BufferIsValid(buffer));
375  Assert(!BufferIsLocal(buffer));
376 
377  /*
378  * First search for references in the array, that'll be sufficient in the
379  * majority of cases.
380  */
381  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
382  {
383  res = &PrivateRefCountArray[i];
384 
385  if (res->buffer == buffer)
386  return res;
387  }
388 
389  /*
390  * By here we know that the buffer, if already pinned, isn't residing in
391  * the array.
392  *
393  * Only look up the buffer in the hashtable if we've previously overflowed
394  * into it.
395  */
396  if (PrivateRefCountOverflowed == 0)
397  return NULL;
398 
399  res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL);
400 
401  if (res == NULL)
402  return NULL;
403  else if (!do_move)
404  {
405  /* caller doesn't want us to move the hash entry into the array */
406  return res;
407  }
408  else
409  {
410  /* move buffer from hashtable into the free array slot */
411  bool found;
412  PrivateRefCountEntry *free;
413 
414  /* Ensure there's a free array slot */
415  ReservePrivateRefCountEntry();
416 
417  /* Use up the reserved slot */
418  Assert(ReservedRefCountEntry != NULL);
419  free = ReservedRefCountEntry;
420  ReservedRefCountEntry = NULL;
421  Assert(free->buffer == InvalidBuffer);
422 
423  /* and fill it */
424  free->buffer = buffer;
425  free->refcount = res->refcount;
426 
427  /* delete from hashtable */
428  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
429  Assert(found);
430  Assert(PrivateRefCountOverflowed > 0);
431  PrivateRefCountOverflowed--;
432 
433  return free;
434  }
435 }
@ HASH_FIND
Definition: hsearch.h:113

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBufferNoOwner().
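
A standalone sketch of the two-tier lookup idea (simplified types and a hypothetical overflow_hash_lookup(); not the actual bufmgr.c code): scan a tiny fixed array first, and touch the hash table only if entries have ever overflowed into it.

    #define REFCOUNT_ARRAY_ENTRIES 8

    typedef struct { int buffer; int refcount; } RefCountEntry;

    static RefCountEntry array[REFCOUNT_ARRAY_ENTRIES];
    static int  overflowed;                 /* > 0 once the hash is in use */

    static RefCountEntry *overflow_hash_lookup(int buffer);

    static RefCountEntry *
    refcount_lookup(int buffer)
    {
        /* fast path: eight slots are cheaper to scan than to hash */
        for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
            if (array[i].buffer == buffer)
                return &array[i];

        /* the hash table can only hold entries if we ever overflowed */
        if (overflowed == 0)
            return NULL;
        return overflow_hash_lookup(buffer);
    }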

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 1961 of file bufmgr.c.

1962 {
1963  BufferDesc *buf_hdr;
1964  Buffer buf;
1965  uint32 buf_state;
1966  bool from_ring;
1967 
1968  /*
1969  * Ensure, while the spinlock's not yet held, that there's a free refcount
1970  * entry, and a resource owner slot for the pin.
1971  */
1972  ReservePrivateRefCountEntry();
1973  ResourceOwnerEnlarge(CurrentResourceOwner);
1974 
1975  /* we return here if a prospective victim buffer gets used concurrently */
1976 again:
1977 
1978  /*
1979  * Select a victim buffer. The buffer is returned with its header
1980  * spinlock still held!
1981  */
1982  buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
1983  buf = BufferDescriptorGetBuffer(buf_hdr);
1984 
1985  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1986 
1987  /* Pin the buffer and then release the buffer spinlock */
1988  PinBuffer_Locked(buf_hdr);
1989 
1990  /*
1991  * We shouldn't have any other pins for this buffer.
1992  */
1993  CheckBufferIsPinnedOnce(buf);
1994 
1995  /*
1996  * If the buffer was dirty, try to write it out. There is a race
1997  * condition here, in that someone might dirty it after we released the
1998  * buffer header lock above, or even while we are writing it out (since
1999  * our share-lock won't prevent hint-bit updates). We will recheck the
2000  * dirty bit after re-locking the buffer header.
2001  */
2002  if (buf_state & BM_DIRTY)
2003  {
2004  LWLock *content_lock;
2005 
2006  Assert(buf_state & BM_TAG_VALID);
2007  Assert(buf_state & BM_VALID);
2008 
2009  /*
2010  * We need a share-lock on the buffer contents to write it out (else
2011  * we might write invalid data, eg because someone else is compacting
2012  * the page contents while we write). We must use a conditional lock
2013  * acquisition here to avoid deadlock. Even though the buffer was not
2014  * pinned (and therefore surely not locked) when StrategyGetBuffer
2015  * returned it, someone else could have pinned and exclusive-locked it
2016  * by the time we get here. If we try to get the lock unconditionally,
2017  * we'd block waiting for them; if they later block waiting for us,
2018  * deadlock ensues. (This has been observed to happen when two
2019  * backends are both trying to split btree index pages, and the second
2020  * one just happens to be trying to split the page the first one got
2021  * from StrategyGetBuffer.)
2022  */
2023  content_lock = BufferDescriptorGetContentLock(buf_hdr);
2024  if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
2025  {
2026  /*
2027  * Someone else has locked the buffer, so give it up and loop back
2028  * to get another one.
2029  */
2030  UnpinBuffer(buf_hdr);
2031  goto again;
2032  }
2033 
2034  /*
2035  * If using a nondefault strategy, and writing the buffer would
2036  * require a WAL flush, let the strategy decide whether to go ahead
2037  * and write/reuse the buffer or to choose another victim. We need a
2038  * lock to inspect the page LSN, so this can't be done inside
2039  * StrategyGetBuffer.
2040  */
2041  if (strategy != NULL)
2042  {
2043  XLogRecPtr lsn;
2044 
2045  /* Read the LSN while holding buffer header lock */
2046  buf_state = LockBufHdr(buf_hdr);
2047  lsn = BufferGetLSN(buf_hdr);
2048  UnlockBufHdr(buf_hdr, buf_state);
2049 
2050  if (XLogNeedsFlush(lsn)
2051  && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2052  {
2053  LWLockRelease(content_lock);
2054  UnpinBuffer(buf_hdr);
2055  goto again;
2056  }
2057  }
2058 
2059  /* OK, do the I/O */
2060  FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
2061  LWLockRelease(content_lock);
2062 
2063  ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
2064  &buf_hdr->tag);
2065  }
2066 
2067 
2068  if (buf_state & BM_VALID)
2069  {
2070  /*
2071  * When a BufferAccessStrategy is in use, blocks evicted from shared
2072  * buffers are counted as IOOP_EVICT in the corresponding context
2073  * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2074  * strategy in two cases: 1) while initially claiming buffers for the
2075  * strategy ring 2) to replace an existing strategy ring buffer
2076  * because it is pinned or in use and cannot be reused.
2077  *
2078  * Blocks evicted from buffers already in the strategy ring are
2079  * counted as IOOP_REUSE in the corresponding strategy context.
2080  *
2081  * At this point, we can accurately count evictions and reuses,
2082  * because we have successfully claimed the valid buffer. Previously,
2083  * we may have been forced to release the buffer due to concurrent
2084  * pinners or erroring out.
2085  */
2086  pgstat_count_io_op(IOOBJECT_RELATION, io_context,
2087  from_ring ? IOOP_REUSE : IOOP_EVICT);
2088  }
2089 
2090  /*
2091  * If the buffer has an entry in the buffer mapping table, delete it. This
2092  * can fail because another backend could have pinned or dirtied the
2093  * buffer.
2094  */
2095  if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
2096  {
2097  UnpinBuffer(buf_hdr);
2098  goto again;
2099  }
2100 
2101  /* a final set of sanity checks */
2102 #ifdef USE_ASSERT_CHECKING
2103  buf_state = pg_atomic_read_u32(&buf_hdr->state);
2104 
2105  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2106  Assert(!(buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY)));
2107 
2108  CheckBufferIsPinnedOnce(buf);
2109 #endif
2110 
2111  return buf;
2112 }
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5218
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition: bufmgr.c:5928
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
Definition: freelist.c:196
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition: freelist.c:798
@ IOOP_EVICT
Definition: pgstat.h:298
@ IOOP_REUSE
Definition: pgstat.h:303
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3125

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferGetLSN, CheckBufferIsPinnedOnce(), CurrentResourceOwner, FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBufHdr(), LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), pg_atomic_read_u32(), pgstat_count_io_op(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::state, StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().
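
The deadlock-avoidance idiom at the heart of the dirty-victim path, condensed from the listing above (buf_hdr is pinned, but must not be waited on unconditionally):

    LWLock     *content_lock = BufferDescriptorGetContentLock(buf_hdr);

    if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
    {
        /* someone else locked it first: give the buffer back and
         * restart the victim search rather than risk a deadlock */
        UnpinBuffer(buf_hdr);
        goto again;
    }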

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5386 of file bufmgr.c.

5387 {
5388  int bufid = GetStartupBufferPinWaitBufId();
5389 
5390  /*
5391  * If we get woken slowly then it's possible that the Startup process was
5392  * already woken by other backends before we got here. It's also possible
5393  * that we get here via multiple interrupts or interrupts at inappropriate
5394  * times, so make sure we do nothing if the bufid is not set.
5395  */
5396  if (bufid < 0)
5397  return false;
5398 
5399  if (GetPrivateRefCount(bufid + 1) > 0)
5400  return true;
5401 
5402  return false;
5403 }
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:671

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 4968 of file bufmgr.c.

4969 {
4970  Assert(BufferIsPinned(buffer));
4971  ResourceOwnerEnlarge(CurrentResourceOwner);
4972  if (BufferIsLocal(buffer))
4973  LocalRefCount[-buffer - 1]++;
4974  else
4975  {
4976  PrivateRefCountEntry *ref;
4977 
4978  ref = GetPrivateRefCountEntry(buffer, true);
4979  Assert(ref != NULL);
4980  ref->refcount++;
4981  }
4982  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
4983 }
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().
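
A usage sketch modelled on tts_buffer_heap_store_tuple() (fragment; slot is illustrative): a second, independently owned pin keeps the page resident even after the caller's own pin is released.

    slot->buffer = buffer;          /* remember the page */
    IncrBufferRefCount(buffer);     /* take a pin owned by the slot */

    /* ... later, when the slot is cleared ... */
    ReleaseBuffer(slot->buffer);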

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 3588 of file bufmgr.c.

3589 {
3590  HASHCTL hash_ctl;
3591 
3592  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
3593 
3594  hash_ctl.keysize = sizeof(int32);
3595  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
3596 
3597  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
3598  HASH_ELEM | HASH_BLOBS);
3599 
3600  /*
3601  * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
3602  * the corresponding phase of backend shutdown.
3603  */
3604  Assert(MyProc != NULL);
3605  on_shmem_exit(AtProcExit_Buffers, 0);
3606 }
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:3613
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
PGPROC * MyProc
Definition: proc.c:66
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert, AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1795 of file bufmgr.c.

1796 {
1797  BufferTag oldTag;
1798  uint32 oldHash; /* hash value for oldTag */
1799  LWLock *oldPartitionLock; /* buffer partition lock for it */
1800  uint32 oldFlags;
1801  uint32 buf_state;
1802 
1803  /* Save the original buffer tag before dropping the spinlock */
1804  oldTag = buf->tag;
1805 
1806  buf_state = pg_atomic_read_u32(&buf->state);
1807  Assert(buf_state & BM_LOCKED);
1808  UnlockBufHdr(buf, buf_state);
1809 
1810  /*
1811  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1812  * worth storing the hashcode in BufferDesc so we need not recompute it
1813  * here? Probably not.
1814  */
1815  oldHash = BufTableHashCode(&oldTag);
1816  oldPartitionLock = BufMappingPartitionLock(oldHash);
1817 
1818 retry:
1819 
1820  /*
1821  * Acquire exclusive mapping lock in preparation for changing the buffer's
1822  * association.
1823  */
1824  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1825 
1826  /* Re-lock the buffer header */
1827  buf_state = LockBufHdr(buf);
1828 
1829  /* If it's changed while we were waiting for lock, do nothing */
1830  if (!BufferTagsEqual(&buf->tag, &oldTag))
1831  {
1832  UnlockBufHdr(buf, buf_state);
1833  LWLockRelease(oldPartitionLock);
1834  return;
1835  }
1836 
1837  /*
1838  * We assume the only reason for it to be pinned is that someone else is
1839  * flushing the page out. Wait for them to finish. (This could be an
1840  * infinite loop if the refcount is messed up... it would be nice to time
1841  * out after awhile, but there seems no way to be sure how many loops may
1842  * out after a while, but there seems no way to be sure how many loops may
1843  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1844  * be busy-looping here.)
1845  */
1846  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1847  {
1848  UnlockBufHdr(buf, buf_state);
1849  LWLockRelease(oldPartitionLock);
1850  /* safety check: should definitely not be our *own* pin */
1851  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1852  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1853  WaitIO(buf);
1854  goto retry;
1855  }
1856 
1857  /*
1858  * Clear out the buffer's tag and flags. We must do this to ensure that
1859  * linear scans of the buffer array don't think the buffer is valid.
1860  */
1861  oldFlags = buf_state & BUF_FLAG_MASK;
1862  ClearBufferTag(&buf->tag);
1863  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1864  UnlockBufHdr(buf, buf_state);
1865 
1866  /*
1867  * Remove the buffer from the lookup hashtable, if it was in there.
1868  */
1869  if (oldFlags & BM_TAG_VALID)
1870  BufTableDelete(&oldTag, oldHash);
1871 
1872  /*
1873  * Done with mapping lock.
1874  */
1875  LWLockRelease(oldPartitionLock);
1876 
1877  /*
1878  * Insert the buffer at the head of the list of free buffers.
1879  */
1881 }
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:45
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
#define BM_LOCKED
Definition: buf_internals.h:60
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:5522

References Assert, BM_LOCKED, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), StrategyFreeBuffer(), UnlockBufHdr(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 1893 of file bufmgr.c.

1894 {
1895  uint32 buf_state;
1896  uint32 hash;
1897  LWLock *partition_lock;
1898  BufferTag tag;
1899 
1900  Assert(GetPrivateRefCount(BufferDescriptorGetBuffer(buf_hdr)) == 1);
1901 
1902  /* have buffer pinned, so it's safe to read tag without lock */
1903  tag = buf_hdr->tag;
1904 
1905  hash = BufTableHashCode(&tag);
1906  partition_lock = BufMappingPartitionLock(hash);
1907 
1908  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
1909 
1910  /* lock the buffer header */
1911  buf_state = LockBufHdr(buf_hdr);
1912 
1913  /*
1914  * We have the buffer pinned, so nobody else should have been able to
1915  * unset this concurrently.
1916  */
1917  Assert(buf_state & BM_TAG_VALID);
1918  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1919  Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
1920 
1921  /*
1922  * If somebody else pinned the buffer since, or even worse, dirtied it,
1923  * give up on this buffer: It's clearly in use.
1924  */
1925  if (BUF_STATE_GET_REFCOUNT(buf_state) != 1 || (buf_state & BM_DIRTY))
1926  {
1927  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1928 
1929  UnlockBufHdr(buf_hdr, buf_state);
1930  LWLockRelease(partition_lock);
1931 
1932  return false;
1933  }
1934 
1935  /*
1936  * Clear out the buffer's tag and flags and usagecount. This is not
1937  * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
1938  * doing anything with the buffer. But currently it's beneficial, as the
1939  * cheaper pre-checks for several linear scans of shared buffers use the
1940  * tag (see e.g. FlushDatabaseBuffers()).
1941  */
1942  ClearBufferTag(&buf_hdr->tag);
1943  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1944  UnlockBufHdr(buf_hdr, buf_state);
1945 
1946  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1947 
1948  /* finally delete buffer from the buffer mapping table */
1949  BufTableDelete(&tag, hash);
1950 
1951  LWLockRelease(partition_lock);
1952 
1953  Assert(!(buf_state & (BM_DIRTY | BM_VALID | BM_TAG_VALID)));
1954  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1955  Assert(BUF_STATE_GET_REFCOUNT(pg_atomic_read_u32(&buf_hdr->state)) > 0);
1956 
1957  return true;
1958 }

References Assert, BM_DIRTY, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by EvictUnpinnedBuffer(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 5468 of file bufmgr.c.

5469 {
5470  BufferDesc *bufHdr;
5471  uint32 buf_state;
5472 
5473  Assert(BufferIsValid(buffer));
5474 
5475  if (BufferIsLocal(buffer))
5476  {
5477  /* There should be exactly one pin */
5478  if (LocalRefCount[-buffer - 1] != 1)
5479  return false;
5480  /* Nobody else to wait for */
5481  return true;
5482  }
5483 
5484  /* There should be exactly one local pin */
5485  if (GetPrivateRefCount(buffer) != 1)
5486  return false;
5487 
5488  bufHdr = GetBufferDescriptor(buffer - 1);
5489 
5490  /* caller must hold exclusive lock on buffer */
5491  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
5492  LW_EXCLUSIVE));
5493 
5494  buf_state = LockBufHdr(bufHdr);
5495 
5496  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5497  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5498  {
5499  /* pincount is OK. */
5500  UnlockBufHdr(bufHdr, buf_state);
5501  return true;
5502  }
5503 
5504  UnlockBufHdr(bufHdr, buf_state);
5505  return false;
5506 }

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
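
A sketch of the hash-index pattern named above (hypothetical fragment): having taken an ordinary exclusive lock, proceed with page-level cleanup only if this backend's pin is the sole pin.

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    if (IsBufferCleanupOK(buf))
    {
        /* sole pin + exclusive lock: safe to compact the page or
         * remove dead tuples */
    }
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);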

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 5973 of file bufmgr.c.

5974 {
5975  instr_time io_start;
5976  int i;
5977 
5978  if (wb_context->nr_pending == 0)
5979  return;
5980 
5981  /*
5982  * Executing the writes in-order can make them a lot faster, and allows us to
5983  * merge writeback requests to consecutive blocks into larger writebacks.
5984  */
5985  sort_pending_writebacks(wb_context->pending_writebacks,
5986  wb_context->nr_pending);
5987 
5988  io_start = pgstat_prepare_io_time(track_io_timing);
5989 
5990  /*
5991  * Coalesce neighbouring writes, but nothing else. For that we iterate
5992  * through the now-sorted array of pending flushes, and look forward to
5993  * find all neighbouring (or identical) writes.
5994  */
5995  for (i = 0; i < wb_context->nr_pending; i++)
5996  {
5997  PendingWriteback *cur;
5998  PendingWriteback *next;
5999  SMgrRelation reln;
6000  int ahead;
6001  BufferTag tag;
6002  RelFileLocator currlocator;
6003  Size nblocks = 1;
6004 
6005  cur = &wb_context->pending_writebacks[i];
6006  tag = cur->tag;
6007  currlocator = BufTagGetRelFileLocator(&tag);
6008 
6009  /*
6010  * Peek ahead, into following writeback requests, to see if they can
6011  * be combined with the current one.
6012  */
6013  for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
6014  {
6015 
6016  next = &wb_context->pending_writebacks[i + ahead + 1];
6017 
6018  /* different file, stop */
6019  if (!RelFileLocatorEquals(currlocator,
6020  BufTagGetRelFileLocator(&next->tag)) ||
6021  BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
6022  break;
6023 
6024  /* ok, block queued twice, skip */
6025  if (cur->tag.blockNum == next->tag.blockNum)
6026  continue;
6027 
6028  /* only merge consecutive writes */
6029  if (cur->tag.blockNum + 1 != next->tag.blockNum)
6030  break;
6031 
6032  nblocks++;
6033  cur = next;
6034  }
6035 
6036  i += ahead;
6037 
6038  /* and finally tell the kernel to write the data to storage */
6039  reln = smgropen(currlocator, INVALID_PROC_NUMBER);
6040  smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
6041  }
6042 
6043  /*
6044  * Assume that writeback requests are only issued for buffers containing
6045  * blocks of permanent relations.
6046  */
6047  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
6048  IOOP_WRITEBACK, io_start, wb_context->nr_pending);
6049 
6050  wb_context->nr_pending = 0;
6051 }
@ IOOP_WRITEBACK
Definition: pgstat.h:305
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:643
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, copy_storage_using_buffer_read_stream_private::nblocks, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
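
A sketch of the surrounding protocol (fragment; wb_context is assumed to have been set up with WritebackContextInit()): tags are queued as buffers are written, then handed to the kernel in one sorted, coalesced batch.

    /* after writing a buffer, remember its tag for writeback */
    ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL,
                                  &bufHdr->tag);

    /* ... more buffer writes ... */

    /* sort, merge neighbours, and issue smgrwriteback() calls */
    IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);

Note that ScheduleBufferTagForWriteback() issues the pending requests itself once its configured limit is reached, so the explicit call is only needed at batch boundaries.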

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2127 of file bufmgr.c.

2128 {
2129  uint32 max_backends;
2130  int max_proportional_pins;
2131 
2132  if (*additional_pins <= 1)
2133  return;
2134 
2135  max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
2136  max_proportional_pins = NBuffers / max_backends;
2137 
2138  /*
2139  * Subtract the approximate number of buffers already pinned by this
2140  * backend. We get the number of "overflowed" pins for free, but don't
2141  * know the number of pins in PrivateRefCountArray. The cost of
2142  * calculating that exactly doesn't seem worth it, so just assume the max.
2143  */
2144  max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2145 
2146  if (max_proportional_pins <= 0)
2147  max_proportional_pins = 1;
2148 
2149  if (*additional_pins > max_proportional_pins)
2150  *additional_pins = max_proportional_pins;
2151 }
int MaxBackends
Definition: globals.c:144
#define NUM_AUXILIARY_PROCS
Definition: proc.h:439

References MaxBackends, NBuffers, NUM_AUXILIARY_PROCS, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by ExtendBufferedRelShared(), and read_stream_begin_impl().
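
A sketch of the calling convention (cf. read_stream_begin_impl(); the requested count is hypothetical): the caller proposes a pin count and lets the function clamp it in place.

    uint32      pins_wanted = 64;   /* hypothetical bulk request */

    LimitAdditionalPins(&pins_wanted);
    /* pins_wanted is now at most this backend's proportional share,
     * roughly NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS), and
     * at least 1 */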

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 5727 of file bufmgr.c.

5728 {
5729  BufferDesc *bufHdr = (BufferDesc *) arg;
5730 
5731  if (bufHdr != NULL)
5732  {
5733  char *path = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
5734  MyProcNumber,
5735  BufTagGetForkNum(&bufHdr->tag));
5736 
5737  errcontext("writing block %u of relation %s",
5738  bufHdr->tag.blockNum, path);
5739  pfree(path);
5740  }
5741 }
#define errcontext
Definition: elog.h:196
void * arg

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyProcNumber, pfree(), relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().
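
The error-context idiom this callback participates in, condensed from the local-buffer branch of FlushRelationBuffers() above: push the callback before the risky operation, pop it afterwards, and any intervening ereport() gets the "writing block ... of relation ..." context line appended.

    ErrorContextCallback errcallback;

    errcallback.callback = local_buffer_write_error_callback;
    errcallback.arg = (void *) bufHdr;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... smgrwrite() etc.; errors raised here get the context line ... */

    error_context_stack = errcallback.previous;     /* pop on success */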

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 5171 of file bufmgr.c.

5172 {
5173  BufferDesc *buf;
5174 
5175  Assert(BufferIsPinned(buffer));
5176  if (BufferIsLocal(buffer))
5177  return; /* local buffers need no lock */
5178 
5179  buf = GetBufferDescriptor(buffer - 1);
5180 
5181  if (mode == BUFFER_LOCK_UNLOCK)
5182  LWLockRelease(BufferDescriptorGetContentLock(buf));
5183  else if (mode == BUFFER_LOCK_SHARE)
5184  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
5185  else if (mode == BUFFER_LOCK_EXCLUSIVE)
5186  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
5187  else
5188  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5189 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroAndLockBuffer().
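
The canonical pin/lock/modify cycle (hypothetical fragment; rel and blkno are assumed to be in scope):

    Buffer      buf = ReadBuffer(rel, blkno);
    Page        page;

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    page = BufferGetPage(buf);

    /* ... modify the page ... */
    MarkBufferDirty(buf);

    UnlockReleaseBuffer(buf);   /* BUFFER_LOCK_UNLOCK plus ReleaseBuffer() */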

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5251 of file bufmgr.c.

5252 {
5253  BufferDesc *bufHdr;
5254  TimestampTz waitStart = 0;
5255  bool waiting = false;
5256  bool logged_recovery_conflict = false;
5257 
5258  Assert(BufferIsPinned(buffer));
5259  Assert(PinCountWaitBuf == NULL);
5260 
5261  CheckBufferIsPinnedOnce(buffer);
5262 
5263  /* Nobody else to wait for */
5264  if (BufferIsLocal(buffer))
5265  return;
5266 
5267  bufHdr = GetBufferDescriptor(buffer - 1);
5268 
5269  for (;;)
5270  {
5271  uint32 buf_state;
5272 
5273  /* Try to acquire lock */
5274  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5275  buf_state = LockBufHdr(bufHdr);
5276 
5277  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5278  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5279  {
5280  /* Successfully acquired exclusive lock with pincount 1 */
5281  UnlockBufHdr(bufHdr, buf_state);
5282 
5283  /*
5284  * Emit the log message if recovery conflict on buffer pin was
5285  * resolved but the startup process waited longer than
5286  * deadlock_timeout for it.
5287  */
5288  if (logged_recovery_conflict)
5289  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5290  waitStart, GetCurrentTimestamp(),
5291  NULL, false);
5292 
5293  if (waiting)
5294  {
5295  /* reset ps display to remove the suffix if we added one */
5296  set_ps_display_remove_suffix();
5297  waiting = false;
5298  }
5299  return;
5300  }
5301  /* Failed, so mark myself as waiting for pincount 1 */
5302  if (buf_state & BM_PIN_COUNT_WAITER)
5303  {
5304  UnlockBufHdr(bufHdr, buf_state);
5305  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5306  elog(ERROR, "multiple backends attempting to wait for pincount 1");
5307  }
5308  bufHdr->wait_backend_pgprocno = MyProcNumber;
5309  PinCountWaitBuf = bufHdr;
5310  buf_state |= BM_PIN_COUNT_WAITER;
5311  UnlockBufHdr(bufHdr, buf_state);
5312  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5313 
5314  /* Wait to be signaled by UnpinBuffer() */
5315  if (InHotStandby)
5316  {
5317  if (!waiting)
5318  {
5319  /* adjust the process title to indicate that it's waiting */
5320  set_ps_display_suffix("waiting");
5321  waiting = true;
5322  }
5323 
5324  /*
5325  * Emit the log message if the startup process is waiting longer
5326  * than deadlock_timeout for recovery conflict on buffer pin.
5327  *
5328  * Skip this if first time through because the startup process has
5329  * not started waiting yet in this case. So, the wait start
5330  * timestamp is set after this logic.
5331  */
5332  if (waitStart != 0 && !logged_recovery_conflict)
5333  {
5334  TimestampTz now = GetCurrentTimestamp();
5335 
5336  if (TimestampDifferenceExceeds(waitStart, now,
5337  DeadlockTimeout))
5338  {
5339  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
5340  waitStart, now, NULL, true);
5341  logged_recovery_conflict = true;
5342  }
5343  }
5344 
5345  /*
5346  * Set the wait start timestamp if logging is enabled and first
5347  * time through.
5348  */
5349  if (log_recovery_conflict_waits && waitStart == 0)
5350  waitStart = GetCurrentTimestamp();
5351 
5352  /* Publish the bufid that Startup process waits on */
5353  SetStartupBufferPinWaitBufId(buffer - 1);
5354  /* Set alarm and then wait to be signaled by UnpinBuffer() */
5355  ResolveRecoveryConflictWithBufferPin();
5356  /* Reset the published bufid */
5357  SetStartupBufferPinWaitBufId(-1);
5358  }
5359  else
5360  ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
5361 
5362  /*
5363  * Remove flag marking us as waiter. Normally this will not be set
5364  * anymore, but ProcWaitForSignal() can return for other signals as
5365  * well. We take care to only reset the flag if we're the waiter, as
5366  * theoretically another backend could have started waiting. That's
5367  * impossible with the current usages due to table level locking, but
5368  * better be safe.
5369  */
5370  buf_state = LockBufHdr(bufHdr);
5371  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5372  bufHdr->wait_backend_pgprocno == MyProcNumber)
5373  buf_state &= ~BM_PIN_COUNT_WAITER;
5374  UnlockBufHdr(bufHdr, buf_state);
5375 
5376  PinCountWaitBuf = NULL;
5377  /* Loop back and try again */
5378  }
5379 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1791
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1655
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:67
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:203
int64 TimestampTz
Definition: timestamp.h:39
static volatile sig_atomic_t waiting
Definition: latch.c:162
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:421
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:369
int DeadlockTimeout
Definition: proc.c:57
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:659
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1866
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:792
bool log_recovery_conflict_waits
Definition: standby.c:41
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:273
int wait_backend_pgprocno
#define InHotStandby
Definition: xlogutils.h:60

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
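
A vacuum-style sketch (hypothetical fragment, modelled on lazy_scan_heap(); rel, blkno and bstrategy are assumed to be in scope): a cleanup lock may sleep until every other backend's pin on the page is released.

    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, bstrategy);

    LockBufferForCleanup(buf);
    /* exclusive lock + sole pin: tuples may be physically moved or pruned */
    UnlockReleaseBuffer(buf);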

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 5774 of file bufmgr.c.

5775 {
5776  SpinDelayStatus delayStatus;
5777  uint32 old_buf_state;
5778 
5779  Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
5780 
5781  init_local_spin_delay(&delayStatus);
5782 
5783  while (true)
5784  {
5785  /* set BM_LOCKED flag */
5786  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
5787  /* if it wasn't set before we're OK */
5788  if (!(old_buf_state & BM_LOCKED))
5789  break;
5790  perform_spin_delay(&delayStatus);
5791  }
5792  finish_spin_delay(&delayStatus);
5793  return old_buf_state | BM_LOCKED;
5794 }
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:403
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:127
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:187
#define init_local_spin_delay(status)
Definition: s_lock.h:778

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBufferNoOwner(), and WaitIO().
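
The header-spinlock protocol, as used by the callers above: the returned state word has BM_LOCKED set, and the (possibly modified) word is handed back to UnlockBufHdr(), which clears the flag again.

    uint32      buf_state = LockBufHdr(bufHdr);

    if ((buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
    {
        /* inspect or adjust the state while the header is locked */
    }
    UnlockBufHdr(bufHdr, buf_state);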

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 2543 of file bufmgr.c.

2544 {
2545  BufferDesc *bufHdr;
2546  uint32 buf_state;
2547  uint32 old_buf_state;
2548 
2549  if (!BufferIsValid(buffer))
2550  elog(ERROR, "bad buffer ID: %d", buffer);
2551 
2552  if (BufferIsLocal(buffer))
2553  {
2554  MarkLocalBufferDirty(buffer);
2555  return;
2556  }
2557 
2558  bufHdr = GetBufferDescriptor(buffer - 1);
2559 
2560  Assert(BufferIsPinned(buffer));
2561  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2562  LW_EXCLUSIVE));
2563 
2564  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
2565  for (;;)
2566  {
2567  if (old_buf_state & BM_LOCKED)
2568  old_buf_state = WaitBufHdrUnlocked(bufHdr);
2569 
2570  buf_state = old_buf_state;
2571 
2572  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2573  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
2574 
2575  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
2576  buf_state))
2577  break;
2578  }
2579 
2580  /*
2581  * If the buffer was not dirty already, do vacuum accounting.
2582  */
2583  if (!(old_buf_state & BM_DIRTY))
2584  {
2585  VacuumPageDirty++;
2586  pgBufferUsage.shared_blks_dirtied++;
2587  if (VacuumCostActive)
2588  VacuumCostBalance += VacuumCostPageDirty;
2589  }
2590 }
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:342
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:5804
bool VacuumCostActive
Definition: globals.c:160
int64 VacuumPageDirty
Definition: globals.c:157
int VacuumCostBalance
Definition: globals.c:159
int VacuumCostPageDirty
Definition: globals.c:151
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:449
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), do_setval(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().
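
A condensed sketch of the standard WAL-logged page update (the rmgr id and info byte are placeholders; buf is assumed pinned and exclusively locked): the buffer must be dirtied inside the critical section, before the record's LSN is stamped on the page.

    XLogRecPtr  recptr;

    START_CRIT_SECTION();

    /* ... apply the change to BufferGetPage(buf) ... */
    MarkBufferDirty(buf);

    XLogBeginInsert();
    XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
    recptr = XLogInsert(RM_EXAMPLE_ID, XLOG_EXAMPLE_OP);    /* placeholders */
    PageSetLSN(BufferGetPage(buf), recptr);

    END_CRIT_SECTION();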

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5000 of file bufmgr.c.

5001 {
5002  BufferDesc *bufHdr;
5003  Page page = BufferGetPage(buffer);
5004 
5005  if (!BufferIsValid(buffer))
5006  elog(ERROR, "bad buffer ID: %d", buffer);
5007 
5008  if (BufferIsLocal(buffer))
5009  {
5010  MarkLocalBufferDirty(buffer);
5011  return;
5012  }
5013 
5014  bufHdr = GetBufferDescriptor(buffer - 1);
5015 
5016  Assert(GetPrivateRefCount(buffer) > 0);
5017  /* here, either share or exclusive lock is OK */
5018  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
5019 
5020  /*
5021  * This routine might get called many times on the same page, if we are
5022  * making the first scan after commit of an xact that added/deleted many
5023  * tuples. So, be as quick as we can if the buffer is already dirty. We
5024  * do this by not acquiring spinlock if it looks like the status bits are
5025  * already set. Since we make this test unlocked, there's a chance we
5026  * might fail to notice that the flags have just been cleared, and failed
5027  * might fail to notice that the flags have just been cleared, and fail
5028  * to reset them, due to memory-ordering issues. But since this function
5029  * data would be harmless anyway, it doesn't really matter.
5030  */
5031  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5032  (BM_DIRTY | BM_JUST_DIRTIED))
5033  {
5034  XLogRecPtr lsn = InvalidXLogRecPtr;
5035  bool dirtied = false;
5036  bool delayChkptFlags = false;
5037  uint32 buf_state;
5038 
5039  /*
5040  * If we need to protect hint bit updates from torn writes, WAL-log a
5041  * full page image of the page. This full page image is only necessary
5042  * if the hint bit update is the first change to the page since the
5043  * last checkpoint.
5044  *
5045  * We don't check full_page_writes here because that logic is included
5046  * when we call XLogInsert() since the value changes dynamically.
5047  */
5048  if (XLogHintBitIsNeeded() &&
5049  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
5050  {
5051  /*
5052  * If we must not write WAL, due to a relfilelocator-specific
5053  * condition or being in recovery, don't dirty the page. We can
5054  * set the hint, just not dirty the page as a result so the hint
5055  * is lost when we evict the page or shutdown.
5056  *
5057  * See src/backend/storage/page/README for longer discussion.
5058  */
5059  if (RecoveryInProgress() ||
5060  RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
5061  return;
5062 
5063  /*
5064  * If the block is already dirty because we either made a change
5065  * or set a hint already, then we don't need to write a full page
5066  * image. Note that aggressive cleaning of blocks dirtied by hint
5067  * bit setting would increase the call rate. Bulk setting of hint
5068  * bits would reduce the call rate...
5069  *
5070  * We must issue the WAL record before we mark the buffer dirty.
5071  * Otherwise we might write the page before we write the WAL. That
5072  * causes a race condition, since a checkpoint might occur between
5073  * writing the WAL record and marking the buffer dirty. We solve
5074  * that with a kluge, but one that is already in use during
5075  * transaction commit to prevent race conditions. Basically, we
5076  * simply prevent the checkpoint WAL record from being written
5077  * until we have marked the buffer dirty. We don't start the
5078  * checkpoint flush until we have marked dirty, so our checkpoint
5079  * must flush the change to disk successfully or the checkpoint
5080  * never gets written, in which case crash recovery will fix things up.
5081  *
5082  * It's possible we may enter here without an xid, so it is
5083  * essential that CreateCheckPoint waits for virtual transactions
5084  * rather than full transactionids.
5085  */
5086  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
5087  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
5088  delayChkptFlags = true;
5089  lsn = XLogSaveBufferForHint(buffer, buffer_std);
5090  }
5091 
5092  buf_state = LockBufHdr(bufHdr);
5093 
5094  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5095 
5096  if (!(buf_state & BM_DIRTY))
5097  {
5098  dirtied = true; /* Means "will be dirtied by this action" */
5099 
5100  /*
5101  * Set the page LSN if we wrote a backup block. We aren't supposed
5102  * to set this when only holding a share lock but as long as we
5103  * serialise it somehow we're OK. We choose to set LSN while
5104  * holding the buffer header lock, which causes any reader of an
5105  * LSN who holds only a share lock to also obtain a buffer header
5106  * lock before using PageGetLSN(), which is enforced in
5107  * BufferGetLSNAtomic().
5108  *
5109  * If checksums are enabled, you might think we should reset the
5110  * checksum here. That will happen when the page is written
5111  * sometime later in this checkpoint cycle.
5112  */
5113  if (!XLogRecPtrIsInvalid(lsn))
5114  PageSetLSN(page, lsn);
5115  }
5116 
5117  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
5118  UnlockBufHdr(bufHdr, buf_state);
5119 
5120  if (delayChkptFlags)
5121  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5122 
5123  if (dirtied)
5124  {
5125  VacuumPageDirty++;
5126  pgBufferUsage.shared_blks_dirtied++;
5127  if (VacuumCostActive)
5128  VacuumCostBalance += VacuumCostPageDirty;
5129  }
5130  }
5131 }
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define DELAY_CHKPT_START
Definition: proc.h:114
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:532
int delayChkptFlags
Definition: proc.h:235
bool RecoveryInProgress(void)
Definition: xlog.c:6304
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1065

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 343 of file bufmgr.c.

344 {
346 
347  /* only allowed to be called when a reservation has been made */
348  Assert(ReservedRefCountEntry != NULL);
349 
350  /* use up the reserved entry */
352  ReservedRefCountEntry = NULL;
353 
354  /* and fill it */
355  res->buffer = buffer;
356  res->refcount = 0;
357 
358  return res;
359 }

References Assert, PrivateRefCountEntry::buffer, res, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 2664 of file bufmgr.c.

2665 {
2666  Buffer b = BufferDescriptorGetBuffer(buf);
2667  bool result;
2668  PrivateRefCountEntry *ref;
2669 
2670  Assert(!BufferIsLocal(b));
2671  Assert(ReservedRefCountEntry != NULL);
2672 
2673  ref = GetPrivateRefCountEntry(b, true);
2674 
2675  if (ref == NULL)
2676  {
2677  uint32 buf_state;
2678  uint32 old_buf_state;
2679 
2680  ref = NewPrivateRefCountEntry(b);
2681 
2682  old_buf_state = pg_atomic_read_u32(&buf->state);
2683  for (;;)
2684  {
2685  if (old_buf_state & BM_LOCKED)
2686  old_buf_state = WaitBufHdrUnlocked(buf);
2687 
2688  buf_state = old_buf_state;
2689 
2690  /* increase refcount */
2691  buf_state += BUF_REFCOUNT_ONE;
2692 
2693  if (strategy == NULL)
2694  {
2695  /* Default case: increase usagecount unless already max. */
2696  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
2697  buf_state += BUF_USAGECOUNT_ONE;
2698  }
2699  else
2700  {
2701  /*
2702  * Ring buffers shouldn't evict others from pool. Thus we
2703  * don't make usagecount more than 1.
2704  */
2705  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
2706  buf_state += BUF_USAGECOUNT_ONE;
2707  }
2708 
2709  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2710  buf_state))
2711  {
2712  result = (buf_state & BM_VALID) != 0;
2713 
2714  /*
2715  * Assume that we acquired a buffer pin for the purposes of
2716  * Valgrind buffer client checks (even in !result case) to
2717  * keep things simple. Buffers that are unsafe to access are
2718  * not generally guaranteed to be marked undefined or
2719  * non-accessible in any case.
2720  */
2721  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2722  break;
2723  }
2724  }
2725  }
2726  else
2727  {
2728  /*
2729  * If we previously pinned the buffer, it is likely to be valid, but
2730  * it may not be if StartReadBuffers() was called and
2731  * WaitReadBuffers() hasn't been called yet. We'll check by loading
2732  * the flags without locking. This is racy, but it's OK to return
2733  * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
2734  * it'll see that it's now valid.
2735  *
2736  * Note: We deliberately avoid a Valgrind client request here.
2737  * Individual access methods can optionally superimpose buffer page
2738  * client requests on top of our client requests to enforce that
2739  * buffers are only accessed while locked (and pinned). It's possible
2740  * that the buffer page is legitimately non-accessible here. We
2741  * cannot meddle with that.
2742  */
2743  result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
2744  }
2745 
2746  ref->refcount++;
2747  Assert(ref->refcount > 0);
2748  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2749  return result;
2750 }
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:78
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:43
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:52
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:343
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservedRefCountEntry, ResourceOwnerRememberBuffer(), VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 2775 of file bufmgr.c.

2776 {
2777  Buffer b;
2778  PrivateRefCountEntry *ref;
2779  uint32 buf_state;
2780 
2781  /*
2782  * As explained, we don't expect any preexisting pins. That allows us to
2783  * manipulate the PrivateRefCount after releasing the spinlock.
2784  */
2785  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
2786 
2787  /*
2788  * Buffer can't have a preexisting pin, so mark its page as defined to
2789  * Valgrind (this is similar to the PinBuffer() case where the backend
2790  * doesn't already have a buffer pin)
2791  */
2792  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2793 
2794  /*
2795  * Since we hold the buffer spinlock, we can update the buffer state and
2796  * release the lock in one operation.
2797  */
2798  buf_state = pg_atomic_read_u32(&buf->state);
2799  Assert(buf_state & BM_LOCKED);
2800  buf_state += BUF_REFCOUNT_ONE;
2801  UnlockBufHdr(buf, buf_state);
2802 
2803  b = BufferDescriptorGetBuffer(buf);
2804 
2805  ref = NewPrivateRefCountEntry(b);
2806  ref->refcount++;
2807 
2808  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2809 }

References Assert, b, BM_LOCKED, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), UnlockBufHdr(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by EvictUnpinnedBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr 
)
static

Definition at line 1133 of file bufmgr.c.

1140 {
1141  BufferDesc *bufHdr;
1142  IOContext io_context;
1143  IOObject io_object;
1144 
1145  Assert(blockNum != P_NEW);
1146 
1147  /* Persistence should be set before */
1148  Assert((persistence == RELPERSISTENCE_TEMP ||
1149  persistence == RELPERSISTENCE_PERMANENT ||
1150  persistence == RELPERSISTENCE_UNLOGGED));
1151 
1152  if (persistence == RELPERSISTENCE_TEMP)
1153  {
1154  io_context = IOCONTEXT_NORMAL;
1155  io_object = IOOBJECT_TEMP_RELATION;
1156  }
1157  else
1158  {
1159  io_context = IOContextForStrategy(strategy);
1160  io_object = IOOBJECT_RELATION;
1161  }
1162 
1163  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1164  smgr->smgr_rlocator.locator.spcOid,
1165  smgr->smgr_rlocator.locator.dbOid,
1166  smgr->smgr_rlocator.locator.relNumber,
1167  smgr->smgr_rlocator.backend);
1168 
1169  if (persistence == RELPERSISTENCE_TEMP)
1170  {
1171  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1172  if (*foundPtr)
1173  pgBufferUsage.local_blks_hit++;
1174  }
1175  else
1176  {
1177  bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1178  strategy, foundPtr, io_context);
1179  if (*foundPtr)
1180  pgBufferUsage.shared_blks_hit++;
1181  }
1182  if (rel)
1183  {
1184  /*
1185  * While pgBufferUsage's "read" counter isn't bumped unless we reach
1186  * WaitReadBuffers() (so, not for hits, and not for buffers that are
1187  * zeroed instead), the per-relation stats always count them.
1188  */
1189  pgstat_count_buffer_read(rel);
1190  if (*foundPtr)
1191  pgstat_count_buffer_hit(rel);
1192  }
1193  if (*foundPtr)
1194  {
1195  VacuumPageHit++;
1196  pgstat_count_io_op(io_object, io_context, IOOP_HIT);
1197  if (VacuumCostActive)
1198  VacuumCostBalance += VacuumCostPageHit;
1199 
1200  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1201  smgr->smgr_rlocator.locator.spcOid,
1202  smgr->smgr_rlocator.locator.dbOid,
1203  smgr->smgr_rlocator.locator.relNumber,
1204  smgr->smgr_rlocator.backend,
1205  true);
1206  }
1207 
1208  return BufferDescriptorGetBuffer(bufHdr);
1209 }
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition: bufmgr.c:1617
#define P_NEW
Definition: bufmgr.h:184
int64 VacuumPageHit
Definition: globals.c:155
int VacuumCostPageHit
Definition: globals.c:149
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:116
IOObject
Definition: pgstat.h:279
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:635
@ IOOP_HIT
Definition: pgstat.h:301
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:640
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_hit
Definition: instrument.h:26

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, and VacuumPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 666 of file bufmgr.c.

667 {
668  Assert(RelationIsValid(reln));
669  Assert(BlockNumberIsValid(blockNum));
670 
671  if (RelationUsesLocalBuffers(reln))
672  {
673  /* see comments in ReadBufferExtended */
674  if (RELATION_IS_OTHER_TEMP(reln))
675  ereport(ERROR,
676  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
677  errmsg("cannot access temporary tables of other sessions")));
678 
679  /* pass it off to localbuf.c */
680  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
681  }
682  else
683  {
684  /* pass it to the shared buffer version */
685  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
686  }
687 }
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:576
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:69
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:658
#define RelationIsValid(relation)
Definition: rel.h:478

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().
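
Usage sketch (illustrative only, not part of bufmgr.c; prefetch_then_read() is a hypothetical caller and assumes blkno is within the relation): issue the prefetch early, overlap other work with the I/O, and let the later read pin the buffer.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    prefetch_then_read(Relation rel, BlockNumber blkno)
    {
        Buffer      buf;

        /* Hint that this block is needed soon; may start asynchronous I/O. */
        (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

        /* ... other work that overlaps with the prefetched I/O ... */

        /* The read itself is what pins the buffer (likely cached by now). */
        buf = ReadBuffer(rel, blkno);
        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... inspect BufferGetPage(buf) ... */
        UnlockReleaseBuffer(buf);
    }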

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 576 of file bufmgr.c.

579 {
580  PrefetchBufferResult result = {InvalidBuffer, false};
581  BufferTag newTag; /* identity of requested block */
582  uint32 newHash; /* hash value for newTag */
583  LWLock *newPartitionLock; /* buffer partition lock for it */
584  int buf_id;
585 
586  Assert(BlockNumberIsValid(blockNum));
587 
588  /* create a tag so we can lookup the buffer */
589  InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
590  forkNum, blockNum);
591 
592  /* determine its hash code and partition lock ID */
593  newHash = BufTableHashCode(&newTag);
594  newPartitionLock = BufMappingPartitionLock(newHash);
595 
596  /* see if the block is in the buffer pool already */
597  LWLockAcquire(newPartitionLock, LW_SHARED);
598  buf_id = BufTableLookup(&newTag, newHash);
599  LWLockRelease(newPartitionLock);
600 
601  /* If not in buffers, initiate prefetch */
602  if (buf_id < 0)
603  {
604 #ifdef USE_PREFETCH
605  /*
606  * Try to initiate an asynchronous read. This returns false in
607  * recovery if the relation file doesn't exist.
608  */
609  if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
610  smgrprefetch(smgr_reln, forkNum, blockNum, 1))
611  {
612  result.initiated_io = true;
613  }
614 #endif /* USE_PREFETCH */
615  }
616  else
617  {
618  /*
619  * Report the buffer it was in at that time. The caller may be able
620  * to avoid a buffer table lookup, but it's not pinned and it must be
621  * rechecked!
622  */
623  result.recent_buffer = buf_id + 1;
624  }
625 
626  /*
627  * If the block *is* in buffers, we do nothing. This is not really ideal:
628  * the block might be just about to be evicted, which would be stupid
629  * since we know we are going to need it soon. But the only easy answer
630  * is to bump the usage_count, which does not seem like a great solution:
631  * when the caller does ultimately touch the block, usage_count would get
632  * bumped again, resulting in too much favoritism for blocks that are
633  * involved in a prefetch sequence. A real fix would involve some
634  * additional per-buffer state, and it's not clear that there's enough of
635  * a problem to justify that.
636  */
637 
638  return result;
639 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:585
Buffer recent_buffer
Definition: bufmgr.h:60

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().
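
A sketch of the smgr-level variant, roughly the pattern recovery-side callers such as XLogPrefetcherNextBlock() follow (hypothetical function; the result fields must be treated as hints only):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "storage/smgr.h"

    static void
    prefetch_by_locator(RelFileLocator rlocator, BlockNumber blkno)
    {
        SMgrRelation reln = smgropen(rlocator, INVALID_PROC_NUMBER);
        PrefetchBufferResult r = PrefetchSharedBuffer(reln, MAIN_FORKNUM, blkno);

        if (BufferIsValid(r.recent_buffer))
        {
            /* Already resident: r.recent_buffer can later be revalidated
             * with ReadRecentBuffer() to skip the mapping-table lookup. */
        }
        else if (r.initiated_io)
        {
            /* Asynchronous read advice was issued to the kernel. */
        }
    }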

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 773 of file bufmgr.c.

774 {
775  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
776 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:820
@ RBM_NORMAL
Definition: bufmgr.h:45

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
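
The canonical calling pattern is read, lock, examine, then drop lock and pin together; a minimal sketch (examine_block() is a hypothetical caller, the API calls are the ones documented here):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    examine_block(Relation rel, BlockNumber blkno)
    {
        Buffer      buf = ReadBuffer(rel, blkno);   /* pins the buffer */
        Page        page;

        LockBuffer(buf, BUFFER_LOCK_SHARE);         /* content lock for reading */
        page = BufferGetPage(buf);

        /* ... examine the page while the lock is held ... */
        (void) page;

        UnlockReleaseBuffer(buf);                   /* drops the lock, then the pin */
    }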

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1217 of file bufmgr.c.

1221 {
1222  ReadBuffersOperation operation;
1223  Buffer buffer;
1224  int flags;
1225  char persistence;
1226 
1227  /*
1228  * Backward compatibility path, most code should use ExtendBufferedRel()
1229  * instead, as acquiring the extension lock inside ExtendBufferedRel()
1230  * scales a lot better.
1231  */
1232  if (unlikely(blockNum == P_NEW))
1233  {
1234  uint32 flags = EB_SKIP_EXTENSION_LOCK;
1235 
1236  /*
1237  * Since no-one else can be looking at the page contents yet, there is
1238  * no difference between an exclusive lock and a cleanup-strength
1239  * lock.
1240  */
1241  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1242  flags |= EB_LOCK_FIRST;
1243 
1244  return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1245  }
1246 
1247  if (rel)
1248  persistence = rel->rd_rel->relpersistence;
1249  else
1250  persistence = smgr_persistence;
1251 
1253  mode == RBM_ZERO_AND_LOCK))
1254  {
1255  bool found;
1256 
1257  buffer = PinBufferForBlock(rel, smgr, persistence,
1258  forkNum, blockNum, strategy, &found);
1259  ZeroAndLockBuffer(buffer, mode, found);
1260  return buffer;
1261  }
1262 
1263  if (mode == RBM_ZERO_ON_ERROR)
1264  flags = READ_BUFFERS_ZERO_ON_ERROR;
1265  else
1266  flags = 0;
1267  operation.smgr = smgr;
1268  operation.rel = rel;
1269  operation.persistence = persistence;
1270  operation.forknum = forkNum;
1271  operation.strategy = strategy;
1272  if (StartReadBuffer(&operation,
1273  &buffer,
1274  blockNum,
1275  flags))
1276  WaitReadBuffers(&operation);
1277 
1278  return buffer;
1279 }
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:873
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition: bufmgr.c:1046
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:1133
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition: bufmgr.c:1420
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition: bufmgr.c:1392
#define READ_BUFFERS_ZERO_ON_ERROR
Definition: bufmgr.h:111
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:50
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
#define unlikely(x)
Definition: c.h:311
ForkNumber forknum
Definition: bufmgr.h:121
BufferAccessStrategy strategy
Definition: bufmgr.h:122
struct SMgrRelationData * smgr
Definition: bufmgr.h:119

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 820 of file bufmgr.c.

822 {
823  Buffer buf;
824 
825  /*
826  * Reject attempts to read non-local temporary relations; we would be
827  * likely to get wrong data since we have no visibility into the owning
828  * session's local buffers.
829  */
830  if (RELATION_IS_OTHER_TEMP(reln))
831  ereport(ERROR,
832  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
833  errmsg("cannot access temporary tables of other sessions")));
834 
835  /*
836  * Read the buffer, and update pgstat counters to reflect a cache hit or
837  * miss.
838  */
839  buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
840  forkNum, blockNum, mode, strategy);
841 
842  return buf;
843 }

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), autoprewarm_database_main(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), brin_vacuum_scan(), bt_recheck_sibling_links(), btvacuumpage(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_newpage_range(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), verify_heapam(), and vm_readbuf().
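
For bulk scans, passing a BAS_BULKREAD strategy confines the scan to a small ring of buffers instead of flooding shared_buffers; a sketch (bulk_scan() is a hypothetical caller; it assumes no concurrent truncation):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    bulk_scan(Relation rel)
    {
        BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
        BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

        for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
        {
            Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                                 RBM_NORMAL, strategy);

            LockBuffer(buf, BUFFER_LOCK_SHARE);
            /* ... process BufferGetPage(buf) ... */
            UnlockReleaseBuffer(buf);
        }

        FreeAccessStrategy(strategy);
    }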

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 857 of file bufmgr.c.

860 {
861  SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
862 
863  return ReadBuffer_common(NULL, smgr,
864  permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
865  forkNum, blockNum,
866  mode, strategy);
867 }

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 697 of file bufmgr.c.

699 {
700  BufferDesc *bufHdr;
701  BufferTag tag;
702  uint32 buf_state;
703  bool have_private_ref;
704 
705  Assert(BufferIsValid(recent_buffer));
706 
707  ResourceOwnerEnlarge(CurrentResourceOwner);
708  ReservePrivateRefCountEntry();
709  InitBufferTag(&tag, &rlocator, forkNum, blockNum);
710 
711  if (BufferIsLocal(recent_buffer))
712  {
713  int b = -recent_buffer - 1;
714 
715  bufHdr = GetLocalBufferDescriptor(b);
716  buf_state = pg_atomic_read_u32(&bufHdr->state);
717 
718  /* Is it still valid and holding the right tag? */
719  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
720  {
721  PinLocalBuffer(bufHdr, true);
722 
723  pgBufferUsage.local_blks_hit++;
724 
725  return true;
726  }
727  }
728  else
729  {
730  bufHdr = GetBufferDescriptor(recent_buffer - 1);
731  have_private_ref = GetPrivateRefCount(recent_buffer) > 0;
732 
733  /*
734  * Do we already have this buffer pinned with a private reference? If
735  * so, it must be valid and it is safe to check the tag without
736  * locking. If not, we have to lock the header first and then check.
737  */
738  if (have_private_ref)
739  buf_state = pg_atomic_read_u32(&bufHdr->state);
740  else
741  buf_state = LockBufHdr(bufHdr);
742 
743  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
744  {
745  /*
746  * It's now safe to pin the buffer. We can't pin first and ask
747  * questions later, because it might confuse code paths like
748  * InvalidateBuffer() if we pinned a random non-matching buffer.
749  */
750  if (have_private_ref)
751  PinBuffer(bufHdr, NULL); /* bump pin count */
752  else
753  PinBuffer_Locked(bufHdr); /* pin for first time */
754 
755  pgBufferUsage.shared_blks_hit++;
756 
757  return true;
758  }
759 
760  /* If we locked the header above, now unlock. */
761  if (!have_private_ref)
762  UnlockBufHdr(bufHdr, buf_state);
763  }
764 
765  return false;
766 }
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:655

References Assert, b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), InitBufferTag(), BufferUsage::local_blks_hit, LockBufHdr(), pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinBuffer_Locked(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by XLogReadBufferExtended().
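
ReadRecentBuffer() pairs naturally with a remembered Buffer number, e.g. the recent_buffer reported by PrefetchBuffer(); a sketch in the spirit of XLogReadBufferExtended() (read_with_recent_hint() is hypothetical):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static Buffer
    read_with_recent_hint(Relation rel, BlockNumber blkno, Buffer recent)
    {
        /* The remembered Buffer is only a hint; it must be revalidated. */
        if (BufferIsValid(recent) &&
            ReadRecentBuffer(rel->rd_locator, MAIN_FORKNUM, blkno, recent))
            return recent;      /* still holds the right block; now pinned */

        /* Fall back to the regular buffer-mapping lookup. */
        return ReadBuffer(rel, blkno);
    }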

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 4702 of file bufmgr.c.

4705 {
4706  Buffer srcBuf;
4707  Buffer dstBuf;
4708  Page srcPage;
4709  Page dstPage;
4710  bool use_wal;
4711  BlockNumber nblocks;
4712  BlockNumber blkno;
4713  PGIOAlignedBlock buf;
4714  BufferAccessStrategy bstrategy_src;
4715  BufferAccessStrategy bstrategy_dst;
4716  struct copy_storage_using_buffer_read_stream_private p;
4717  ReadStream *src_stream;
4718  SMgrRelation src_smgr;
4719 
4720  /*
4721  * In general, we want to write WAL whenever wal_level > 'minimal', but we
4722  * can skip it when copying any fork of an unlogged relation other than
4723  * the init fork.
4724  */
4725  use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
4726 
4727  /* Get number of blocks in the source relation. */
4728  nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
4729  forkNum);
4730 
4731  /* Nothing to copy; just return. */
4732  if (nblocks == 0)
4733  return;
4734 
4735  /*
4736  * Bulk extend the destination relation of the same size as the source
4737  * relation before starting to copy block by block.
4738  */
4739  memset(buf.data, 0, BLCKSZ);
4740  smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
4741  buf.data, true);
4742 
4743  /* This is a bulk operation, so use buffer access strategies. */
4744  bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
4745  bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
4746 
4747  /* Initialize streaming read */
4748  p.blocknum = 0;
4749  p.nblocks = nblocks;
4750  src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER);
4751  src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL,
4752  bstrategy_src,
4753  src_smgr,
4754  permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
4755  forkNum,
4756  copy_storage_using_buffer_read_stream_next_block,
4757  &p,
4758  0);
4759 
4760  /* Iterate over each block of the source relation file. */
4761  for (blkno = 0; blkno < nblocks; blkno++)
4762  {
4763  CHECK_FOR_INTERRUPTS();
4764 
4765  /* Read block from source relation. */
4766  srcBuf = read_stream_next_buffer(src_stream, NULL);
4767  LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
4768  srcPage = BufferGetPage(srcBuf);
4769 
4770  dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum,
4771  BufferGetBlockNumber(srcBuf),
4772  RBM_ZERO_AND_LOCK, bstrategy_dst,
4773  permanent);
4774  dstPage = BufferGetPage(dstBuf);
4775 
4776  START_CRIT_SECTION();
4777 
4778  /* Copy page data from the source to the destination. */
4779  memcpy(dstPage, srcPage, BLCKSZ);
4780  MarkBufferDirty(dstBuf);
4781 
4782  /* WAL-log the copied page. */
4783  if (use_wal)
4784  log_newpage_buffer(dstBuf, true);
4785 
4786  END_CRIT_SECTION();
4787 
4788  UnlockReleaseBuffer(dstBuf);
4789  UnlockReleaseBuffer(srcBuf);
4790  }
4791  Assert(read_stream_next_buffer(src_stream, NULL) == InvalidBuffer);
4792  read_stream_end(src_stream);
4793 
4794  FreeAccessStrategy(bstrategy_src);
4795  FreeAccessStrategy(bstrategy_dst);
4796 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3736
static BlockNumber copy_storage_using_buffer_read_stream_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: bufmgr.c:154
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4953
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2543
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition: bufmgr.c:857
@ BAS_BULKREAD
Definition: bufmgr.h:36
@ BAS_BULKWRITE
Definition: bufmgr.h:38
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:681
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:620
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:590
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:850
#define READ_STREAM_FULL
Definition: read_stream.h:43
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:535
#define XLogIsNeeded()
Definition: xlog.h:109
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237

References Assert, BAS_BULKREAD, BAS_BULKWRITE, copy_storage_using_buffer_read_stream_private::blocknum, buf, BUFFER_LOCK_SHARE, BufferGetBlockNumber(), BufferGetPage(), CHECK_FOR_INTERRUPTS, copy_storage_using_buffer_read_stream_next_block(), END_CRIT_SECTION, FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBuffer, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), copy_storage_using_buffer_read_stream_private::nblocks, RBM_ZERO_AND_LOCK, read_stream_begin_smgr_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 3935 of file bufmgr.c.

3936 {
3937  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3938  {
3939  /*
3940  * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
3941  * tableam returns the size in bytes - but for the purpose of this
3942  * routine, we want the number of blocks. Therefore divide, rounding
3943  * up.
3944  */
3945  uint64 szbytes;
3946 
3947  szbytes = table_relation_size(relation, forkNum);
3948 
3949  return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
3950  }
3951  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
3952  {
3953  return smgrnblocks(RelationGetSmgr(relation), forkNum);
3954  }
3955  else
3956  Assert(false);
3957 
3958  return 0; /* keep compiler quiet */
3959 }
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1868

References Assert, RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
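
A usage sketch (total_data_blocks() is a hypothetical helper): non-main forks may be absent, so their existence is checked before asking for a size.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "storage/smgr.h"
    #include "utils/rel.h"

    static BlockNumber
    total_data_blocks(Relation rel)
    {
        BlockNumber blocks = RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);

        /* The FSM fork may not exist yet; smgrexists() guards the lookup. */
        if (smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
            blocks += RelationGetNumberOfBlocksInFork(rel, FSM_FORKNUM);

        return blocks;
    }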

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 2606 of file bufmgr.c.

2609 {
2610  ForkNumber forkNum = MAIN_FORKNUM;
2611  BufferDesc *bufHdr;
2612 
2613  if (BufferIsValid(buffer))
2614  {
2615  Assert(BufferIsPinned(buffer));
2616  if (BufferIsLocal(buffer))
2617  {
2618  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2619  if (bufHdr->tag.blockNum == blockNum &&
2620  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2621  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2622  return buffer;
2623  UnpinLocalBuffer(buffer);
2624  }
2625  else
2626  {
2627  bufHdr = GetBufferDescriptor(buffer - 1);
2628  /* we have pin, so it's ok to examine tag without spinlock */
2629  if (bufHdr->tag.blockNum == blockNum &&
2630  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2631  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2632  return buffer;
2633  UnpinBuffer(bufHdr);
2634  }
2635  }
2636 
2637  return ReadBuffer(relation, blockNum);
2638 }
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:773
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:681

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
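
A sketch of the hand-over-hand pattern this enables (walk_blocks() is a hypothetical caller): when a scan moves between blocks, the old pin is swapped for the new one in a single call, and re-used when the block number does not change.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    walk_blocks(Relation rel, BlockNumber *chain, int nchain)
    {
        Buffer      buf = InvalidBuffer;    /* a valid buffer is also accepted */

        for (int i = 0; i < nchain; i++)
        {
            /* Re-uses the existing pin if chain[i] is the block already held. */
            buf = ReleaseAndReadBuffer(buf, rel, chain[i]);

            LockBuffer(buf, BUFFER_LOCK_SHARE);
            /* ... inspect BufferGetPage(buf) ... */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        if (BufferIsValid(buf))
            ReleaseBuffer(buf);
    }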

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 4936 of file bufmgr.c.

4937 {
4938  if (!BufferIsValid(buffer))
4939  elog(ERROR, "bad buffer ID: %d", buffer);
4940 
4941  if (BufferIsLocal(buffer))
4942  UnpinLocalBuffer(buffer);
4943  else
4944  UnpinBuffer(GetBufferDescriptor(buffer - 1));
4945 }

References BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_drop_lock_and_maybe_pin(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndBitmapHeapScan(), ExecEndIndexOnlyScan(), ExecReScanBitmapHeapScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), lazy_scan_heap(), lazy_vacuum_heap_rel(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 277 of file bufmgr.c.

278 {
279  /* Already reserved (or freed), nothing to do */
280  if (ReservedRefCountEntry != NULL)
281  return;
282 
283  /*
284  * First search for a free entry in the array, that'll be sufficient in the
285  * majority of cases.
286  */
287  {
288  int i;
289 
290  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
291  {
292  PrivateRefCountEntry *res;
293 
294  res = &PrivateRefCountArray[i];
295 
296  if (res->buffer == InvalidBuffer)
297  {
298  ReservedRefCountEntry = res;
299  return;
300  }
301  }
302  }
303 
304  /*
305  * No luck. All array entries are full. Move one array entry into the hash
306  * table.
307  */
308  {
309  /*
310  * Move entry from the current clock position in the array into the
311  * hashtable. Use that slot.
312  */
313  PrivateRefCountEntry *hashent;
314  bool found;
315 
316  /* select victim slot */
317  ReservedRefCountEntry =
318  &PrivateRefCountArray[PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
319 
320  /* Better be used, otherwise we shouldn't get here. */
321  Assert(ReservedRefCountEntry->buffer != InvalidBuffer);
322 
323  /* enter victim array entry into hashtable */
324  hashent = hash_search(PrivateRefCountHash,
325  &(ReservedRefCountEntry->buffer),
326  HASH_ENTER,
327  &found);
328  Assert(!found);
329  hashent->refcount = ReservedRefCountEntry->refcount;
330 
331  /* clear the now free array slot */
332  ReservedRefCountEntry->buffer = InvalidBuffer;
333  ReservedRefCountEntry->refcount = 0;
334 
335  PrivateRefCountOverflowed++;
336  }
337 }
static uint32 PrivateRefCountClock
Definition: bufmgr.c:238
@ HASH_ENTER
Definition: hsearch.h:114

References Assert, PrivateRefCountEntry::buffer, HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, res, and ReservedRefCountEntry.

Referenced by BufferAlloc(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntry(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 6064 of file bufmgr.c.

6065 {
6066  Buffer buffer = DatumGetInt32(res);
6067 
6068  return psprintf("lost track of buffer IO on buffer %d", buffer);
6069 }
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202

References DatumGetInt32(), psprintf(), and res.

◆ ResOwnerPrintBufferPin()

static char * ResOwnerPrintBufferPin ( Datum  res)
static

Definition at line 6087 of file bufmgr.c.

6088 {
6089  return DebugPrintBufferRefcount(DatumGetInt32(res));
6090 }

References DatumGetInt32(), DebugPrintBufferRefcount(), and res.

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 6056 of file bufmgr.c.

6057 {
6058  Buffer buffer = DatumGetInt32(res);
6059 
6060  AbortBufferIO(buffer);
6061 }
static void AbortBufferIO(Buffer buffer)
Definition: bufmgr.c:5665

References AbortBufferIO(), DatumGetInt32(), and res.

◆ ResOwnerReleaseBufferPin()

static void ResOwnerReleaseBufferPin ( Datum  res)
static

Definition at line 6072 of file bufmgr.c.

6073 {
6074  Buffer buffer = DatumGetInt32(res);
6075 
6076  /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
6077  if (!BufferIsValid(buffer))
6078  elog(ERROR, "bad buffer ID: %d", buffer);
6079 
6080  if (BufferIsLocal(buffer))
6081  UnpinLocalBufferNoOwner(buffer);
6082  else
6083  UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
6084 }
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition: bufmgr.c:2827
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition: localbuf.c:688

References BufferIsLocal, BufferIsValid(), DatumGetInt32(), elog, ERROR, GetBufferDescriptor(), res, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ rlocator_comparator()

static int rlocator_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 5747 of file bufmgr.c.

5748 {
5749  RelFileLocator n1 = *(const RelFileLocator *) p1;
5750  RelFileLocator n2 = *(const RelFileLocator *) p2;
5751 
5752  if (n1.relNumber < n2.relNumber)
5753  return -1;
5754  else if (n1.relNumber > n2.relNumber)
5755  return 1;
5756 
5757  if (n1.dbOid < n2.dbOid)
5758  return -1;
5759  else if (n1.dbOid > n2.dbOid)
5760  return 1;
5761 
5762  if (n1.spcOid < n2.spcOid)
5763  return -1;
5764  else if (n1.spcOid > n2.spcOid)
5765  return 1;
5766  else
5767  return 0;
5768 }

References RelFileLocator::dbOid, p2, RelFileLocator::relNumber, and RelFileLocator::spcOid.

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().
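
Schematic of how a qsort(3)-style comparator like this is applied (rlocator_comparator() is static to bufmgr.c, so this only illustrates the sort-once, bsearch-per-buffer pattern used by DropRelationsAllBuffers() above its RELS_BSEARCH_THRESHOLD; locator_in_set() is hypothetical):

    #include "postgres.h"
    #include "storage/relfilelocator.h"
    #include <stdlib.h>

    static bool
    locator_in_set(RelFileLocator *set, size_t n, const RelFileLocator *probe)
    {
        /* Note: qsort() reorders the caller's array in place. */
        qsort(set, n, sizeof(RelFileLocator), rlocator_comparator);
        return bsearch(probe, set, n, sizeof(RelFileLocator),
                       rlocator_comparator) != NULL;
    }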

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 5928 of file bufmgr.c.

5930 {
5931  PendingWriteback *pending;
5932 
5933  if (io_direct_flags & IO_DIRECT_DATA)
5934  return;
5935 
5936  /*
5937  * Add buffer to the pending writeback array, unless writeback control is
5938  * disabled.
5939  */
5940  if (*wb_context->max_pending > 0)
5941  {
5942  Assert(*wb_context->max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
5943 
5944  pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
5945 
5946  pending->tag = *tag;
5947  }
5948 
5949  /*
5950  * Perform pending flushes if the writeback limit is exceeded. This
5951  * includes the case where previously an item has been added, but control
5952  * is now disabled.
5953  */
5954  if (wb_context->nr_pending >= *wb_context->max_pending)
5955  IssuePendingWritebacks(wb_context, io_context);
5956 }
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert, IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), WritebackContext::max_pending, WritebackContext::nr_pending, WritebackContext::pending_writebacks, PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void *  arg)
static

Definition at line 5707 of file bufmgr.c.

5708 {
5709  BufferDesc *bufHdr = (BufferDesc *) arg;
5710 
5711  /* Buffer is pinned, so we can read the tag without locking the spinlock */
5712  if (bufHdr != NULL)
5713  {
5714  char *path = relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
5715  BufTagGetForkNum(&bufHdr->tag));
5716 
5717  errcontext("writing block %u of relation %s",
5718  bufHdr->tag.blockNum, path);
5719  pfree(path);
5720  }
5721 }

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, pfree(), relpathperm, and BufferDesc::tag.

Referenced by FlushBuffer().

◆ StartBufferIO()

static bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)
static

Definition at line 5571 of file bufmgr.c.

5572 {
5573  uint32 buf_state;
5574 
5575  ResourceOwnerEnlarge(CurrentResourceOwner);
5576 
5577  for (;;)
5578  {
5579  buf_state = LockBufHdr(buf);
5580 
5581  if (!(buf_state & BM_IO_IN_PROGRESS))
5582  break;
5583  UnlockBufHdr(buf, buf_state);
5584  if (nowait)
5585  return false;
5586  WaitIO(buf);
5587  }
5588 
5589  /* Once we get here, there is definitely no I/O active on this buffer */
5590 
5591  if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
5592  {
5593  /* someone else already did the I/O */
5594  UnlockBufHdr(buf, buf_state);
5595  return false;
5596  }
5597 
5598  buf_state |= BM_IO_IN_PROGRESS;
5599  UnlockBufHdr(buf, buf_state);
5600 
5601  ResourceOwnerRememberBufferIO(CurrentResourceOwner,
5602  BufferDescriptorGetBuffer(buf));
5603 
5604  return true;
5605 }
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), and WaitIO().

Referenced by ExtendBufferedRelShared(), FlushBuffer(), WaitReadBuffersCanStartIO(), and ZeroAndLockBuffer().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1392 of file bufmgr.c.

1396 {
1397  int nblocks = 1;
1398  bool result;
1399 
1400  result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags);
1401  Assert(nblocks == 1); /* single block can't be short */
1402 
1403  return result;
1404 }
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
Definition: bufmgr.c:1282

References Assert, PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)

Definition at line 1377 of file bufmgr.c.

1382 {
1383  return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags);
1384 }

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().
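
A vectored-read sketch (read_four_blocks() is hypothetical; it assumes the four blocks exist): the caller fills the same ReadBuffersOperation fields that ReadBuffer_common() sets above, and releases only the buffers the call actually covered.

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    read_four_blocks(Relation rel, BlockNumber start)
    {
        ReadBuffersOperation op;
        Buffer      buffers[4];
        int         nblocks = 4;    /* in: requested; out: actually covered */

        op.rel = rel;
        op.smgr = RelationGetSmgr(rel);
        op.persistence = rel->rd_rel->relpersistence;
        op.forknum = MAIN_FORKNUM;
        op.strategy = NULL;

        if (StartReadBuffers(&op, buffers, start, &nblocks, 0))
            WaitReadBuffers(&op);   /* completes the I/O for the miss range */

        /* nblocks may have shrunk if a hit terminated the readable range;
         * the remaining blocks would need a further StartReadBuffers() call. */
        for (int i = 0; i < nblocks; i++)
            ReleaseBuffer(buffers[i]);  /* buffers[0..nblocks-1] are pinned */
    }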

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)
static

Definition at line 1282 of file bufmgr.c.

1287 {
1288  int actual_nblocks = *nblocks;
1289  int io_buffers_len = 0;
1290 
1291  Assert(*nblocks > 0);
1292  Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1293 
1294  for (int i = 0; i < actual_nblocks; ++i)
1295  {
1296  bool found;
1297 
1298  buffers[i] = PinBufferForBlock(operation->rel,
1299  operation->smgr,
1300  operation->persistence,
1301  operation->forknum,
1302  blockNum + i,
1303  operation->strategy,
1304  &found);
1305 
1306  if (found)
1307  {
1308  /*
1309  * Terminate the read as soon as we get a hit. It could be a
1310  * single buffer hit, or it could be a hit that follows a readable
1311  * range. We don't want to create more than one readable range,
1312  * so we stop here.
1313  */
1314  actual_nblocks = i + 1;
1315  break;
1316  }
1317  else
1318  {
1319  /* Extend the readable range to cover this block. */
1320  io_buffers_len++;
1321  }
1322  }
1323  *nblocks = actual_nblocks;
1324 
1325  if (likely(io_buffers_len == 0))
1326  return false;
1327 
1328  /* Populate information needed for I/O. */
1329  operation->buffers = buffers;
1330  operation->blocknum = blockNum;
1331  operation->flags = flags;
1332  operation->nblocks = actual_nblocks;
1333  operation->io_buffers_len = io_buffers_len;
1334 
1335  if (flags & READ_BUFFERS_ISSUE_ADVICE)
1336  {
1337  /*
1338  * In theory we should only do this if PinBufferForBlock() had to
1339  * allocate new buffers above. That way, if two calls to
1340  * StartReadBuffers() were made for the same blocks before
1341  * WaitReadBuffers(), only the first would issue the advice. That'd be
1342  * a better simulation of true asynchronous I/O, which would only
1343  * start the I/O once, but isn't done here for simplicity. Note also
1344  * that the following call might actually issue two advice calls if we
1345  * cross a segment boundary; in a true asynchronous version we might
1346  * choose to process only one real I/O at a time in that case.
1347  */
1348  smgrprefetch(operation->smgr,
1349  operation->forknum,
1350  blockNum,
1351  operation->io_buffers_len);
1352  }
1353 
1354  /* Indicate that WaitReadBuffers() should be called. */
1355  return true;
1356 }
#define READ_BUFFERS_ISSUE_ADVICE
Definition: bufmgr.h:113
#define MAX_IO_COMBINE_LIMIT
Definition: bufmgr.h:164
#define likely(x)
Definition: c.h:310
int16 io_buffers_len
Definition: bufmgr.h:133
Buffer * buffers
Definition: bufmgr.h:129
BlockNumber blocknum
Definition: bufmgr.h:130

References Assert, ReadBuffersOperation::blocknum, ReadBuffersOperation::buffers, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ReadBuffersOperation::io_buffers_len, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::persistence, PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, smgrprefetch(), and ReadBuffersOperation::strategy.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 3498 of file bufmgr.c.

3499 {
3500  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
3501  int result = 0;
3502  uint32 buf_state;
3503  BufferTag tag;
3504 
3505  /* Make sure we can handle the pin */
3506  ReservePrivateRefCountEntry();
3507  ResourceOwnerEnlarge(CurrentResourceOwner);
3508 
3509  /*
3510  * Check whether buffer needs writing.
3511  *
3512  * We can make this check without taking the buffer content lock so long
3513  * as we mark pages dirty in access methods *before* logging changes with
3514  * XLogInsert(): if someone marks the buffer dirty just after our check we
3515  * don't worry because our checkpoint.redo points before log record for
3516  * upcoming changes and so we are not required to write such dirty buffer.
3517  */
3518  buf_state = LockBufHdr(bufHdr);
3519 
3520  if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
3521  BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
3522  {
3523  result |= BUF_REUSABLE;
3524  }
3525  else if (skip_recently_used)
3526  {
3527  /* Caller told us not to write recently-used buffers */
3528  UnlockBufHdr(bufHdr, buf_state);
3529  return result;
3530  }
3531 
3532  if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
3533  {
3534  /* It's clean, so nothing to do */
3535  UnlockBufHdr(bufHdr, buf_state);
3536  return result;
3537  }
3538 
3539  /*
3540  * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
3541  * buffer is clean by the time we've locked it.)
3542  */
3543  PinBuffer_Locked(bufHdr);
3544  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3545 
3546  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
3547 
3548  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3549 
3550  tag = bufHdr->tag;
3551 
3552  UnpinBuffer(bufHdr);
3553 
3554  /*
3555  * SyncOneBuffer() is only called by checkpointer and bgwriter, so
3556  * IOContext will always be IOCONTEXT_NORMAL.
3557  */
3558  ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
3559 
3560  return result | BUF_WRITTEN;
3561 }

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, BufferDescriptorGetContentLock(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().
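
A bufmgr.c-internal sketch (SyncOneBuffer() is static; sweep_buffers() is hypothetical) of how a caller tallies the result bits, mirroring the bookkeeping in BgBufferSync():

    static void
    sweep_buffers(int first_buf_id, int count, WritebackContext *wb_context)
    {
        int         num_written = 0;
        int         reusable_buffers = 0;

        for (int i = 0; i < count; i++)
        {
            int         sync_state = SyncOneBuffer(first_buf_id + i, true,
                                                   wb_context);

            if (sync_state & BUF_WRITTEN)
                num_written++;      /* flushed a dirty, not-recently-used page */
            if (sync_state & BUF_REUSABLE)
                reusable_buffers++; /* refcount and usage count were both zero */
        }

        (void) num_written;
        (void) reusable_buffers;
    }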

◆ TerminateBufferIO()

static void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint32  set_flag_bits,
bool  forget_owner 
)
static

Definition at line 5628 of file bufmgr.c.

5630 {
5631  uint32 buf_state;
5632 
5633  buf_state = LockBufHdr(buf);
5634 
5635  Assert(buf_state & BM_IO_IN_PROGRESS);
5636 
5637  buf_state &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
5638  if (clear_dirty && !(buf_state & BM_JUST_DIRTIED))
5639  buf_state &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
5640 
5641  buf_state |= set_flag_bits;
5642  UnlockBufHdr(buf, buf_state);
5643 
5644  if (forget_owner)
5645  ResourceOwnerForgetBufferIO(CurrentResourceOwner,
5646  BufferDescriptorGetBuffer(buf));
5647 
5648  ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
5649 }
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, buf, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerForgetBufferIO(), and UnlockBufHdr().

Referenced by AbortBufferIO(), ExtendBufferedRelShared(), FlushBuffer(), WaitReadBuffers(), and ZeroAndLockBuffer().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void *  arg 
)
static

Definition at line 5893 of file bufmgr.c.

5894 {
5895  CkptTsStatus *sa = (CkptTsStatus *) a;
5896  CkptTsStatus *sb = (CkptTsStatus *) b;
5897 
5898  /* we want a min-heap, so return 1 for a < b */
5899  if (sa->progress < sb->progress)
5900  return 1;
5901  else if (sa->progress == sb->progress)
5902  return 0;
5903  else
5904  return -1;
5905 }

References a, b, and CkptTsStatus::progress.

Referenced by BufferSync().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5143 of file bufmgr.c.

5144 {
5145  BufferDesc *buf = PinCountWaitBuf;
5146 
5147  if (buf)
5148  {
5149  uint32 buf_state;
5150 
5151  buf_state = LockBufHdr(buf);
5152 
5153  /*
5154  * Don't complain if flag bit not set; it could have been reset but we
5155  * got a cancel/die interrupt before getting the signal.
5156  */
5157  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5158  buf->wait_backend_pgprocno == MyProcNumber)
5159  buf_state &= ~BM_PIN_COUNT_WAITER;
5160 
5161  UnlockBufHdr(buf, buf_state);
5162 
5163  PinCountWaitBuf = NULL;
5164  }
5165 }

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdr().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 4953 of file bufmgr.c.

4954 {
4955  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4956  ReleaseBuffer(buffer);
4957 }

References BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), do_setval(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), 
spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 2827 of file bufmgr.c.

2828 {
2829  PrivateRefCountEntry *ref;
2830  Buffer b = BufferDescriptorGetBuffer(buf);
2831 
2832  Assert(!BufferIsLocal(b));
2833 
2834  /* not moving as we're likely deleting it soon anyway */
2835  ref = GetPrivateRefCountEntry(b, false);
2836  Assert(ref != NULL);
2837  Assert(ref->refcount > 0);
2838  ref->refcount--;
2839  if (ref->refcount == 0)
2840  {
2841  uint32 buf_state;
2842  uint32 old_buf_state;
2843 
2844  /*
2845  * Mark buffer non-accessible to Valgrind.
2846  *
2847  * Note that the buffer may have already been marked non-accessible
2848  * within access method code that enforces that buffers are only
2849  * accessed while a buffer lock is held.
2850  */
2851  VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
2852 
2853  /* I'd better not still hold the buffer content lock */
2854  Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
2855 
2856  /*
2857  * Decrement the shared reference count.
2858  *
2859  * Since buffer spinlock holder can update status using just write,
2860  * it's not safe to use atomic decrement here; thus use a CAS loop.
2861  */
2862  old_buf_state = pg_atomic_read_u32(&buf->state);
2863  for (;;)
2864  {
2865  if (old_buf_state & BM_LOCKED)
2866  old_buf_state = WaitBufHdrUnlocked(buf);
2867 
2868  buf_state = old_buf_state;
2869 
2870  buf_state -= BUF_REFCOUNT_ONE;
2871 
2872  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2873  buf_state))
2874  break;
2875  }
2876 
2877  /* Support LockBufferForCleanup() */
2878  if (buf_state & BM_PIN_COUNT_WAITER)
2879  {
2880  /*
2881  * Acquire the buffer header lock, re-check that there's a waiter.
2882  * Another backend could have unpinned this buffer, and already
2883  * woken up the waiter. There's no danger of the buffer being
2884  * replaced after we unpinned it above, as it's pinned by the
2885  * waiter.
2886  */
2887  buf_state = LockBufHdr(buf);
2888 
2889  if ((buf_state & BM_PIN_COUNT_WAITER) &&
2890  BUF_STATE_GET_REFCOUNT(buf_state) == 1)
2891  {
2892  /* we just released the last pin other than the waiter's */
2893  int wait_backend_pgprocno = buf->wait_backend_pgprocno;
2894 
2895  buf_state &= ~BM_PIN_COUNT_WAITER;
2896  UnlockBufHdr(buf, buf_state);
2897  ProcSendSignal(wait_backend_pgprocno);
2898  }
2899  else
2900  UnlockBufHdr(buf, buf_state);
2901  }
2902  ForgetPrivateRefCountEntry(ref);
2903  }
2904 }
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition: bufmgr.c:466
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
void ProcSendSignal(ProcNumber procNumber)
Definition: proc.c:1878

References Assert, b, BM_LOCKED, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferIsLocal, BufHdrGetBlock, ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), LockBufHdr(), LWLockHeldByMe(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), ProcSendSignal(), PrivateRefCountEntry::refcount, UnlockBufHdr(), VALGRIND_MAKE_MEM_NOACCESS, and WaitBufHdrUnlocked().

Referenced by ResOwnerReleaseBufferPin(), and UnpinBuffer().
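The CAS loop above deserves a closer look: a plain atomic fetch-and-subtract would be unsafe because a backend holding the buffer header spinlock may rewrite the state word with a plain store, so the decrement must be retried until the lock bit is clear. Below is a standalone C11 analogue of the pattern; LOCK_BIT and REFCOUNT_ONE are illustrative stand-ins for BM_LOCKED and BUF_REFCOUNT_ONE, and the real code uses the pg_atomic_* wrappers instead of <stdatomic.h>.

#include <stdatomic.h>
#include <stdint.h>

#define LOCK_BIT      (1u << 31)   /* stand-in for BM_LOCKED */
#define REFCOUNT_ONE  1u           /* stand-in for BUF_REFCOUNT_ONE */

static uint32_t
unpin_state(_Atomic uint32_t *state)
{
    uint32_t    old = atomic_load(state);

    for (;;)
    {
        uint32_t    newval;

        /* wait until no header-lock holder can overwrite our update */
        while (old & LOCK_BIT)
            old = atomic_load(state);

        newval = old - REFCOUNT_ONE;

        /* on failure, old is refreshed from memory and we retry */
        if (atomic_compare_exchange_weak(state, &old, newval))
            return newval;
    }
}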

◆ WaitBufHdrUnlocked()

static uint32 WaitBufHdrUnlocked ( BufferDesc * buf )
static

Definition at line 5804 of file bufmgr.c.

5805 {
5806  SpinDelayStatus delayStatus;
5807  uint32 buf_state;
5808 
5809  init_local_spin_delay(&delayStatus);
5810 
5811  buf_state = pg_atomic_read_u32(&buf->state);
5812 
5813  while (buf_state & BM_LOCKED)
5814  {
5815  perform_spin_delay(&delayStatus);
5816  buf_state = pg_atomic_read_u32(&buf->state);
5817  }
5818 
5819  finish_spin_delay(&delayStatus);
5820 
5821  return buf_state;
5822 }

References BM_LOCKED, buf, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), and pg_atomic_read_u32().

Referenced by MarkBufferDirty(), PinBuffer(), and UnpinBufferNoOwner().
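The same three-call idiom can wrap any short busy wait. A hypothetical helper (wait_until_clear() is not a bufmgr.c function) showing the shape, assuming the spin-delay API from storage/s_lock.h:

#include "postgres.h"
#include "storage/s_lock.h"

/*
 * Spin until the given bit clears, with escalating back-off; the final
 * finish_spin_delay() call feeds the observed contention back into the
 * global spins_per_delay tuning.
 */
static void
wait_until_clear(volatile uint32 *word, uint32 bit)
{
    SpinDelayStatus delayStatus;

    init_local_spin_delay(&delayStatus);
    while (*word & bit)
        perform_spin_delay(&delayStatus);
    finish_spin_delay(&delayStatus);
}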

◆ WaitIO()

static void WaitIO ( BufferDesc * buf )
static

Definition at line 5522 of file bufmgr.c.

5523 {
5524  ConditionVariable *cv = BufferDescriptorGetIOCV(buf);
5525 
5526  ConditionVariablePrepareToSleep(cv);
5527  for (;;)
5528  {
5529  uint32 buf_state;
5530 
5531  /*
5532  * It may not be necessary to acquire the spinlock to check the flag
5533  * here, but since this test is essential for correctness, we'd better
5534  * play it safe.
5535  */
5536  buf_state = LockBufHdr(buf);
5537  UnlockBufHdr(buf, buf_state);
5538 
5539  if (!(buf_state & BM_IO_IN_PROGRESS))
5540  break;
5541  ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
5542  }
5543  ConditionVariableCancelSleep();
5544 }
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), LockBufHdr(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().
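The prepare/test/sleep/cancel sequence is the standard ConditionVariable idiom. A condensed, hypothetical wrapper (cv_wait_for() and condition_holds are illustrative, not bufmgr.c code) makes the shape explicit:

#include "postgres.h"
#include "storage/condition_variable.h"

/*
 * Sleep on cv until condition_holds(arg) is true. The condition must be
 * re-tested after every wakeup: sleepers can wake spuriously, and a
 * broadcast only hints that the state may have changed.
 */
static void
cv_wait_for(ConditionVariable *cv,
            bool (*condition_holds) (void *arg), void *arg,
            uint32 wait_event_info)
{
    ConditionVariablePrepareToSleep(cv);
    while (!condition_holds(arg))
        ConditionVariableSleep(cv, wait_event_info);
    ConditionVariableCancelSleep();
}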

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation * operation )

Definition at line 1420 of file bufmgr.c.

1421 {
1422  Buffer *buffers;
1423  int nblocks;
1424  BlockNumber blocknum;
1425  ForkNumber forknum;
1426  IOContext io_context;
1427  IOObject io_object;
1428  char persistence;
1429 
1430  /*
1431  * Currently operations are only allowed to include a read of some range,
1432  * with an optional extra buffer that is already pinned at the end. So
1433  * nblocks can be at most one more than io_buffers_len.
1434  */
1435  Assert((operation->nblocks == operation->io_buffers_len) ||
1436  (operation->nblocks == operation->io_buffers_len + 1));
1437 
1438  /* Find the range of the physical read we need to perform. */
1439  nblocks = operation->io_buffers_len;
1440  if (nblocks == 0)
1441  return; /* nothing to do */
1442 
1443  buffers = &operation->buffers[0];
1444  blocknum = operation->blocknum;
1445  forknum = operation->forknum;
1446  persistence = operation->persistence;
1447 
1448  if (persistence == RELPERSISTENCE_TEMP)
1449  {
1450  io_context = IOCONTEXT_NORMAL;
1451  io_object = IOOBJECT_TEMP_RELATION;
1452  }
1453  else
1454  {
1455  io_context = IOContextForStrategy(operation->strategy);
1456  io_object = IOOBJECT_RELATION;
1457  }
1458 
1459  /*
1460  * We count all these blocks as read by this backend. This is traditional
1461  * behavior, but might turn out to be not true if we find that someone
1462  * else has beaten us and completed the read of some of these blocks. In
1463  * that case the system globally double-counts, but we traditionally don't
1464  * count this as a "hit", and we don't have a separate counter for "miss,
1465  * but another backend completed the read".
1466  */
1467  if (persistence == RELPERSISTENCE_TEMP)
1468  pgBufferUsage.local_blks_read += nblocks;
1469  else
1470  pgBufferUsage.shared_blks_read += nblocks;
1471 
1472  for (int i = 0; i < nblocks; ++i)
1473  {
1474  int io_buffers_len;
1475  Buffer io_buffers[MAX_IO_COMBINE_LIMIT];
1476  void *io_pages[MAX_IO_COMBINE_LIMIT];
1477  instr_time io_start;
1478  BlockNumber io_first_block;
1479 
1480  /*
1481  * Skip this block if someone else has already completed it. If an
1482  * I/O is already in progress in another backend, this will wait for
1483  * the outcome: either done, or something went wrong and we will
1484  * retry.
1485  */
1486  if (!WaitReadBuffersCanStartIO(buffers[i], false))
1487  {
1488  /*
1489  * Report this as a 'hit' for this backend, even though it must
1490  * have started out as a miss in PinBufferForBlock().
1491  */
1492  TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i,
1493  operation->smgr->smgr_rlocator.locator.spcOid,
1494  operation->smgr->smgr_rlocator.locator.dbOid,
1495  operation->smgr->smgr_rlocator.locator.relNumber,
1496  operation->smgr->smgr_rlocator.backend,
1497  true);
1498  continue;
1499  }
1500 
1501  /* We found a buffer that we need to read in. */
1502  io_buffers[0] = buffers[i];
1503  io_pages[0] = BufferGetBlock(buffers[i]);
1504  io_first_block = blocknum + i;
1505  io_buffers_len = 1;
1506 
1507  /*
1508  * How many neighboring-on-disk blocks can we scatter-read into
1509  * other buffers at the same time? In this case we don't wait if we
1510  * see an I/O already in progress. We already hold BM_IO_IN_PROGRESS
1511  * for the head block, so we should get on with that I/O as soon as
1512  * possible. We'll come back to this block again, above.
1513  */
1514  while ((i + 1) < nblocks &&
1515  WaitReadBuffersCanStartIO(buffers[i + 1], true))
1516  {
1517  /* Must be consecutive block numbers. */
1518  Assert(BufferGetBlockNumber(buffers[i + 1]) ==
1519  BufferGetBlockNumber(buffers[i]) + 1);
1520 
1521  io_buffers[io_buffers_len] = buffers[++i];
1522  io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
1523  }
1524 
1525  io_start = pgstat_prepare_io_time(track_io_timing);
1526  smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
1527  pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
1528  io_buffers_len);
1529 
1530  /* Verify each block we read, and terminate the I/O. */
1531  for (int j = 0; j < io_buffers_len; ++j)
1532  {
1533  BufferDesc *bufHdr;
1534  Block bufBlock;
1535 
1536  if (persistence == RELPERSISTENCE_TEMP)
1537  {
1538  bufHdr = GetLocalBufferDescriptor(-io_buffers[j] - 1);
1539  bufBlock = LocalBufHdrGetBlock(bufHdr);
1540  }
1541  else
1542  {
1543  bufHdr = GetBufferDescriptor(io_buffers[j] - 1);
1544  bufBlock = BufHdrGetBlock(bufHdr);
1545  }
1546 
1547  /* check for garbage data */
1548  if (!PageIsVerifiedExtended((Page) bufBlock, io_first_block + j,
1549  PIV_LOG_WARNING | PIV_REPORT_STAT))
1550  {
1551  if ((operation->flags & READ_BUFFERS_ZERO_ON_ERROR) || zero_damaged_pages)
1552  {
1553  ereport(WARNING,
1554  (errcode(ERRCODE_DATA_CORRUPTED),
1555  errmsg("invalid page in block %u of relation %s; zeroing out page",
1556  io_first_block + j,
1557  relpath(operation->smgr->smgr_rlocator, forknum))));
1558  memset(bufBlock, 0, BLCKSZ);
1559  }
1560  else
1561  ereport(ERROR,
1562  (errcode(ERRCODE_DATA_CORRUPTED),
1563  errmsg("invalid page in block %u of relation %s",
1564  io_first_block + j,
1565  relpath(operation->smgr->smgr_rlocator, forknum))));
1566  }
1567 
1568  /* Terminate I/O and set BM_VALID. */
1569  if (persistence == RELPERSISTENCE_TEMP)
1570  {
1571  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1572 
1573  buf_state |= BM_VALID;
1574  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1575  }
1576  else
1577  {
1578  /* Set BM_VALID, terminate IO, and wake up any waiters */
1579  TerminateBufferIO(bufHdr, false, BM_VALID, true);
1580  }
1581 
1582  /* Report I/Os as completing individually. */
1583  TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, io_first_block + j,
1584  operation->smgr->smgr_rlocator.locator.spcOid,
1585  operation->smgr->smgr_rlocator.locator.dbOid,
1586  operation->smgr->smgr_rlocator.locator.relNumber,
1587  operation->smgr->smgr_rlocator.backend,
1588  false);
1589  }
1590 
1591  VacuumPageMiss += io_buffers_len;
1592  if (VacuumCostActive)
1593  VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
1594  }
1595 }
static bool WaitReadBuffersCanStartIO(Buffer buffer, bool nowait)
Definition: bufmgr.c:1407
bool zero_damaged_pages
Definition: bufmgr.c:167
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:367
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
#define PIV_LOG_WARNING
Definition: bufpage.h:468
#define PIV_REPORT_STAT
Definition: bufpage.h:469
int VacuumCostPageMiss
Definition: globals.c:150
int64 VacuumPageMiss
Definition: globals.c:156
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
@ IOOP_READ
Definition: pgstat.h:302
void smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:600
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31

References Assert, RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BM_VALID, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, BufHdrGetBlock, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::io_buffers_len, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, j, BufferUsage::local_blks_read, LocalBufHdrGetBlock, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, PageIsVerifiedExtended(), ReadBuffersOperation::persistence, pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PIV_LOG_WARNING, PIV_REPORT_STAT, READ_BUFFERS_ZERO_ON_ERROR, RelFileLocator::relNumber, relpath, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrreadv(), RelFileLocator::spcOid, BufferDesc::state, ReadBuffersOperation::strategy, TerminateBufferIO(), track_io_timing, VacuumCostActive, VacuumCostBalance, VacuumCostPageMiss, VacuumPageMiss, WaitReadBuffersCanStartIO(), WARNING, and zero_damaged_pages.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().
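The heart of the function is the run-combining loop: each outer iteration claims a head block, greedily extends the run across consecutive blocks whose I/O can start without waiting, and issues one vectored read for the whole run. A self-contained sketch of that control flow follows; the callbacks are hypothetical stand-ins, with can_start_io() mirroring WaitReadBuffersCanStartIO() and readv() mirroring smgrreadv().

#include <stdbool.h>
#include <stdint.h>

static void
read_in_runs(uint32_t first_block, int nblocks,
             bool (*can_start_io) (int idx, bool nowait),
             void (*readv) (uint32_t start_block, int len))
{
    for (int i = 0; i < nblocks; i++)
    {
        int         len;

        /* someone else already read this block (or is doing so): skip */
        if (!can_start_io(i, false))
            continue;

        /* greedily absorb consecutive blocks that can start immediately */
        len = 1;
        while (i + 1 < nblocks && can_start_io(i + 1, true))
        {
            i++;
            len++;
        }

        /* one scatter-read for the whole run */
        readv(first_block + (uint32_t) (i - len + 1), len);
    }
}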

◆ WaitReadBuffersCanStartIO()

static bool WaitReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1407 of file bufmgr.c.

1408 {
1409  if (BufferIsLocal(buffer))
1410  {
1411  BufferDesc *bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1412 
1413  return (pg_atomic_read_u32(&bufHdr->state) & BM_VALID) == 0;
1414  }
1415  else
1416  return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1417 }

References BM_VALID, PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), pg_atomic_read_u32(), StartBufferIO(), and BufferDesc::state.

Referenced by WaitReadBuffers().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext * context,
int *  max_pending 
)

Definition at line 5916 of file bufmgr.c.

5917 {
5918  Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
5919 
5920  context->max_pending = max_pending;
5921  context->nr_pending = 0;
5922 }

References Assert, context, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferSync(), and InitBufferPool().
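A usage sketch, loosely modeled on BufferSync()'s checkpoint write pass (a simplified outline, not the actual loop). Because the context stores the address of the flush-after setting rather than its value, a configuration reload takes effect without re-initializing the context:

WritebackContext wb_context;

WritebackContextInit(&wb_context, &checkpoint_flush_after);

/*
 * ... for each buffer written, queue its tag for flushing:
 *     ScheduleBufferTagForWriteback(&wb_context, io_context, &tag);
 */

/* force out whatever is still pending at the end of the pass */
IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);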

◆ ZeroAndLockBuffer()

static void ZeroAndLockBuffer ( Buffer  buffer,
ReadBufferMode  mode,
bool  already_valid 
)
static

Definition at line 1046 of file bufmgr.c.

1047 {
1048  BufferDesc *bufHdr;
1049  bool need_to_zero;
1050  bool isLocalBuf = BufferIsLocal(buffer);
1051 
1052  Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
1053 
1054  if (already_valid)
1055  {
1056  /*
1057  * If the caller already knew the buffer was valid, we can skip some
1058  * header interaction. The caller just wants to lock the buffer.
1059  */
1060  need_to_zero = false;
1061  }
1062  else if (isLocalBuf)
1063  {
1064  /* Simple case for non-shared buffers. */
1065  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1066  need_to_zero = (pg_atomic_read_u32(&bufHdr->state) & BM_VALID) == 0;
1067  }
1068  else
1069  {
1070  /*
1071  * Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
1072  * concurrently. Even though we aren't doing I/O, that ensures that
1073  * we don't zero a page that someone else has pinned. An exclusive
1074  * content lock wouldn't be enough, because readers are allowed to
1075  * drop the content lock after determining that a tuple is visible
1076  * (see buffer access rules in README).
1077  */
1078  bufHdr = GetBufferDescriptor(buffer - 1);
1079  need_to_zero = StartBufferIO(bufHdr, true, false);
1080  }
1081 
1082  if (need_to_zero)
1083  {
1084  memset(BufferGetPage(buffer), 0, BLCKSZ);
1085 
1086  /*
1087  * Grab the buffer content lock before marking the page as valid, to
1088  * make sure that no other backend sees the zeroed page before the
1089  * caller has had a chance to initialize it.
1090  *
1091  * Since no-one else can be looking at the page contents yet, there is
1092  * no difference between an exclusive lock and a cleanup-strength
1093  * lock. (Note that we cannot use LockBuffer() or
1094  * LockBufferForCleanup() here, because they assert that the buffer is
1095  * already valid.)
1096  */
1097  if (!isLocalBuf)
1098  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
1099 
1100  if (isLocalBuf)
1101  {
1102  /* Only need to adjust flags */
1103  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1104 
1105  buf_state |= BM_VALID;
1106  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1107  }
1108  else
1109  {
1110  /* Set BM_VALID, terminate IO, and wake up any waiters */
1111  TerminateBufferIO(bufHdr, false, BM_VALID, true);
1112  }
1113  }
1114  else if (!isLocalBuf)
1115  {
1116  /*
1117  * The buffer is valid, so we can't zero it. The caller still expects
1118  * the page to be locked on return.
1119  */
1120  if (mode == RBM_ZERO_AND_LOCK)
1121  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1122  else
1123  LockBufferForCleanup(buffer);
1124  }
1125 }
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5251

References Assert, BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), LW_EXCLUSIVE, LWLockAcquire(), mode, pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, StartBufferIO(), BufferDesc::state, and TerminateBufferIO().

Referenced by ReadBuffer_common().
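From a caller's perspective this path is reached through the RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK read modes. A minimal caller sketch (rel and blkno are placeholders) for initializing a page that will be overwritten wholesale:

Buffer      buf;
Page        page;

/* returned zeroed and exclusively locked; the physical read is skipped */
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                         RBM_ZERO_AND_LOCK, NULL);
page = BufferGetPage(buf);
PageInit(page, BufferGetPageSize(buf), 0);

/* ... fill in the new page ... */

MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);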

Variable Documentation

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 200 of file bufmgr.c.

Referenced by InitBufferPool().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 199 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 168 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 169 of file bufmgr.c.

Referenced by BgBufferSync().

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
.name = "buffer io",
.release_priority = RELEASE_PRIO_BUFFER_IOS,
.ReleaseResource = ResOwnerReleaseBufferIO,
.DebugPrint = ResOwnerPrintBufferIO
}
static void ResOwnerReleaseBufferIO(Datum res)
Definition: bufmgr.c:6056
static char * ResOwnerPrintBufferIO(Datum res)
Definition: bufmgr.c:6064
#define RELEASE_PRIO_BUFFER_IOS
Definition: resowner.h:62
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition: resowner.h:54

Definition at line 253 of file bufmgr.c.

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_pin_resowner_desc

const ResourceOwnerDesc buffer_pin_resowner_desc
Initial value:
=
{
.name = "buffer pin",
.release_priority = RELEASE_PRIO_BUFFER_PINS,
.ReleaseResource = ResOwnerReleaseBufferPin,
.DebugPrint = ResOwnerPrintBufferPin
}
static char * ResOwnerPrintBufferPin(Datum res)
Definition: bufmgr.c:6087
static void ResOwnerReleaseBufferPin(Datum res)
Definition: bufmgr.c:6072
#define RELEASE_PRIO_BUFFER_PINS
Definition: resowner.h:63

Definition at line 262 of file bufmgr.c.

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().
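Both descriptors are instances of the generic ResourceOwnerDesc mechanism. A hypothetical extension-defined resource kind (all names here are illustrative) would follow the same shape; entries would then be tracked with ResourceOwnerRemember() and released with ResourceOwnerForget():

#include "postgres.h"
#include "utils/resowner.h"

static void
my_thing_release(Datum res)
{
    /* undo or close the tracked resource here */
}

static char *
my_thing_print(Datum res)
{
    return psprintf("my thing %d", DatumGetInt32(res));
}

static const ResourceOwnerDesc my_thing_resowner_desc =
{
    .name = "my thing",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_FIRST,
    .ReleaseResource = my_thing_release,
    .DebugPrint = my_thing_print
};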

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 198 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY

◆ io_combine_limit

◆ maintenance_io_concurrency

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 203 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 238 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountHash

HTAB* PrivateRefCountHash = NULL
static

◆ PrivateRefCountOverflowed

◆ ReservedRefCountEntry

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 167 of file bufmgr.c.

Referenced by mdreadv(), and WaitReadBuffers().