PostgreSQL Source Code  git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"


Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBufferPin (Datum res)
 
static char * ResOwnerPrintBufferPin (Datum res)
 
static Buffer ReadBuffer_common (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
static void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsExclusiveLocked (Buffer buffer)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_pin_resowner_desc
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 82 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 72 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 71 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 64 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:404
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
int32 * LocalRefCount
Definition: localbuf.c:46

Definition at line 462 of file bufmgr.c.
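
This macro guards code in this file that reads buffer state without holding the header spinlock. A minimal sketch of the idiom (variable names are illustrative, not quoted from bufmgr.c):

/* A pin must be held before the tag may be read without the spinlock. */
Assert(BufferIsPinned(buffer));
bufHdr = GetBufferDescriptor(buffer - 1);
blkno = bufHdr->tag.blockNum;   /* pinned, so OK to read tag unlocked */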

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 63 of file bufmgr.c.
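
The macro is plain pointer arithmetic over the contiguous shared-buffer arena: buffer buf_id's data begins buf_id * BLCKSZ bytes into BufferBlocks. A worked example with an assumed block size (illustrative only):

/* Assuming BLCKSZ is 8192, buffer id 3 starts 3 * 8192 = 24576 bytes in. */
Block block = (Block) (BufferBlocks + (Size) 3 * BLCKSZ);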

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 67 of file bufmgr.c.

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 91 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 74 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 5576 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 5576 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 5578 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 5578 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 5575 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 5575 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 5577 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 5577 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 5574 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 5574 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 5281 of file bufmgr.c.

5282 {
5283  BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
5284  uint32 buf_state;
5285 
5286  buf_state = LockBufHdr(buf_hdr);
5287  Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
5288 
5289  if (!(buf_state & BM_VALID))
5290  {
5291  Assert(!(buf_state & BM_DIRTY));
5292  UnlockBufHdr(buf_hdr, buf_state);
5293  }
5294  else
5295  {
5296  Assert(buf_state & BM_DIRTY);
5297  UnlockBufHdr(buf_hdr, buf_state);
5298 
5299  /* Issue notice if this is not the first failure... */
5300  if (buf_state & BM_IO_ERROR)
5301  {
5302  /* Buffer is pinned, so we can read tag without spinlock */
5303  char *path;
5304 
5305  path = relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
5306  BufTagGetForkNum(&buf_hdr->tag));
5307  ereport(WARNING,
5308  (errcode(ERRCODE_IO_ERROR),
5309  errmsg("could not write block %u of %s",
5310  buf_hdr->tag.blockNum, path),
5311  errdetail("Multiple failures --- write error might be permanent.")));
5312  pfree(path);
5313  }
5314  }
5315 
5316  TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false);
5317 }
#define BM_TAG_VALID
Definition: buf_internals.h:63
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static BufferDesc * GetBufferDescriptor(uint32 id)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
#define BM_DIRTY
Definition: buf_internals.h:61
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:64
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:62
#define BM_IO_ERROR
Definition: buf_internals.h:65
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner)
Definition: bufmgr.c:5244
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:5390
unsigned int uint32
Definition: c.h:493
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define WARNING
Definition: elog.h:36
#define ereport(elevel,...)
Definition: elog.h:149
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1508
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90
BufferTag tag
BlockNumber blockNum
Definition: buf_internals.h:98

References Assert(), buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), pfree(), relpathperm, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 3212 of file bufmgr.c.

3213 {
3214  CheckForBufferLeaks();
3215 
3216  AtEOXact_LocalBuffers(isCommit);
3217 
3218  Assert(PrivateRefCountOverflowed == 0);
3219 }
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:3272
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:198
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:819

References Assert(), AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 3254 of file bufmgr.c.

3255 {
3256  UnlockBuffers();
3257 
3258  CheckForBufferLeaks();
3259 
3260  /* localbuf.c needs a chance too */
3261  AtProcExit_LocalBuffers();
3262 }
void UnlockBuffers(void)
Definition: bufmgr.c:4767
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:830

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 2841 of file bufmgr.c.

2842 {
2843  /* info obtained from freelist.c */
2844  int strategy_buf_id;
2845  uint32 strategy_passes;
2846  uint32 recent_alloc;
2847 
2848  /*
2849  * Information saved between calls so we can determine the strategy
2850  * point's advance rate and avoid scanning already-cleaned buffers.
2851  */
2852  static bool saved_info_valid = false;
2853  static int prev_strategy_buf_id;
2854  static uint32 prev_strategy_passes;
2855  static int next_to_clean;
2856  static uint32 next_passes;
2857 
2858  /* Moving averages of allocation rate and clean-buffer density */
2859  static float smoothed_alloc = 0;
2860  static float smoothed_density = 10.0;
2861 
2862  /* Potentially these could be tunables, but for now, not */
2863  float smoothing_samples = 16;
2864  float scan_whole_pool_milliseconds = 120000.0;
2865 
2866  /* Used to compute how far we scan ahead */
2867  long strategy_delta;
2868  int bufs_to_lap;
2869  int bufs_ahead;
2870  float scans_per_alloc;
2871  int reusable_buffers_est;
2872  int upcoming_alloc_est;
2873  int min_scan_buffers;
2874 
2875  /* Variables for the scanning loop proper */
2876  int num_to_scan;
2877  int num_written;
2878  int reusable_buffers;
2879 
2880  /* Variables for final smoothed_density update */
2881  long new_strategy_delta;
2882  uint32 new_recent_alloc;
2883 
2884  /*
2885  * Find out where the freelist clock sweep currently is, and how many
2886  * buffer allocations have happened since our last call.
2887  */
2888  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2889 
2890  /* Report buffer alloc counts to pgstat */
2891  PendingBgWriterStats.buf_alloc += recent_alloc;
2892 
2893  /*
2894  * If we're not running the LRU scan, just stop after doing the stats
2895  * stuff. We mark the saved state invalid so that we can recover sanely
2896  * if LRU scan is turned back on later.
2897  */
2898  if (bgwriter_lru_maxpages <= 0)
2899  {
2900  saved_info_valid = false;
2901  return true;
2902  }
2903 
2904  /*
2905  * Compute strategy_delta = how many buffers have been scanned by the
2906  * clock sweep since last time. If first time through, assume none. Then
2907  * see if we are still ahead of the clock sweep, and if so, how many
2908  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2909  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2910  * behavior when the passes counts wrap around.
2911  */
2912  if (saved_info_valid)
2913  {
2914  int32 passes_delta = strategy_passes - prev_strategy_passes;
2915 
2916  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2917  strategy_delta += (long) passes_delta * NBuffers;
2918 
2919  Assert(strategy_delta >= 0);
2920 
2921  if ((int32) (next_passes - strategy_passes) > 0)
2922  {
2923  /* we're one pass ahead of the strategy point */
2924  bufs_to_lap = strategy_buf_id - next_to_clean;
2925 #ifdef BGW_DEBUG
2926  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2927  next_passes, next_to_clean,
2928  strategy_passes, strategy_buf_id,
2929  strategy_delta, bufs_to_lap);
2930 #endif
2931  }
2932  else if (next_passes == strategy_passes &&
2933  next_to_clean >= strategy_buf_id)
2934  {
2935  /* on same pass, but ahead or at least not behind */
2936  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2937 #ifdef BGW_DEBUG
2938  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2939  next_passes, next_to_clean,
2940  strategy_passes, strategy_buf_id,
2941  strategy_delta, bufs_to_lap);
2942 #endif
2943  }
2944  else
2945  {
2946  /*
2947  * We're behind, so skip forward to the strategy point and start
2948  * cleaning from there.
2949  */
2950 #ifdef BGW_DEBUG
2951  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2952  next_passes, next_to_clean,
2953  strategy_passes, strategy_buf_id,
2954  strategy_delta);
2955 #endif
2956  next_to_clean = strategy_buf_id;
2957  next_passes = strategy_passes;
2958  bufs_to_lap = NBuffers;
2959  }
2960  }
2961  else
2962  {
2963  /*
2964  * Initializing at startup or after LRU scanning had been off. Always
2965  * start at the strategy point.
2966  */
2967 #ifdef BGW_DEBUG
2968  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2969  strategy_passes, strategy_buf_id);
2970 #endif
2971  strategy_delta = 0;
2972  next_to_clean = strategy_buf_id;
2973  next_passes = strategy_passes;
2974  bufs_to_lap = NBuffers;
2975  }
2976 
2977  /* Update saved info for next time */
2978  prev_strategy_buf_id = strategy_buf_id;
2979  prev_strategy_passes = strategy_passes;
2980  saved_info_valid = true;
2981 
2982  /*
2983  * Compute how many buffers had to be scanned for each new allocation, ie,
2984  * 1/density of reusable buffers, and track a moving average of that.
2985  *
2986  * If the strategy point didn't move, we don't update the density estimate
2987  */
2988  if (strategy_delta > 0 && recent_alloc > 0)
2989  {
2990  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2991  smoothed_density += (scans_per_alloc - smoothed_density) /
2992  smoothing_samples;
2993  }
2994 
2995  /*
2996  * Estimate how many reusable buffers there are between the current
2997  * strategy point and where we've scanned ahead to, based on the smoothed
2998  * density estimate.
2999  */
3000  bufs_ahead = NBuffers - bufs_to_lap;
3001  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3002 
3003  /*
3004  * Track a moving average of recent buffer allocations. Here, rather than
3005  * a true average we want a fast-attack, slow-decline behavior: we
3006  * immediately follow any increase.
3007  */
3008  if (smoothed_alloc <= (float) recent_alloc)
3009  smoothed_alloc = recent_alloc;
3010  else
3011  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3012  smoothing_samples;
3013 
3014  /* Scale the estimate by a GUC to allow more aggressive tuning. */
3015  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3016 
3017  /*
3018  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3019  * eventually underflow to zero, and the underflows produce annoying
3020  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3021  * zero, there's no point in tracking smaller and smaller values of
3022  * smoothed_alloc, so just reset it to exactly zero to avoid this
3023  * syndrome. It will pop back up as soon as recent_alloc increases.
3024  */
3025  if (upcoming_alloc_est == 0)
3026  smoothed_alloc = 0;
3027 
3028  /*
3029  * Even in cases where there's been little or no buffer allocation
3030  * activity, we want to make a small amount of progress through the buffer
3031  * cache so that as many reusable buffers as possible are clean after an
3032  * idle period.
3033  *
3034  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3035  * the BGW will be called during the scan_whole_pool time; slice the
3036  * buffer pool into that many sections.
3037  */
3038  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3039 
3040  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3041  {
3042 #ifdef BGW_DEBUG
3043  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3044  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3045 #endif
3046  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3047  }
3048 
3049  /*
3050  * Now write out dirty reusable buffers, working forward from the
3051  * next_to_clean point, until we have lapped the strategy scan, or cleaned
3052  * enough buffers to match our estimate of the next cycle's allocation
3053  * requirements, or hit the bgwriter_lru_maxpages limit.
3054  */
3055 
3056  num_to_scan = bufs_to_lap;
3057  num_written = 0;
3058  reusable_buffers = reusable_buffers_est;
3059 
3060  /* Execute the LRU scan */
3061  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3062  {
3063  int sync_state = SyncOneBuffer(next_to_clean, true,
3064  wb_context);
3065 
3066  if (++next_to_clean >= NBuffers)
3067  {
3068  next_to_clean = 0;
3069  next_passes++;
3070  }
3071  num_to_scan--;
3072 
3073  if (sync_state & BUF_WRITTEN)
3074  {
3075  reusable_buffers++;
3076  if (++num_written >= bgwriter_lru_maxpages)
3077  {
3078  PendingBgWriterStats.maxwritten_clean++;
3079  break;
3080  }
3081  }
3082  else if (sync_state & BUF_REUSABLE)
3083  reusable_buffers++;
3084  }
3085 
3086  PendingBgWriterStats.buf_written_clean += num_written;
3087 
3088 #ifdef BGW_DEBUG
3089  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3090  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3091  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3092  bufs_to_lap - num_to_scan,
3093  num_written,
3094  reusable_buffers - reusable_buffers_est);
3095 #endif
3096 
3097  /*
3098  * Consider the above scan as being like a new allocation scan.
3099  * Characterize its density and update the smoothed one based on it. This
3100  * effectively halves the moving average period in cases where both the
3101  * strategy and the background writer are doing some useful scanning,
3102  * which is helpful because a long memory isn't as desirable on the
3103  * density estimates.
3104  */
3105  new_strategy_delta = bufs_to_lap - num_to_scan;
3106  new_recent_alloc = reusable_buffers - reusable_buffers_est;
3107  if (new_strategy_delta > 0 && new_recent_alloc > 0)
3108  {
3109  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3110  smoothed_density += (scans_per_alloc - smoothed_density) /
3111  smoothing_samples;
3112 
3113 #ifdef BGW_DEBUG
3114  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3115  new_recent_alloc, new_strategy_delta,
3116  scans_per_alloc, smoothed_density);
3117 #endif
3118  }
3119 
3120  /* Return true if OK to hibernate */
3121  return (bufs_to_lap == 0 && recent_alloc == 0);
3122 }
int BgWriterDelay
Definition: bgwriter.c:57
#define BUF_REUSABLE
Definition: bufmgr.c:72
double bgwriter_lru_multiplier
Definition: bufmgr.c:137
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:3139
int bgwriter_lru_maxpages
Definition: bufmgr.c:136
#define BUF_WRITTEN
Definition: bufmgr.c:71
signed int int32
Definition: c.h:481
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:224
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:394
int NBuffers
Definition: globals.c:139
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:255
PgStat_Counter maxwritten_clean
Definition: pgstat.h:256
PgStat_Counter buf_alloc
Definition: pgstat.h:257

References Assert(), bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
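
The smoothed_alloc bookkeeping above is an exponential moving average with fast-attack, slow-decline behavior: increases are adopted immediately, decreases decay over smoothing_samples calls. A standalone sketch of just that arithmetic (all input values invented for illustration):

#include <stdio.h>

static float
smooth_alloc(float smoothed, float recent, float samples)
{
	if (smoothed <= recent)
		return recent;		/* fast attack: follow any increase at once */
	return smoothed + (recent - smoothed) / samples;	/* slow decline */
}

int
main(void)
{
	float		smoothed = 0.0f;
	float		inputs[] = {200, 50, 50, 50, 400, 0, 0};

	for (int i = 0; i < 7; i++)
	{
		smoothed = smooth_alloc(smoothed, inputs[i], 16);
		printf("recent=%.0f smoothed=%.1f\n", inputs[i], smoothed);
	}
	return 0;
}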

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr,
IOContext  io_context 
)
static

Definition at line 1239 of file bufmgr.c.

1243 {
1244  BufferTag newTag; /* identity of requested block */
1245  uint32 newHash; /* hash value for newTag */
1246  LWLock *newPartitionLock; /* buffer partition lock for it */
1247  int existing_buf_id;
1248  Buffer victim_buffer;
1249  BufferDesc *victim_buf_hdr;
1250  uint32 victim_buf_state;
1251 
1252  /* Make sure we will have room to remember the buffer pin */
1253  ResourceOwnerEnlarge(CurrentResourceOwner);
1254  ReservePrivateRefCountEntry();
1255 
1256  /* create a tag so we can lookup the buffer */
1257  InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
1258 
1259  /* determine its hash code and partition lock ID */
1260  newHash = BufTableHashCode(&newTag);
1261  newPartitionLock = BufMappingPartitionLock(newHash);
1262 
1263  /* see if the block is in the buffer pool already */
1264  LWLockAcquire(newPartitionLock, LW_SHARED);
1265  existing_buf_id = BufTableLookup(&newTag, newHash);
1266  if (existing_buf_id >= 0)
1267  {
1268  BufferDesc *buf;
1269  bool valid;
1270 
1271  /*
1272  * Found it. Now, pin the buffer so no one can steal it from the
1273  * buffer pool, and check to see if the correct data has been loaded
1274  * into the buffer.
1275  */
1276  buf = GetBufferDescriptor(existing_buf_id);
1277 
1278  valid = PinBuffer(buf, strategy);
1279 
1280  /* Can release the mapping lock as soon as we've pinned it */
1281  LWLockRelease(newPartitionLock);
1282 
1283  *foundPtr = true;
1284 
1285  if (!valid)
1286  {
1287  /*
1288  * We can only get here if (a) someone else is still reading in
1289  * the page, or (b) a previous read attempt failed. We have to
1290  * wait for any active read attempt to finish, and then set up our
1291  * own read attempt if the page is still not BM_VALID.
1292  * StartBufferIO does it all.
1293  */
1294  if (StartBufferIO(buf, true))
1295  {
1296  /*
1297  * If we get here, previous attempts to read the buffer must
1298  * have failed ... but we shall bravely try again.
1299  */
1300  *foundPtr = false;
1301  }
1302  }
1303 
1304  return buf;
1305  }
1306 
1307  /*
1308  * Didn't find it in the buffer pool. We'll have to initialize a new
1309  * buffer. Remember to unlock the mapping lock while doing the work.
1310  */
1311  LWLockRelease(newPartitionLock);
1312 
1313  /*
1314  * Acquire a victim buffer. Somebody else might try to do the same, we
1315  * don't hold any conflicting locks. If so we'll have to undo our work
1316  * later.
1317  */
1318  victim_buffer = GetVictimBuffer(strategy, io_context);
1319  victim_buf_hdr = GetBufferDescriptor(victim_buffer - 1);
1320 
1321  /*
1322  * Try to make a hashtable entry for the buffer under its new tag. If
1323  * somebody else inserted another buffer for the tag, we'll release the
1324  * victim buffer we acquired and use the already inserted one.
1325  */
1326  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1327  existing_buf_id = BufTableInsert(&newTag, newHash, victim_buf_hdr->buf_id);
1328  if (existing_buf_id >= 0)
1329  {
1330  BufferDesc *existing_buf_hdr;
1331  bool valid;
1332 
1333  /*
1334  * Got a collision. Someone has already done what we were about to do.
1335  * We'll just handle this as if it were found in the buffer pool in
1336  * the first place. First, give up the buffer we were planning to
1337  * use.
1338  *
1339  * We could do this after releasing the partition lock, but then we'd
1340  * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
1341  * before acquiring the lock, for the rare case of such a collision.
1342  */
1343  UnpinBuffer(victim_buf_hdr);
1344 
1345  /*
1346  * The victim buffer we acquired previously is clean and unused, let
1347  * it be found again quickly
1348  */
1349  StrategyFreeBuffer(victim_buf_hdr);
1350 
1351  /* remaining code should match code at top of routine */
1352 
1353  existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
1354 
1355  valid = PinBuffer(existing_buf_hdr, strategy);
1356 
1357  /* Can release the mapping lock as soon as we've pinned it */
1358  LWLockRelease(newPartitionLock);
1359 
1360  *foundPtr = true;
1361 
1362  if (!valid)
1363  {
1364  /*
1365  * We can only get here if (a) someone else is still reading in
1366  * the page, or (b) a previous read attempt failed. We have to
1367  * wait for any active read attempt to finish, and then set up our
1368  * own read attempt if the page is still not BM_VALID.
1369  * StartBufferIO does it all.
1370  */
1371  if (StartBufferIO(existing_buf_hdr, true))
1372  {
1373  /*
1374  * If we get here, previous attempts to read the buffer must
1375  * have failed ... but we shall bravely try again.
1376  */
1377  *foundPtr = false;
1378  }
1379  }
1380 
1381  return existing_buf_hdr;
1382  }
1383 
1384  /*
1385  * Need to lock the buffer header too in order to change its tag.
1386  */
1387  victim_buf_state = LockBufHdr(victim_buf_hdr);
1388 
1389  /* some sanity checks while we hold the buffer header lock */
1390  Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
1391  Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
1392 
1393  victim_buf_hdr->tag = newTag;
1394 
1395  /*
1396  * Make sure BM_PERMANENT is set for buffers that must be written at every
1397  * checkpoint. Unlogged buffers only need to be written at shutdown
1398  * checkpoints, except for their "init" forks, which need to be treated
1399  * just like permanent relations.
1400  */
1401  victim_buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1402  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1403  victim_buf_state |= BM_PERMANENT;
1404 
1405  UnlockBufHdr(victim_buf_hdr, victim_buf_state);
1406 
1407  LWLockRelease(newPartitionLock);
1408 
1409  /*
1410  * Buffer contents are currently invalid. Try to obtain the right to
1411  * start I/O. If StartBufferIO returns false, then someone else managed
1412  * to read it before we did, so there's nothing left for BufferAlloc() to
1413  * do.
1414  */
1415  if (StartBufferIO(victim_buf_hdr, true))
1416  *foundPtr = false;
1417  else
1418  *foundPtr = true;
1419 
1420  return victim_buf_hdr;
1421 }
int Buffer
Definition: buf.h:23
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
Definition: buf_internals.h:69
static LWLock * BufMappingPartitionLock(uint32 hashcode)
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:46
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:51
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:118
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:2310
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition: bufmgr.c:1607
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:5189
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:238
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:2459
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:363
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1172
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1785
@ LW_SHARED
Definition: lwlock.h:117
@ LW_EXCLUSIVE
Definition: lwlock.h:116
static char * buf
Definition: pg_test_fsync.c:73
@ INIT_FORKNUM
Definition: relpath.h:53
ResourceOwner CurrentResourceOwner
Definition: resowner.c:165
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:442
Definition: lwlock.h:41
RelFileLocator locator
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:37

References Assert(), BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, StartBufferIO(), StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by ReadBuffer_common().
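
From the caller's side (ReadBuffer_common()), the contract is: when *foundPtr is set, the page is already valid and no I/O is needed; otherwise the caller owns the I/O slot obtained via StartBufferIO() and must complete the read. A hedged sketch of that calling pattern, heavily simplified from ReadBuffer_common() (zero-fill modes, error checks and the local-buffer path are omitted; smgr and the other arguments are assumed in scope):

bool		found;
BufferDesc *bufHdr;

bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
					 strategy, &found, io_context);
if (!found)
{
	/* we won the right to read the block into the buffer ... */
	smgrread(smgr, forkNum, blockNum, BufHdrGetBlock(bufHdr));
	/* ... and to mark it valid, waking anyone waiting in WaitIO() */
	TerminateBufferIO(bufHdr, false, BM_VALID, true);
}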

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 3377 of file bufmgr.c.

3378 {
3379  BufferDesc *bufHdr;
3380 
3381  Assert(BufferIsPinned(buffer));
3382 
3383  if (BufferIsLocal(buffer))
3384  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3385  else
3386  bufHdr = GetBufferDescriptor(buffer - 1);
3387 
3388  /* pinned, so OK to read tag without spinlock */
3389  return bufHdr->tag.blockNum;
3390 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:462

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), index_compute_xid_horizon_for_tuples(), lazy_scan_noprune(), lazy_scan_prune(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), and visibilitymap_set().
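
A minimal usage sketch (hypothetical caller; assumes rel is an open Relation whose block 0 exists):

Buffer		buf = ReadBuffer(rel, 0);
BlockNumber blkno = BufferGetBlockNumber(buf);	/* stable while pinned */

Assert(blkno == 0);
ReleaseBuffer(buf);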

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3638 of file bufmgr.c.

3639 {
3640  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3641  char *page = BufferGetPage(buffer);
3642  XLogRecPtr lsn;
3643  uint32 buf_state;
3644 
3645  /*
3646  * If we don't need locking for correctness, fastpath out.
3647  */
3648  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3649  return PageGetLSN(page);
3650 
3651  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3652  Assert(BufferIsValid(buffer));
3653  Assert(BufferIsPinned(buffer));
3654 
3655  buf_state = LockBufHdr(bufHdr);
3656  lsn = PageGetLSN(page);
3657  UnlockBufHdr(bufHdr, buf_state);
3658 
3659  return lsn;
3660 }
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
#define XLogHintBitIsNeeded()
Definition: xlog.h:118
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 3398 of file bufmgr.c.

3400 {
3401  BufferDesc *bufHdr;
3402 
3403  /* Do the same checks as BufferGetBlockNumber. */
3404  Assert(BufferIsPinned(buffer));
3405 
3406  if (BufferIsLocal(buffer))
3407  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3408  else
3409  bufHdr = GetBufferDescriptor(buffer - 1);
3410 
3411  /* pinned, so OK to read tag without spinlock */
3412  *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
3413  *forknum = BufTagGetForkNum(&bufHdr->tag);
3414  *blknum = bufHdr->tag.blockNum;
3415 }

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
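
Typical usage decomposes a pinned buffer back into its on-disk identity, for example when registering a page in a WAL record. A short sketch (buf is assumed pinned):

RelFileLocator rlocator;
ForkNumber	forknum;
BlockNumber blknum;

BufferGetTag(buf, &rlocator, &forknum, &blknum);
/* rlocator/forknum/blknum now identify the page independently of buf */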

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 2157 of file bufmgr.c.

2158 {
2159  BufferDesc *bufHdr;
2160 
2161  if (BufferIsLocal(buffer))
2162  {
2163  int bufid = -buffer - 1;
2164 
2165  bufHdr = GetLocalBufferDescriptor(bufid);
2166  }
2167  else
2168  {
2169  bufHdr = GetBufferDescriptor(buffer - 1);
2170  }
2171 
2172  Assert(BufferIsPinned(buffer));
2173  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2174  LW_EXCLUSIVE));
2175 
2176  return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
2177 }
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:234
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1941
pg_atomic_uint32 state

References Assert(), BM_DIRTY, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by XLogRegisterBuffer().

◆ BufferIsExclusiveLocked()

bool BufferIsExclusiveLocked ( Buffer  buffer)

Definition at line 2128 of file bufmgr.c.

2129 {
2130  BufferDesc *bufHdr;
2131 
2132  if (BufferIsLocal(buffer))
2133  {
2134  int bufid = -buffer - 1;
2135 
2136  bufHdr = GetLocalBufferDescriptor(bufid);
2137  }
2138  else
2139  {
2140  bufHdr = GetBufferDescriptor(buffer - 1);
2141  }
2142 
2143  Assert(BufferIsPinned(buffer));
2144  return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2145  LW_EXCLUSIVE);
2146 }

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), LW_EXCLUSIVE, and LWLockHeldByMeInMode().

Referenced by XLogRegisterBuffer().
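
Together with BufferIsDirty() above, this exists to support an assertion in XLogRegisterBuffer(): a buffer registered for a WAL record is expected to be exclusively locked and already marked dirty. A simplified sketch of that check (paraphrased, not quoted, from xloginsert.c):

/* Unless REGBUF_NO_CHANGE was passed, the registered buffer must be
 * exclusively locked and dirty before it can carry a WAL record. */
if (!(flags & REGBUF_NO_CHANGE))
	Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));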

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 3608 of file bufmgr.c.

3609 {
3610  BufferDesc *bufHdr;
3611 
3612  /* Local buffers are used only for temp relations. */
3613  if (BufferIsLocal(buffer))
3614  return false;
3615 
3616  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3617  Assert(BufferIsValid(buffer));
3618  Assert(BufferIsPinned(buffer));
3619 
3620  /*
3621  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
3622  * need not bother with the buffer header spinlock. Even if someone else
3623  * changes the buffer header state while we're doing this, the state is
3624  * changed atomically, so we'll read the old value or the new value, but
3625  * not random garbage.
3626  */
3627  bufHdr = GetBufferDescriptor(buffer - 1);
3628  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3629 }

References Assert(), BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().
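
Its caller, SetHintBits(), combines this with BufferGetLSNAtomic() above: on a permanent buffer a hint bit may only be set once the commit record is flushed, or once the page LSN provides the usual torn-page interlock. A condensed sketch of that logic (paraphrased from SetHintBits(); tuple, commitLSN and the other variables are assumed in scope):

/* Condensed from SetHintBits(): skip the hint if the commit record is
 * not yet flushed and the page LSN offers no interlock. */
if (BufferIsPermanent(buffer) && XLogNeedsFlush(commitLSN) &&
	BufferGetLSNAtomic(buffer) < commitLSN)
	return;					/* cannot set the hint safely yet */

tuple->t_infomask |= infomask;
MarkBufferDirtyHint(buffer, true);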

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 2565 of file bufmgr.c.

2566 {
2567  uint32 buf_state;
2568  int buf_id;
2569  int num_to_scan;
2570  int num_spaces;
2571  int num_processed;
2572  int num_written;
2573  CkptTsStatus *per_ts_stat = NULL;
2574  Oid last_tsid;
2575  binaryheap *ts_heap;
2576  int i;
2577  int mask = BM_DIRTY;
2578  WritebackContext wb_context;
2579 
2580  /*
2581  * Unless this is a shutdown checkpoint or we have been explicitly told,
2582  * we write only permanent, dirty buffers. But at shutdown or end of
2583  * recovery, we write all dirty buffers.
2584  */
2585  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
2586  CHECKPOINT_FLUSH_ALL))))
2587  mask |= BM_PERMANENT;
2588 
2589  /*
2590  * Loop over all buffers, and mark the ones that need to be written with
2591  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
2592  * can estimate how much work needs to be done.
2593  *
2594  * This allows us to write only those pages that were dirty when the
2595  * checkpoint began, and not those that get dirtied while it proceeds.
2596  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
2597  * later in this function, or by normal backends or the bgwriter cleaning
2598  * scan, the flag is cleared. Any buffer dirtied after this point won't
2599  * have the flag set.
2600  *
2601  * Note that if we fail to write some buffer, we may leave buffers with
2602  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
2603  * certainly need to be written for the next checkpoint attempt, too.
2604  */
2605  num_to_scan = 0;
2606  for (buf_id = 0; buf_id < NBuffers; buf_id++)
2607  {
2608  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
2609 
2610  /*
2611  * Header spinlock is enough to examine BM_DIRTY, see comment in
2612  * SyncOneBuffer.
2613  */
2614  buf_state = LockBufHdr(bufHdr);
2615 
2616  if ((buf_state & mask) == mask)
2617  {
2618  CkptSortItem *item;
2619 
2620  buf_state |= BM_CHECKPOINT_NEEDED;
2621 
2622  item = &CkptBufferIds[num_to_scan++];
2623  item->buf_id = buf_id;
2624  item->tsId = bufHdr->tag.spcOid;
2625  item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
2626  item->forkNum = BufTagGetForkNum(&bufHdr->tag);
2627  item->blockNum = bufHdr->tag.blockNum;
2628  }
2629 
2630  UnlockBufHdr(bufHdr, buf_state);
2631 
2632  /* Check for barrier events in case NBuffers is large. */
2633  if (ProcSignalBarrierPending)
2634  ProcessProcSignalBarrier();
2635  }
2636 
2637  if (num_to_scan == 0)
2638  return; /* nothing to do */
2639 
2640  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2641 
2642  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2643 
2644  /*
2645  * Sort buffers that need to be written to reduce the likelihood of random
2646  * IO. The sorting is also important for the implementation of balancing
2647  * writes between tablespaces. Without balancing writes we'd potentially
2648  * end up writing to the tablespaces one-by-one; possibly overloading the
2649  * underlying system.
2650  */
2651  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2652 
2653  num_spaces = 0;
2654 
2655  /*
2656  * Allocate progress status for each tablespace with buffers that need to
2657  * be flushed. This requires the to-be-flushed array to be sorted.
2658  */
2659  last_tsid = InvalidOid;
2660  for (i = 0; i < num_to_scan; i++)
2661  {
2662  CkptTsStatus *s;
2663  Oid cur_tsid;
2664 
2665  cur_tsid = CkptBufferIds[i].tsId;
2666 
2667  /*
2668  * Grow array of per-tablespace status structs, every time a new
2669  * tablespace is found.
2670  */
2671  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2672  {
2673  Size sz;
2674 
2675  num_spaces++;
2676 
2677  /*
2678  * Not worth adding grow-by-power-of-2 logic here - even with a
2679  * few hundred tablespaces this should be fine.
2680  */
2681  sz = sizeof(CkptTsStatus) * num_spaces;
2682 
2683  if (per_ts_stat == NULL)
2684  per_ts_stat = (CkptTsStatus *) palloc(sz);
2685  else
2686  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2687 
2688  s = &per_ts_stat[num_spaces - 1];
2689  memset(s, 0, sizeof(*s));
2690  s->tsId = cur_tsid;
2691 
2692  /*
2693  * The first buffer in this tablespace. As CkptBufferIds is sorted
2694  * by tablespace all (s->num_to_scan) buffers in this tablespace
2695  * will follow afterwards.
2696  */
2697  s->index = i;
2698 
2699  /*
2700  * progress_slice will be determined once we know how many buffers
2701  * are in each tablespace, i.e. after this loop.
2702  */
2703 
2704  last_tsid = cur_tsid;
2705  }
2706  else
2707  {
2708  s = &per_ts_stat[num_spaces - 1];
2709  }
2710 
2711  s->num_to_scan++;
2712 
2713  /* Check for barrier events. */
2714  if (ProcSignalBarrierPending)
2715  ProcessProcSignalBarrier();
2716  }
2717 
2718  Assert(num_spaces > 0);
2719 
2720  /*
2721  * Build a min-heap over the write-progress in the individual tablespaces,
2722  * and compute how large a portion of the total progress a single
2723  * processed buffer is.
2724  */
2725  ts_heap = binaryheap_allocate(num_spaces,
2726  ts_ckpt_progress_comparator,
2727  NULL);
2728 
2729  for (i = 0; i < num_spaces; i++)
2730  {
2731  CkptTsStatus *ts_stat = &per_ts_stat[i];
2732 
2733  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2734 
2735  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2736  }
2737 
2738  binaryheap_build(ts_heap);
2739 
2740  /*
2741  * Iterate through to-be-checkpointed buffers and write the ones (still)
2742  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2743  * tablespaces; otherwise the sorting would lead to only one tablespace
2744  * receiving writes at a time, making inefficient use of the hardware.
2745  */
2746  num_processed = 0;
2747  num_written = 0;
2748  while (!binaryheap_empty(ts_heap))
2749  {
2750  BufferDesc *bufHdr = NULL;
2751  CkptTsStatus *ts_stat = (CkptTsStatus *)
2752  DatumGetPointer(binaryheap_first(ts_heap));
2753 
2754  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2755  Assert(buf_id != -1);
2756 
2757  bufHdr = GetBufferDescriptor(buf_id);
2758 
2759  num_processed++;
2760 
2761  /*
2762  * We don't need to acquire the lock here, because we're only looking
2763  * at a single bit. It's possible that someone else writes the buffer
2764  * and clears the flag right after we check, but that doesn't matter
2765  * since SyncOneBuffer will then do nothing. However, there is a
2766  * further race condition: it's conceivable that between the time we
2767  * examine the bit here and the time SyncOneBuffer acquires the lock,
2768  * someone else not only wrote the buffer but replaced it with another
2769  * page and dirtied it. In that improbable case, SyncOneBuffer will
2770  * write the buffer though we didn't need to. It doesn't seem worth
2771  * guarding against this, though.
2772  */
2773  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2774  {
2775  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2776  {
2777  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2778  PendingCheckpointerStats.buffers_written++;
2779  num_written++;
2780  }
2781  }
2782 
2783  /*
2784  * Measure progress independent of actually having to flush the buffer
2785  * - otherwise writing become unbalanced.
2786  */
2787  ts_stat->progress += ts_stat->progress_slice;
2788  ts_stat->num_scanned++;
2789  ts_stat->index++;
2790 
2791  /* Have all the buffers from the tablespace been processed? */
2792  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2793  {
2794  binaryheap_remove_first(ts_heap);
2795  }
2796  else
2797  {
2798  /* update heap with the new progress */
2799  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2800  }
2801 
2802  /*
2803  * Sleep to throttle our I/O rate.
2804  *
2805  * (This will check for barrier events even if it doesn't sleep.)
2806  */
2807  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2808  }
2809 
2810  /*
2811  * Issue all pending flushes. Only checkpointer calls BufferSync(), so
2812  * IOContext will always be IOCONTEXT_NORMAL.
2813  */
2814  IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
2815 
2816  pfree(per_ts_stat);
2817  per_ts_stat = NULL;
2818  binaryheap_free(ts_heap);
2819 
2820  /*
2821  * Update checkpoint statistics. As noted above, this doesn't include
2822  * buffers written by other backends or bgwriter scan.
2823  */
2824  CheckpointStats.ckpt_bufs_written += num_written;
2825 
2826  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2827 }
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:138
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:255
bh_node_type binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:177
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:192
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:39
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:75
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:116
#define binaryheap_empty(h)
Definition: binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:68
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:5509
int checkpoint_flush_after
Definition: bufmgr.c:159
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:5532
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition: bufmgr.c:5589
struct CkptTsStatus CkptTsStatus
double float8
Definition: c.h:617
size_t Size
Definition: c.h:592
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:711
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:38
int i
Definition: isn.c:73
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1528
void * palloc(Size size)
Definition: mcxt.c:1304
@ IOCONTEXT_NORMAL
Definition: pgstat.h:290
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:464
int ckpt_bufs_written
Definition: xlog.h:165
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:110
int index
Definition: bufmgr.c:118
int num_scanned
Definition: bufmgr.c:115
float8 progress
Definition: bufmgr.c:109
int num_to_scan
Definition: bufmgr.c:113
Oid tsId
Definition: bufmgr.c:100
PgStat_Counter buffers_written
Definition: pgstat.h:270
Oid spcOid
Definition: buf_internals.h:94
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:138
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:141
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:137

References Assert(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr(), and WritebackContextInit().

Referenced by CheckPointBuffers().
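
The progress_slice arithmetic is what balances writes across tablespaces: each processed buffer advances its tablespace's progress by num_to_scan divided by that tablespace's own buffer count, so the min-heap always selects the tablespace proportionally furthest behind and all of them finish at about the same time. A standalone sketch with two invented tablespaces:

#include <stdio.h>

int
main(void)
{
	int			num_to_scan = 300;		/* total buffers to write */
	int			counts[2] = {200, 100}; /* buffers per tablespace */
	double		progress[2] = {0, 0};
	int			written[2] = {0, 0};

	for (int step = 0; step < num_to_scan; step++)
	{
		/* pick the least-progressed tablespace (the heap's job) */
		int			ts = (progress[0] <= progress[1]) ? 0 : 1;

		progress[ts] += (double) num_to_scan / counts[ts];
		written[ts]++;
	}
	/* prints 200 and 100: writes interleave 2:1, not one-by-one */
	printf("written: ts0=%d ts1=%d\n", written[0], written[1]);
	return 0;
}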

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inline static

Definition at line 5444 of file bufmgr.c.

5445 {
5446  int ret;
5447  RelFileLocator rlocatora;
5448  RelFileLocator rlocatorb;
5449 
5450  rlocatora = BufTagGetRelFileLocator(ba);
5451  rlocatorb = BufTagGetRelFileLocator(bb);
5452 
5453  ret = rlocator_comparator(&rlocatora, &rlocatorb);
5454 
5455  if (ret != 0)
5456  return ret;
5457 
5458  if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
5459  return -1;
5460  if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
5461  return 1;
5462 
5463  if (ba->blockNum < bb->blockNum)
5464  return -1;
5465  if (ba->blockNum > bb->blockNum)
5466  return 1;
5467 
5468  return 0;
5469 }
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:5363

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 4842 of file bufmgr.c.

4843 {
4844  if (BufferIsLocal(buffer))
4845  {
4846  if (LocalRefCount[-buffer - 1] != 1)
4847  elog(ERROR, "incorrect local pin count: %d",
4848  LocalRefCount[-buffer - 1]);
4849  }
4850  else
4851  {
4852  if (GetPrivateRefCount(buffer) != 1)
4853  elog(ERROR, "incorrect local pin count: %d",
4854  GetPrivateRefCount(buffer));
4855  }
4856 }
#define ERROR
Definition: elog.h:39

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), and LockBufferForCleanup().
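
A minimal usage sketch of the sequence this check guards, as in LockBufferForCleanup() callers; the helper name, rel and blkno are hypothetical, not taken from bufmgr.c:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: take a cleanup lock on a block we pinned exactly once. */
static void
cleanup_lock_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* acquires our single pin */

    CheckBufferIsPinnedOnce(buf);   /* elog(ERROR) unless the local pin count is 1 */
    LockBufferForCleanup(buf);      /* then wait out other backends' pins */

    /* ... page cleanup that requires exclusive access would go here ... */

    UnlockReleaseBuffer(buf);       /* drop the content lock and the pin */
}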

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 3272 of file bufmgr.c.

3273 {
3274 #ifdef USE_ASSERT_CHECKING
3275  int RefCountErrors = 0;
3276  PrivateRefCountEntry *res;
3277  int i;
3278  char *s;
3279 
3280  /* check the array */
3281  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
3282  {
3283  res = &PrivateRefCountArray[i];
3284 
3285  if (res->buffer != InvalidBuffer)
3286  {
3287  s = DebugPrintBufferRefcount(res->buffer);
3288  elog(WARNING, "buffer refcount leak: %s", s);
3289  pfree(s);
3290 
3291  RefCountErrors++;
3292  }
3293  }
3294 
3295  /* if necessary search the hash */
3296  if (PrivateRefCountOverflowed)
3297  {
3298  HASH_SEQ_STATUS hstat;
3299 
3300  hash_seq_init(&hstat, PrivateRefCountHash);
3301  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
3302  {
3303  s = DebugPrintBufferRefcount(res->buffer);
3304  elog(WARNING, "buffer refcount leak: %s", s);
3305  pfree(s);
3306  RefCountErrors++;
3307  }
3308  }
3309 
3310  Assert(RefCountErrors == 0);
3311 #endif
3312 }
#define InvalidBuffer
Definition: buf.h:25
char * DebugPrintBufferRefcount(Buffer buffer)
Definition: bufmgr.c:3318
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:91
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:196
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:197
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1395
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385

References Assert(), DebugPrintBufferRefcount(), elog, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 3363 of file bufmgr.c.

3364 {
3365  BufferSync(flags);
3366 }
static void BufferSync(int flags)
Definition: bufmgr.c:2565

References BufferSync().

Referenced by CheckPointGuts().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem *  a,
const CkptSortItem *  b 
)
inline static

Definition at line 5478 of file bufmgr.c.

5479 {
5480  /* compare tablespace */
5481  if (a->tsId < b->tsId)
5482  return -1;
5483  else if (a->tsId > b->tsId)
5484  return 1;
5485  /* compare relation */
5486  if (a->relNumber < b->relNumber)
5487  return -1;
5488  else if (a->relNumber > b->relNumber)
5489  return 1;
5490  /* compare fork */
5491  else if (a->forkNum < b->forkNum)
5492  return -1;
5493  else if (a->forkNum > b->forkNum)
5494  return 1;
5495  /* compare block number */
5496  else if (a->blockNum < b->blockNum)
5497  return -1;
5498  else if (a->blockNum > b->blockNum)
5499  return 1;
5500  /* equal page IDs are unlikely, but not impossible */
5501  return 0;
5502 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, and b.
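
The comparator is static and reached through the specialized checkpoint sort; a standalone sketch of the same (tablespace, relation, fork, block) ordering, assuming a hypothetical qsort(3) wrapper:

#include "postgres.h"
#include <stdlib.h>
#include "storage/buf_internals.h"  /* CkptSortItem */

/* Hypothetical qsort-compatible wrapper mirroring ckpt_buforder_comparator(). */
static int
ckpt_item_cmp(const void *pa, const void *pb)
{
    const CkptSortItem *a = (const CkptSortItem *) pa;
    const CkptSortItem *b = (const CkptSortItem *) pb;

    if (a->tsId != b->tsId)
        return (a->tsId < b->tsId) ? -1 : 1;
    if (a->relNumber != b->relNumber)
        return (a->relNumber < b->relNumber) ? -1 : 1;
    if (a->forkNum != b->forkNum)
        return (a->forkNum < b->forkNum) ? -1 : 1;
    if (a->blockNum != b->blockNum)
        return (a->blockNum < b->blockNum) ? -1 : 1;
    return 0;
}

/* usage: qsort(items, n, sizeof(CkptSortItem), ckpt_item_cmp); */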

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 4821 of file bufmgr.c.

4822 {
4823  BufferDesc *buf;
4824 
4825  Assert(BufferIsPinned(buffer));
4826  if (BufferIsLocal(buffer))
4827  return true; /* act as though we got it */
4828 
4829  buf = GetBufferDescriptor(buffer - 1);
4830 
4831  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
4832  LW_EXCLUSIVE);
4833 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1343

References Assert(), buf, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().
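
A sketch of the opportunistic pattern used by the callers above; the helper is hypothetical, and the buffer must already be pinned:

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical helper: modify a page only if the lock is free right now. */
static bool
try_modify_page(Buffer buf)         /* caller already holds a pin */
{
    if (!ConditionalLockBuffer(buf))
        return false;               /* busy; caller can pick another page */

    /* ... modify the page under the exclusive content lock ... */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    return true;
}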

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5036 of file bufmgr.c.

5037 {
5038  BufferDesc *bufHdr;
5039  uint32 buf_state,
5040  refcount;
5041 
5042  Assert(BufferIsValid(buffer));
5043 
5044  if (BufferIsLocal(buffer))
5045  {
5046  refcount = LocalRefCount[-buffer - 1];
5047  /* There should be exactly one pin */
5048  Assert(refcount > 0);
5049  if (refcount != 1)
5050  return false;
5051  /* Nobody else to wait for */
5052  return true;
5053  }
5054 
5055  /* There should be exactly one local pin */
5056  refcount = GetPrivateRefCount(buffer);
5057  Assert(refcount);
5058  if (refcount != 1)
5059  return false;
5060 
5061  /* Try to acquire lock */
5062  if (!ConditionalLockBuffer(buffer))
5063  return false;
5064 
5065  bufHdr = GetBufferDescriptor(buffer - 1);
5066  buf_state = LockBufHdr(bufHdr);
5067  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5068 
5069  Assert(refcount > 0);
5070  if (refcount == 1)
5071  {
5072  /* Successfully acquired exclusive lock with pincount 1 */
5073  UnlockBufHdr(bufHdr, buf_state);
5074  return true;
5075  }
5076 
5077  /* Failed, so release the lock */
5078  UnlockBufHdr(bufHdr, buf_state);
5079  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5080  return false;
5081 }
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:4821
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4795
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
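
A sketch of the skip-if-busy pattern of heap_page_prune_opt(); the helper is hypothetical and assumes the caller holds exactly one pin:

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical helper: do pincount==1 cleanup only if it is free right now. */
static void
maybe_cleanup_page(Buffer buf)
{
    if (!ConditionalLockBufferForCleanup(buf))
        return;                     /* another backend holds a pin; skip */

    /* ... work that requires an exclusive lock with pincount == 1 ... */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}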

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 4434 of file bufmgr.c.

4436 {
4437  char relpersistence;
4438  SMgrRelation src_rel;
4439  SMgrRelation dst_rel;
4440 
4441  /* Set the relpersistence. */
4442  relpersistence = permanent ?
4443  RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
4444 
4445  src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
4446  dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
4447 
4448  /*
4449  * Create and copy all forks of the relation. During create database we
4450  * have a separate cleanup mechanism which deletes complete database
4451  * directory. Therefore, each individual relation doesn't need to be
4452  * registered for cleanup.
4453  */
4454  RelationCreateStorage(dst_rlocator, relpersistence, false);
4455 
4456  /* copy main fork. */
4457  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
4458  permanent);
4459 
4460  /* copy those extra forks that exist */
4461  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
4462  forkNum <= MAX_FORKNUM; forkNum++)
4463  {
4464  if (smgrexists(src_rel, forkNum))
4465  {
4466  smgrcreate(dst_rel, forkNum, false);
4467 
4468  /*
4469  * WAL log creation if the relation is persistent, or this is the
4470  * init fork of an unlogged relation.
4471  */
4472  if (permanent || forkNum == INIT_FORKNUM)
4473  log_smgrcreate(&dst_rlocator, forkNum);
4474 
4475  /* Copy a fork's data, block by block. */
4476  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
4477  permanent);
4478  }
4479  }
4480 }
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:4343
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
ForkNumber
Definition: relpath.h:48
@ MAIN_FORKNUM
Definition: relpath.h:50
#define MAX_FORKNUM
Definition: relpath.h:62
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:198
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:411
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:398
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char* DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 3318 of file bufmgr.c.

3319 {
3320  BufferDesc *buf;
3321  int32 loccount;
3322  char *path;
3323  char *result;
3324  ProcNumber backend;
3325  uint32 buf_state;
3326 
3327  Assert(BufferIsValid(buffer));
3328  if (BufferIsLocal(buffer))
3329  {
3330  buf = GetLocalBufferDescriptor(-buffer - 1);
3331  loccount = LocalRefCount[-buffer - 1];
3332  backend = MyProcNumber;
3333  }
3334  else
3335  {
3336  buf = GetBufferDescriptor(buffer - 1);
3337  loccount = GetPrivateRefCount(buffer);
3338  backend = INVALID_PROC_NUMBER;
3339  }
3340 
3341  /* theoretically we should lock the bufhdr here */
3342  path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
3343  BufTagGetForkNum(&buf->tag));
3344  buf_state = pg_atomic_read_u32(&buf->state);
3345 
3346  result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3347  buffer, path,
3348  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
3349  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
3350  pfree(path);
3351  return result;
3352 }
#define BUF_FLAG_MASK
Definition: buf_internals.h:48
ProcNumber MyProcNumber
Definition: globals.c:87
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:85

References Assert(), buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pfree(), pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().
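
The result is palloc'd, so callers free it, as the leak checks above do; a minimal hypothetical sketch:

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical debugging aid: log the pin state of a buffer we hold. */
static void
log_buffer_pins(Buffer buf)
{
    char       *s = DebugPrintBufferRefcount(buf);

    elog(DEBUG1, "buffer state: %s", s);
    pfree(s);                       /* result lives in the current memory context */
}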

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4039 of file bufmgr.c.

4040 {
4041  int i;
4042 
4043  /*
4044  * We needn't consider local buffers, since by assumption the target
4045  * database isn't our own.
4046  */
4047 
4048  for (i = 0; i < NBuffers; i++)
4049  {
4050  BufferDesc *bufHdr = GetBufferDescriptor(i);
4051  uint32 buf_state;
4052 
4053  /*
4054  * As in DropRelationBuffers, an unlocked precheck should be safe and
4055  * saves some cycles.
4056  */
4057  if (bufHdr->tag.dbOid != dbid)
4058  continue;
4059 
4060  buf_state = LockBufHdr(bufHdr);
4061  if (bufHdr->tag.dbOid == dbid)
4062  InvalidateBuffer(bufHdr); /* releases spinlock */
4063  else
4064  UnlockBufHdr(bufHdr, buf_state);
4065  }
4066 }
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1441
Oid dbOid
Definition: buf_internals.h:95

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber *  forkNum,
int  nforks,
BlockNumber *  firstDelBlock 
)

Definition at line 3684 of file bufmgr.c.

3686 {
3687  int i;
3688  int j;
3689  RelFileLocatorBackend rlocator;
3690  BlockNumber nForkBlock[MAX_FORKNUM];
3691  uint64 nBlocksToInvalidate = 0;
3692 
3693  rlocator = smgr_reln->smgr_rlocator;
3694 
3695  /* If it's a local relation, it's localbuf.c's problem. */
3696  if (RelFileLocatorBackendIsTemp(rlocator))
3697  {
3698  if (rlocator.backend == MyProcNumber)
3699  {
3700  for (j = 0; j < nforks; j++)
3701  DropRelationLocalBuffers(rlocator.locator, forkNum[j],
3702  firstDelBlock[j]);
3703  }
3704  return;
3705  }
3706 
3707  /*
3708  * To remove all the pages of the specified relation forks from the buffer
3709  * pool, we need to scan the entire buffer pool but we can optimize it by
3710  * finding the buffers from BufMapping table provided we know the exact
3711  * size of each fork of the relation. The exact size is required to ensure
3712  * that we don't leave any buffer for the relation being dropped as
3713  * otherwise the background writer or checkpointer can lead to a PANIC
3714  * error while flushing buffers corresponding to files that don't exist.
3715  *
3716  * To know the exact size, we rely on the size cached for each fork by us
3717  * during recovery which limits the optimization to recovery and on
3718  * standbys but we can easily extend it once we have shared cache for
3719  * relation size.
3720  *
3721  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3722  * and the future writes keeps the cached value up-to-date. See
3723  * smgrextend. It is possible that the value of the first lseek is smaller
3724  * than the actual number of existing blocks in the file due to buggy
3725  * Linux kernels that might not have accounted for the recent write. But
3726  * that should be fine because there must not be any buffers after that
3727  * file size.
3728  */
3729  for (i = 0; i < nforks; i++)
3730  {
3731  /* Get the number of blocks for a relation's fork */
3732  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3733 
3734  if (nForkBlock[i] == InvalidBlockNumber)
3735  {
3736  nBlocksToInvalidate = InvalidBlockNumber;
3737  break;
3738  }
3739 
3740  /* calculate the number of blocks to be invalidated */
3741  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3742  }
3743 
3744  /*
3745  * We apply the optimization iff the total number of blocks to invalidate
3746  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3747  */
3748  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3749  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3750  {
3751  for (j = 0; j < nforks; j++)
3752  FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
3753  nForkBlock[j], firstDelBlock[j]);
3754  return;
3755  }
3756 
3757  for (i = 0; i < NBuffers; i++)
3758  {
3759  BufferDesc *bufHdr = GetBufferDescriptor(i);
3760  uint32 buf_state;
3761 
3762  /*
3763  * We can make this a tad faster by prechecking the buffer tag before
3764  * we attempt to lock the buffer; this saves a lot of lock
3765  * acquisitions in typical cases. It should be safe because the
3766  * caller must have AccessExclusiveLock on the relation, or some other
3767  * reason to be certain that no one is loading new pages of the rel
3768  * into the buffer pool. (Otherwise we might well miss such pages
3769  * entirely.) Therefore, while the tag might be changing while we
3770  * look at it, it can't be changing *to* a value we care about, only
3771  * *away* from such a value. So false negatives are impossible, and
3772  * false positives are safe because we'll recheck after getting the
3773  * buffer lock.
3774  *
3775  * We could check forkNum and blockNum as well as the rlocator, but
3776  * the incremental win from doing so seems small.
3777  */
3778  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
3779  continue;
3780 
3781  buf_state = LockBufHdr(bufHdr);
3782 
3783  for (j = 0; j < nforks; j++)
3784  {
3785  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
3786  BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
3787  bufHdr->tag.blockNum >= firstDelBlock[j])
3788  {
3789  InvalidateBuffer(bufHdr); /* releases spinlock */
3790  break;
3791  }
3792  }
3793  if (j >= nforks)
3794  UnlockBufHdr(bufHdr, buf_state);
3795  }
3796 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:82
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:3978
int j
Definition: isn.c:74
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:489
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:679

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation *  smgr_reln,
int  nlocators 
)

Definition at line 3807 of file bufmgr.c.

3808 {
3809  int i;
3810  int n = 0;
3811  SMgrRelation *rels;
3812  BlockNumber (*block)[MAX_FORKNUM + 1];
3813  uint64 nBlocksToInvalidate = 0;
3814  RelFileLocator *locators;
3815  bool cached = true;
3816  bool use_bsearch;
3817 
3818  if (nlocators == 0)
3819  return;
3820 
3821  rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
3822 
3823  /* If it's a local relation, it's localbuf.c's problem. */
3824  for (i = 0; i < nlocators; i++)
3825  {
3826  if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
3827  {
3828  if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
3829  DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
3830  }
3831  else
3832  rels[n++] = smgr_reln[i];
3833  }
3834 
3835  /*
3836  * If there are no non-local relations, then we're done. Release the
3837  * memory and return.
3838  */
3839  if (n == 0)
3840  {
3841  pfree(rels);
3842  return;
3843  }
3844 
3845  /*
3846  * This is used to remember the number of blocks for all the relations
3847  * forks.
3848  */
3849  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3850  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3851 
3852  /*
3853  * We can avoid scanning the entire buffer pool if we know the exact size
3854  * of each of the given relation forks. See DropRelationBuffers.
3855  */
3856  for (i = 0; i < n && cached; i++)
3857  {
3858  for (int j = 0; j <= MAX_FORKNUM; j++)
3859  {
3860  /* Get the number of blocks for a relation's fork. */
3861  block[i][j] = smgrnblocks_cached(rels[i], j);
3862 
3863  /* We need to only consider the relation forks that exists. */
3864  if (block[i][j] == InvalidBlockNumber)
3865  {
3866  if (!smgrexists(rels[i], j))
3867  continue;
3868  cached = false;
3869  break;
3870  }
3871 
3872  /* calculate the total number of blocks to be invalidated */
3873  nBlocksToInvalidate += block[i][j];
3874  }
3875  }
3876 
3877  /*
3878  * We apply the optimization iff the total number of blocks to invalidate
3879  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3880  */
3881  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3882  {
3883  for (i = 0; i < n; i++)
3884  {
3885  for (int j = 0; j <= MAX_FORKNUM; j++)
3886  {
3887  /* ignore relation forks that doesn't exist */
3888  if (!BlockNumberIsValid(block[i][j]))
3889  continue;
3890 
3891  /* drop all the buffers for a particular relation fork */
3892  FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
3893  j, block[i][j], 0);
3894  }
3895  }
3896 
3897  pfree(block);
3898  pfree(rels);
3899  return;
3900  }
3901 
3902  pfree(block);
3903  locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
3904  for (i = 0; i < n; i++)
3905  locators[i] = rels[i]->smgr_rlocator.locator;
3906 
3907  /*
3908  * For low number of relations to drop just use a simple walk through, to
3909  * save the bsearch overhead. The threshold to use is rather a guess than
3910  * an exactly determined value, as it depends on many factors (CPU and RAM
3911  * speeds, amount of shared buffers etc.).
3912  */
3913  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3914 
3915  /* sort the list of rlocators if necessary */
3916  if (use_bsearch)
3917  qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
3918 
3919  for (i = 0; i < NBuffers; i++)
3920  {
3921  RelFileLocator *rlocator = NULL;
3922  BufferDesc *bufHdr = GetBufferDescriptor(i);
3923  uint32 buf_state;
3924 
3925  /*
3926  * As in DropRelationBuffers, an unlocked precheck should be safe and
3927  * saves some cycles.
3928  */
3929 
3930  if (!use_bsearch)
3931  {
3932  int j;
3933 
3934  for (j = 0; j < n; j++)
3935  {
3936  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
3937  {
3938  rlocator = &locators[j];
3939  break;
3940  }
3941  }
3942  }
3943  else
3944  {
3945  RelFileLocator locator;
3946 
3947  locator = BufTagGetRelFileLocator(&bufHdr->tag);
3948  rlocator = bsearch((const void *) &(locator),
3949  locators, n, sizeof(RelFileLocator),
3950  rlocator_comparator);
3951  }
3952 
3953  /* buffer doesn't belong to any of the given relfilelocators; skip it */
3954  if (rlocator == NULL)
3955  continue;
3956 
3957  buf_state = LockBufHdr(bufHdr);
3958  if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
3959  InvalidateBuffer(bufHdr); /* releases spinlock */
3960  else
3961  UnlockBufHdr(bufHdr, buf_state);
3962  }
3963 
3964  pfree(locators);
3965  pfree(rels);
3966 }
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:74
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:537
#define qsort(a, b, c, d)
Definition: port.h:449

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 838 of file bufmgr.c.

842 {
843  Buffer buf;
844  uint32 extend_by = 1;
845 
846  ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
847  &buf, &extend_by);
848 
849  return buf;
850 }
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:870

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
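
A sketch of the common call pattern (cf. _bt_allocbuf()): with EB_LOCK_FIRST the new page comes back pinned, exclusively locked, and zero-filled. The helper is hypothetical, and a real caller would also WAL-log the initialization:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* Hypothetical helper: append one empty, initialized page to a relation. */
static Buffer
append_initialized_page(Relation rel)
{
    Buffer      buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM,
                                        NULL /* default strategy */,
                                        EB_LOCK_FIRST);

    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
    MarkBufferDirty(buf);

    return buf;                     /* caller unlocks and releases when done */
}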

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer *  buffers,
uint32 *  extended_by 
)

Definition at line 870 of file bufmgr.c.

877 {
878  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
879  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
880  Assert(extend_by > 0);
881 
882  if (bmr.smgr == NULL)
883  {
884  bmr.smgr = RelationGetSmgr(bmr.rel);
885  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
886  }
887 
888  return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
889  extend_by, InvalidBlockNumber,
890  buffers, extended_by);
891 }
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:1804
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
struct SMgrRelationData * smgr
Definition: bufmgr.h:102
Form_pg_class rd_rel
Definition: rel.h:111

References Assert(), ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, RelationGetSmgr(), BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), and RelationAddBlocks().
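
A bulk-extension sketch in the style of RelationAddBlocks(); the function may extend by fewer pages than requested (pin limits, concurrent extension), which is why the actual count comes back through extended_by. Names are hypothetical:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: grow a relation by up to 8 blocks in one call. */
static BlockNumber
bulk_extend(Relation rel)
{
    Buffer      bufs[8];
    uint32      extended_by = 0;
    BlockNumber first_block;

    first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                      NULL, 0 /* flags */,
                                      lengthof(bufs), bufs, &extended_by);

    /* blocks first_block .. first_block + extended_by - 1 now exist */
    for (uint32 i = 0; i < extended_by; i++)
        ReleaseBuffer(bufs[i]);     /* unpin; the pages stay zero-filled */

    return first_block;
}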

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 1804 of file bufmgr.c.

1812 {
1813  BlockNumber first_block;
1814 
1815  TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
1816  bmr.smgr->smgr_rlocator.locator.spcOid,
1817  bmr.smgr->smgr_rlocator.locator.dbOid,
1818  bmr.smgr->smgr_rlocator.locator.relNumber,
1819  bmr.smgr->smgr_rlocator.backend,
1820  extend_by);
1821 
1822  if (bmr.relpersistence == RELPERSISTENCE_TEMP)
1823  first_block = ExtendBufferedRelLocal(bmr, fork, flags,
1824  extend_by, extend_upto,
1825  buffers, &extend_by);
1826  else
1827  first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
1828  extend_by, extend_upto,
1829  buffers, &extend_by);
1830  *extended_by = extend_by;
1831 
1832  TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
1833  bmr.smgr->smgr_rlocator.locator.spcOid,
1834  bmr.smgr->smgr_rlocator.locator.dbOid,
1835  bmr.smgr->smgr_rlocator.locator.relNumber,
1836  bmr.smgr->smgr_rlocator.backend,
1837  *extended_by,
1838  first_block);
1839 
1840  return first_block;
1841 }
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:1848
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: localbuf.c:313
RelFileNumber relNumber

References RelFileLocatorBackend::backend, RelFileLocator::dbOid, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), RelFileLocatorBackend::locator, RelFileLocator::relNumber, BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_rlocator, and RelFileLocator::spcOid.

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 1848 of file bufmgr.c.

1856 {
1857  BlockNumber first_block;
1858  IOContext io_context = IOContextForStrategy(strategy);
1859  instr_time io_start;
1860 
1861  LimitAdditionalPins(&extend_by);
1862 
1863  /*
1864  * Acquire victim buffers for extension without holding extension lock.
1865  * Writing out victim buffers is the most expensive part of extending the
1866  * relation, particularly when doing so requires WAL flushes. Zeroing out
1867  * the buffers is also quite expensive, so do that before holding the
1868  * extension lock as well.
1869  *
1870  * These pages are pinned by us and not valid. While we hold the pin they
1871  * can't be acquired as victim buffers by another backend.
1872  */
1873  for (uint32 i = 0; i < extend_by; i++)
1874  {
1875  Block buf_block;
1876 
1877  buffers[i] = GetVictimBuffer(strategy, io_context);
1878  buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
1879 
1880  /* new buffers are zero-filled */
1881  MemSet((char *) buf_block, 0, BLCKSZ);
1882  }
1883 
1884  /*
1885  * Lock relation against concurrent extensions, unless requested not to.
1886  *
1887  * We use the same extension lock for all forks. That's unnecessarily
1888  * restrictive, but currently extensions for forks don't happen often
1889  * enough to make it worth locking more granularly.
1890  *
1891  * Note that another backend might have extended the relation by the time
1892  * we get the lock.
1893  */
1894  if (!(flags & EB_SKIP_EXTENSION_LOCK))
1895  LockRelationForExtension(bmr.rel, ExclusiveLock);
1896 
1897  /*
1898  * If requested, invalidate size cache, so that smgrnblocks asks the
1899  * kernel.
1900  */
1901  if (flags & EB_CLEAR_SIZE_CACHE)
1902  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1903 
1904  first_block = smgrnblocks(bmr.smgr, fork);
1905 
1906  /*
1907  * Now that we have the accurate relation size, check if the caller wants
1908  * us to extend to only up to a specific size. If there were concurrent
1909  * extensions, we might have acquired too many buffers and need to release
1910  * them.
1911  */
1912  if (extend_upto != InvalidBlockNumber)
1913  {
1914  uint32 orig_extend_by = extend_by;
1915 
1916  if (first_block > extend_upto)
1917  extend_by = 0;
1918  else if ((uint64) first_block + extend_by > extend_upto)
1919  extend_by = extend_upto - first_block;
1920 
1921  for (uint32 i = extend_by; i < orig_extend_by; i++)
1922  {
1923  BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
1924 
1925  /*
1926  * The victim buffer we acquired previously is clean and unused,
1927  * let it be found again quickly
1928  */
1929  StrategyFreeBuffer(buf_hdr);
1930  UnpinBuffer(buf_hdr);
1931  }
1932 
1933  if (extend_by == 0)
1934  {
1935  if (!(flags & EB_SKIP_EXTENSION_LOCK))
1936  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
1937  *extended_by = extend_by;
1938  return first_block;
1939  }
1940  }
1941 
1942  /* Fail if relation is already at maximum possible length */
1943  if ((uint64) first_block + extend_by >= MaxBlockNumber)
1944  ereport(ERROR,
1945  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1946  errmsg("cannot extend relation %s beyond %u blocks",
1947  relpath(bmr.smgr->smgr_rlocator, fork),
1948  MaxBlockNumber)));
1949 
1950  /*
1951  * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
1952  *
1953  * This needs to happen before we extend the relation, because as soon as
1954  * we do, other backends can start to read in those pages.
1955  */
1956  for (uint32 i = 0; i < extend_by; i++)
1957  {
1958  Buffer victim_buf = buffers[i];
1959  BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
1960  BufferTag tag;
1961  uint32 hash;
1962  LWLock *partition_lock;
1963  int existing_id;
1964 
1965  /* in case we need to pin an existing buffer below */
1966  ResourceOwnerEnlarge(CurrentResourceOwner);
1967  ReservePrivateRefCountEntry();
1968 
1969  InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i);
1970  hash = BufTableHashCode(&tag);
1971  partition_lock = BufMappingPartitionLock(hash);
1972 
1973  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
1974 
1975  existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
1976 
1977  /*
1978  * We get here only in the corner case where we are trying to extend
1979  * the relation but we found a pre-existing buffer. This can happen
1980  * because a prior attempt at extending the relation failed, and
1981  * because mdread doesn't complain about reads beyond EOF (when
1982  * zero_damaged_pages is ON) and so a previous attempt to read a block
1983  * beyond EOF could have left a "valid" zero-filled buffer.
1984  * Unfortunately, we have also seen this case occurring because of
1985  * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
1986  * that doesn't account for a recent write. In that situation, the
1987  * pre-existing buffer would contain valid data that we don't want to
1988  * overwrite. Since the legitimate cases should always have left a
1989  * zero-filled buffer, complain if not PageIsNew.
1990  */
1991  if (existing_id >= 0)
1992  {
1993  BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
1994  Block buf_block;
1995  bool valid;
1996 
1997  /*
1998  * Pin the existing buffer before releasing the partition lock,
1999  * preventing it from being evicted.
2000  */
2001  valid = PinBuffer(existing_hdr, strategy);
2002 
2003  LWLockRelease(partition_lock);
2004 
2005  /*
2006  * The victim buffer we acquired previously is clean and unused,
2007  * let it be found again quickly
2008  */
2009  StrategyFreeBuffer(victim_buf_hdr);
2010  UnpinBuffer(victim_buf_hdr);
2011 
2012  buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
2013  buf_block = BufHdrGetBlock(existing_hdr);
2014 
2015  if (valid && !PageIsNew((Page) buf_block))
2016  ereport(ERROR,
2017  (errmsg("unexpected data beyond EOF in block %u of relation %s",
2018  existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork)),
2019  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2020 
2021  /*
2022  * We *must* do smgr[zero]extend before succeeding, else the page
2023  * will not be reserved by the kernel, and the next P_NEW call
2024  * will decide to return the same page. Clear the BM_VALID bit,
2025  * do StartBufferIO() and proceed.
2026  *
2027  * Loop to handle the very small possibility that someone re-sets
2028  * BM_VALID between our clearing it and StartBufferIO inspecting
2029  * it.
2030  */
2031  do
2032  {
2033  uint32 buf_state = LockBufHdr(existing_hdr);
2034 
2035  buf_state &= ~BM_VALID;
2036  UnlockBufHdr(existing_hdr, buf_state);
2037  } while (!StartBufferIO(existing_hdr, true));
2038  }
2039  else
2040  {
2041  uint32 buf_state;
2042 
2043  buf_state = LockBufHdr(victim_buf_hdr);
2044 
2045  /* some sanity checks while we hold the buffer header lock */
2046  Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
2047  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2048 
2049  victim_buf_hdr->tag = tag;
2050 
2051  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2052  if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2053  buf_state |= BM_PERMANENT;
2054 
2055  UnlockBufHdr(victim_buf_hdr, buf_state);
2056 
2057  LWLockRelease(partition_lock);
2058 
2059  /* XXX: could combine the locked operations in it with the above */
2060  StartBufferIO(victim_buf_hdr, true);
2061  }
2062  }
2063 
2064  io_start = pgstat_prepare_io_time(track_io_timing);
2065 
2066  /*
2067  * Note: if smgrzeroextend fails, we will end up with buffers that are
2068  * allocated but not marked BM_VALID. The next relation extension will
2069  * still select the same block number (because the relation didn't get any
2070  * longer on disk) and so future attempts to extend the relation will find
2071  * the same buffers (if they have not been recycled) but come right back
2072  * here to try smgrzeroextend again.
2073  *
2074  * We don't need to set checksum for all-zero pages.
2075  */
2076  smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
2077 
2078  /*
2079  * Release the file-extension lock; it's now OK for someone else to extend
2080  * the relation some more.
2081  *
2082  * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2083  * take noticeable time.
2084  */
2085  if (!(flags & EB_SKIP_EXTENSION_LOCK))
2086  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2087 
2088  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2089  io_start, extend_by);
2090 
2091  /* Set BM_VALID, terminate IO, and wake up any waiters */
2092  for (uint32 i = 0; i < extend_by; i++)
2093  {
2094  Buffer buf = buffers[i];
2095  BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2096  bool lock = false;
2097 
2098  if (flags & EB_LOCK_FIRST && i == 0)
2099  lock = true;
2100  else if (flags & EB_LOCK_TARGET)
2101  {
2102  Assert(extend_upto != InvalidBlockNumber);
2103  if (first_block + i + 1 == extend_upto)
2104  lock = true;
2105  }
2106 
2107  if (lock)
2108  LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
2109 
2110  TerminateBufferIO(buf_hdr, false, BM_VALID, true);
2111  }
2112 
2113  pgBufferUsage.shared_blks_written += extend_by;
2114 
2115  *extended_by = extend_by;
2116 
2117  return first_block;
2118 }
#define MaxBlockNumber
Definition: block.h:35
#define BM_JUST_DIRTIED
Definition: buf_internals.h:66
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
bool track_io_timing
Definition: bufmgr.c:138
static void LimitAdditionalPins(uint32 *additional_pins)
Definition: bufmgr.c:1773
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:63
void * Block
Definition: bufmgr.h:24
@ EB_LOCK_TARGET
Definition: bufmgr.h:91
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:88
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:73
@ EB_LOCK_FIRST
Definition: bufmgr.h:85
Pointer Page
Definition: bufpage.h:78
static bool PageIsNew(Page page)
Definition: bufpage.h:230
#define MemSet(start, val, len)
Definition: c.h:1007
int errhint(const char *fmt,...)
Definition: elog.c:1319
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:716
BufferUsage pgBufferUsage
Definition: instrument.c:20
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:430
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:480
#define ExclusiveLock
Definition: lockdefs.h:42
@ IOOBJECT_RELATION
Definition: pgstat.h:280
IOContext
Definition: pgstat.h:287
@ IOOP_EXTEND
Definition: pgstat.h:299
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:100
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
Definition: pgstat_io.c:122
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition: relpath.h:94
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:655
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:560
int64 shared_blks_written
Definition: instrument.h:29
BlockNumber smgr_cached_nblocks[MAX_FORKNUM+1]
Definition: smgr.h:46

References Assert(), buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errhint(), errmsg(), ERROR, ExclusiveLock, GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), RelFileLocatorBackend::locator, LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), BufferManagerRelation::rel, relpath, BufferManagerRelation::relpersistence, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, SMgrRelationData::smgr_rlocator, smgrnblocks(), smgrzeroextend(), StartBufferIO(), StrategyFreeBuffer(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 902 of file bufmgr.c.

908 {
909  BlockNumber current_size;
910  uint32 extended_by = 0;
911  Buffer buffer = InvalidBuffer;
912  Buffer buffers[64];
913 
914  Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
915  Assert(bmr.smgr == NULL || bmr.relpersistence != 0);
916  Assert(extend_to != InvalidBlockNumber && extend_to > 0);
917 
918  if (bmr.smgr == NULL)
919  {
920  bmr.smgr = RelationGetSmgr(bmr.rel);
921  bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
922  }
923 
924  /*
925  * If desired, create the file if it doesn't exist. If
926  * smgr_cached_nblocks[fork] is positive then it must exist, no need for
927  * an smgrexists call.
928  */
929  if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
930  (bmr.smgr->smgr_cached_nblocks[fork] == 0 ||
931  bmr.smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
932  !smgrexists(bmr.smgr, fork))
933  {
934  LockRelationForExtension(bmr.rel, ExclusiveLock);
935 
936  /* recheck, fork might have been created concurrently */
937  if (!smgrexists(bmr.smgr, fork))
938  smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY);
939 
940  UnlockRelationForExtension(bmr.rel, ExclusiveLock);
941  }
942 
943  /*
944  * If requested, invalidate size cache, so that smgrnblocks asks the
945  * kernel.
946  */
947  if (flags & EB_CLEAR_SIZE_CACHE)
948  bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
949 
950  /*
951  * Estimate how many pages we'll need to extend by. This avoids acquiring
952  * unnecessarily many victim buffers.
953  */
954  current_size = smgrnblocks(bmr.smgr, fork);
955 
956  /*
957  * Since no-one else can be looking at the page contents yet, there is no
958  * difference between an exclusive lock and a cleanup-strength lock. Note
959  * that we pass the original mode to ReadBuffer_common() below, when
960  * falling back to reading the buffer to a concurrent relation extension.
961  */
962  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
963  flags |= EB_LOCK_TARGET;
964 
965  while (current_size < extend_to)
966  {
967  uint32 num_pages = lengthof(buffers);
968  BlockNumber first_block;
969 
970  if ((uint64) current_size + num_pages > extend_to)
971  num_pages = extend_to - current_size;
972 
973  first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
974  num_pages, extend_to,
975  buffers, &extended_by);
976 
977  current_size = first_block + extended_by;
978  Assert(num_pages != 0 || current_size >= extend_to);
979 
980  for (uint32 i = 0; i < extended_by; i++)
981  {
982  if (first_block + i != extend_to - 1)
983  ReleaseBuffer(buffers[i]);
984  else
985  buffer = buffers[i];
986  }
987  }
988 
989  /*
990  * It's possible that another backend concurrently extended the relation.
991  * In that case read the buffer.
992  *
993  * XXX: Should we control this via a flag?
994  */
995  if (buffer == InvalidBuffer)
996  {
997  bool hit;
998 
999  Assert(extended_by == 0);
1000  buffer = ReadBuffer_common(bmr.smgr, bmr.relpersistence,
1001  fork, extend_to - 1, mode, strategy,
1002  &hit);
1003  }
1004 
1005  return buffer;
1006 }
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4560
static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
Definition: bufmgr.c:1014
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:76
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:82
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:47
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:45
#define lengthof(array)
Definition: c.h:775
static PgChecksumMode mode
Definition: pg_checksums.c:56
int64 current_size
Definition: pg_checksums.c:64

References Assert(), PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, RelationGetSmgr(), ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, SMgrRelationData::smgr_cached_nblocks, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
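
A sketch mirroring the vm_extend()/fsm_extend() call sites: make sure a fork exists and reaches a target length, receiving a pin on the last block. The helper is hypothetical:

#include "postgres.h"
#include "common/relpath.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: guarantee blocks 0 .. target-1 of the FSM fork exist. */
static void
ensure_fsm_length(Relation rel, BlockNumber target)
{
    Buffer      buf = ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL,
                                          EB_CREATE_FORK_IF_NEEDED |
                                          EB_CLEAR_SIZE_CACHE,
                                          target, RBM_ZERO_ON_ERROR);

    /* buf is block target - 1, pinned but unlocked in this mode */
    ReleaseBuffer(buf);
}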

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3978 of file bufmgr.c.

3981 {
3982  BlockNumber curBlock;
3983 
3984  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3985  {
3986  uint32 bufHash; /* hash value for tag */
3987  BufferTag bufTag; /* identity of requested block */
3988  LWLock *bufPartitionLock; /* buffer partition lock for it */
3989  int buf_id;
3990  BufferDesc *bufHdr;
3991  uint32 buf_state;
3992 
3993  /* create a tag so we can lookup the buffer */
3994  InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
3995 
3996  /* determine its hash code and partition lock ID */
3997  bufHash = BufTableHashCode(&bufTag);
3998  bufPartitionLock = BufMappingPartitionLock(bufHash);
3999 
4000  /* Check that it is in the buffer pool. If not, do nothing. */
4001  LWLockAcquire(bufPartitionLock, LW_SHARED);
4002  buf_id = BufTableLookup(&bufTag, bufHash);
4003  LWLockRelease(bufPartitionLock);
4004 
4005  if (buf_id < 0)
4006  continue;
4007 
4008  bufHdr = GetBufferDescriptor(buf_id);
4009 
4010  /*
4011  * We need to lock the buffer header and recheck if the buffer is
4012  * still associated with the same block because the buffer could be
4013  * evicted by some other backend loading blocks for a different
4014  * relation after we release lock on the BufMapping table.
4015  */
4016  buf_state = LockBufHdr(bufHdr);
4017 
4018  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
4019  BufTagGetForkNum(&bufHdr->tag) == forkNum &&
4020  bufHdr->tag.blockNum >= firstDelBlock)
4021  InvalidateBuffer(bufHdr); /* releases spinlock */
4022  else
4023  UnlockBufHdr(bufHdr, buf_state);
4024  }
4025 }

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc *  buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 3437 of file bufmgr.c.

3439 {
3440  XLogRecPtr recptr;
3441  ErrorContextCallback errcallback;
3442  instr_time io_start;
3443  Block bufBlock;
3444  char *bufToWrite;
3445  uint32 buf_state;
3446 
3447  /*
3448  * Try to start an I/O operation. If StartBufferIO returns false, then
3449  * someone else flushed the buffer before we could, so we need not do
3450  * anything.
3451  */
3452  if (!StartBufferIO(buf, false))
3453  return;
3454 
3455  /* Setup error traceback support for ereport() */
3456  errcallback.callback = shared_buffer_write_error_callback;
3457  errcallback.arg = (void *) buf;
3458  errcallback.previous = error_context_stack;
3459  error_context_stack = &errcallback;
3460 
3461  /* Find smgr relation for buffer */
3462  if (reln == NULL)
3463  reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
3464 
3465  TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
3466  buf->tag.blockNum,
3467  reln->smgr_rlocator.locator.spcOid,
3468  reln->smgr_rlocator.locator.dbOid,
3469  reln->smgr_rlocator.locator.relNumber);
3470 
3471  buf_state = LockBufHdr(buf);
3472 
3473  /*
3474  * Run PageGetLSN while holding header lock, since we don't have the
3475  * buffer locked exclusively in all cases.
3476  */
3477  recptr = BufferGetLSN(buf);
3478 
3479  /* To check if block content changes while flushing. - vadim 01/17/97 */
3480  buf_state &= ~BM_JUST_DIRTIED;
3481  UnlockBufHdr(buf, buf_state);
3482 
3483  /*
3484  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
3485  * rule that log updates must hit disk before any of the data-file changes
3486  * they describe do.
3487  *
3488  * However, this rule does not apply to unlogged relations, which will be
3489  * lost after a crash anyway. Most unlogged relation pages do not bear
3490  * LSNs since we never emit WAL records for them, and therefore flushing
3491  * up through the buffer LSN would be useless, but harmless. However,
3492  * GiST indexes use LSNs internally to track page-splits, and therefore
3493  * unlogged GiST pages bear "fake" LSNs generated by
3494  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
3495  * LSN counter could advance past the WAL insertion point; and if it did
3496  * happen, attempting to flush WAL through that location would fail, with
3497  * disastrous system-wide consequences. To make sure that can't happen,
3498  * skip the flush if the buffer isn't permanent.
3499  */
3500  if (buf_state & BM_PERMANENT)
3501  XLogFlush(recptr);
3502 
3503  /*
3504  * Now it's safe to write buffer to disk. Note that no one else should
3505  * have been able to write it while we were busy with log flushing because
3506  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
3507  */
3508  bufBlock = BufHdrGetBlock(buf);
3509 
3510  /*
3511  * Update page checksum if desired. Since we have only shared lock on the
3512  * buffer, other processes might be updating hint bits in it, so we must
3513  * copy the page to private storage if we do checksumming.
3514  */
3515  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
3516 
3517  io_start = pgstat_prepare_io_time(track_io_timing);
3518 
3519  /*
3520  * bufToWrite is either the shared buffer or a copy, as appropriate.
3521  */
3522  smgrwrite(reln,
3523  BufTagGetForkNum(&buf->tag),
3524  buf->tag.blockNum,
3525  bufToWrite,
3526  false);
3527 
3528  /*
3529  * When a strategy is in use, only flushes of dirty buffers already in the
3530  * strategy ring are counted as strategy writes (IOCONTEXT
3531  * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
3532  * statistics tracking.
3533  *
3534  * If a shared buffer initially added to the ring must be flushed before
3535  * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
3536  *
3537  * If a shared buffer which was added to the ring later because the
3538  * current strategy buffer is pinned or in use or because all strategy
3539  * buffers were dirty and rejected (for BAS_BULKREAD operations only)
3540  * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
3541  * (from_ring will be false).
3542  *
3543  * When a strategy is not in use, the write can only be a "regular" write
3544  * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
3545  */
3546  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
3547  IOOP_WRITE, io_start, 1);
3548 
3549  pgBufferUsage.shared_blks_written++;
3550 
3551  /*
3552  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
3553  * end the BM_IO_IN_PROGRESS state.
3554  */
3555  TerminateBufferIO(buf, true, 0, true);
3556 
3557  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
3558  buf->tag.blockNum,
3559  reln->smgr_rlocator.locator.spcOid,
3560  reln->smgr_rlocator.locator.dbOid,
3561  reln->smgr_rlocator.locator.relNumber);
3562 
3563  /* Pop the error context stack */
3564  error_context_stack = errcallback.previous;
3565 }
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:64
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5323
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
ErrorContextCallback * error_context_stack
Definition: elog.c:94
@ IOOP_WRITE
Definition: pgstat.h:304
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.h:121
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2728

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdr(), and XLogFlush().

Referenced by FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), and SyncOneBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 4498 of file bufmgr.c.

4499 {
4500  int i;
4501  BufferDesc *bufHdr;
4502 
4503  for (i = 0; i < NBuffers; i++)
4504  {
4505  uint32 buf_state;
4506 
4507  bufHdr = GetBufferDescriptor(i);
4508 
4509  /*
4510  * As in DropRelationBuffers, an unlocked precheck should be safe and
4511  * saves some cycles.
4512  */
4513  if (bufHdr->tag.dbOid != dbid)
4514  continue;
4515 
4516  /* Make sure we can handle the pin */
4517  ResourceOwnerEnlarge(CurrentResourceOwner);
4518  ReservePrivateRefCountEntry();
4519 
4520  buf_state = LockBufHdr(bufHdr);
4521  if (bufHdr->tag.dbOid == dbid &&
4522  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4523  {
4524  PinBuffer_Locked(bufHdr);
4525  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4526  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4527  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4528  UnpinBuffer(bufHdr);
4529  }
4530  else
4531  UnlockBufHdr(bufHdr, buf_state);
4532  }
4533 }
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:3437
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:2416

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), CurrentResourceOwner, buftag::dbOid, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 4540 of file bufmgr.c.

4541 {
4542  BufferDesc *bufHdr;
4543 
4544  /* currently not needed, but no fundamental reason not to support */
4545  Assert(!BufferIsLocal(buffer));
4546 
4547  Assert(BufferIsPinned(buffer));
4548 
4549  bufHdr = GetBufferDescriptor(buffer - 1);
4550 
4551  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4552 
4553  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4554 }
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1897

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().
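
A sketch of the redo-side pattern seen in the callers above: write a just-initialized page straight to storage while still holding the content lock, which the Assert requires of the caller. The helper is hypothetical:

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical redo-style helper: buf must be pinned and content-locked. */
static void
force_page_to_disk(Buffer buf)
{
    MarkBufferDirty(buf);
    FlushOneBuffer(buf);            /* flushes WAL as needed, then writes the page */
    /* the caller still owns the lock and pin afterwards */
}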

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 4145 of file bufmgr.c.

4146 {
4147  int i;
4148  BufferDesc *bufHdr;
4149  SMgrRelation srel = RelationGetSmgr(rel);
4150 
4151  if (RelationUsesLocalBuffers(rel))
4152  {
4153  for (i = 0; i < NLocBuffer; i++)
4154  {
4155  uint32 buf_state;
4156  instr_time io_start;
4157 
4158  bufHdr = GetLocalBufferDescriptor(i);
4159  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4160  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
4161  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4162  {
4163  ErrorContextCallback errcallback;
4164  Page localpage;
4165 
4166  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
4167 
4168  /* Setup error traceback support for ereport() */
4169  errcallback.callback = local_buffer_write_error_callback;
4170  errcallback.arg = (void *) bufHdr;
4171  errcallback.previous = error_context_stack;
4172  error_context_stack = &errcallback;
4173 
4174  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
4175 
4176  io_start = pgstat_prepare_io_time(track_io_timing);
4177 
4178  smgrwrite(srel,
4179  BufTagGetForkNum(&bufHdr->tag),
4180  bufHdr->tag.blockNum,
4181  localpage,
4182  false);
4183 
4184  pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
4185  IOCONTEXT_NORMAL, IOOP_WRITE,
4186  io_start, 1);
4187 
4188  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
4189  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
4190 
4191  pgBufferUsage.local_blks_written++;
4192 
4193  /* Pop the error context stack */
4194  error_context_stack = errcallback.previous;
4195  }
4196  }
4197 
4198  return;
4199  }
4200 
4201  for (i = 0; i < NBuffers; i++)
4202  {
4203  uint32 buf_state;
4204 
4205  bufHdr = GetBufferDescriptor(i);
4206 
4207  /*
4208  * As in DropRelationBuffers, an unlocked precheck should be safe and
4209  * saves some cycles.
4210  */
4211  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
4212  continue;
4213 
4214  /* Make sure we can handle the pin */
4215  ResourceOwnerEnlarge(CurrentResourceOwner);
4216  ReservePrivateRefCountEntry();
4217 
4218  buf_state = LockBufHdr(bufHdr);
4219  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
4220  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4221  {
4222  PinBuffer_Locked(bufHdr);
4223  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4224  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4225  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4226  UnpinBuffer(bufHdr);
4227  }
4228  else
4229  UnlockBufHdr(bufHdr, buf_state);
4230  }
4231 }
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:290
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:67
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:5343
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1542
int NLocBuffer
Definition: localbuf.c:42
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:281
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
int64 local_blks_written
Definition: instrument.h:33
RelFileLocator rd_locator
Definition: rel.h:57

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_WRITE, BufferUsage::local_blks_written, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), smgrwrite(), BufferDesc::state, BufferDesc::tag, track_io_timing, UnlockBufHdr(), and UnpinBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
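
A hedged sketch of the typical pattern, loosely after index_copy_data(); dstrel is an assumed destination SMgrRelation:

    /* push every dirty page of "rel" to storage, then copy the relation
     * block-by-block at the smgr level, bypassing shared buffers */
    FlushRelationBuffers(rel);
    RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
                        rel->rd_rel->relpersistence);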

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 4243 of file bufmgr.c.

4244 {
4245  int i;
4246  SMgrSortArray *srels;
4247  bool use_bsearch;
4248 
4249  if (nrels == 0)
4250  return;
4251 
4252  /* fill-in array for qsort */
4253  srels = palloc(sizeof(SMgrSortArray) * nrels);
4254 
4255  for (i = 0; i < nrels; i++)
4256  {
4257  Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
4258 
4259  srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
4260  srels[i].srel = smgrs[i];
4261  }
4262 
4263  /*
4264  * Save the bsearch overhead for low number of relations to sync. See
4265  * DropRelationsAllBuffers for details.
4266  */
4267  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
4268 
4269  /* sort the list of SMgrRelations if necessary */
4270  if (use_bsearch)
4271  qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
4272 
4273  for (i = 0; i < NBuffers; i++)
4274  {
4275  SMgrSortArray *srelent = NULL;
4276  BufferDesc *bufHdr = GetBufferDescriptor(i);
4277  uint32 buf_state;
4278 
4279  /*
4280  * As in DropRelationBuffers, an unlocked precheck should be safe and
4281  * saves some cycles.
4282  */
4283 
4284  if (!use_bsearch)
4285  {
4286  int j;
4287 
4288  for (j = 0; j < nrels; j++)
4289  {
4290  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
4291  {
4292  srelent = &srels[j];
4293  break;
4294  }
4295  }
4296  }
4297  else
4298  {
4299  RelFileLocator rlocator;
4300 
4301  rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4302  srelent = bsearch((const void *) &(rlocator),
4303  srels, nrels, sizeof(SMgrSortArray),
4304  rlocator_comparator);
4305  }
4306 
4307  /* buffer doesn't belong to any of the given relfilelocators; skip it */
4308  if (srelent == NULL)
4309  continue;
4310 
4311  /* Make sure we can handle the pin */
4312  ReservePrivateRefCountEntry();
4313  ResourceOwnerEnlarge(CurrentResourceOwner);
4314 
4315  buf_state = LockBufHdr(bufHdr);
4316  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
4317  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
4318  {
4319  PinBuffer_Locked(bufHdr);
4320  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
4321  FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4322  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
4323  UnpinBuffer(bufHdr);
4324  }
4325  else
4326  UnlockBufHdr(bufHdr, buf_state);
4327  }
4328 
4329  pfree(srels);
4330 }
SMgrRelation srel
Definition: bufmgr.c:131
RelFileLocator rlocator
Definition: bufmgr.c:130

References Assert(), BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, palloc(), pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().
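
Sketch of batching several relations into a single pass over shared buffers (locator_a and locator_b are assumed RelFileLocators):

    SMgrRelation srels[2];

    srels[0] = smgropen(locator_a, INVALID_PROC_NUMBER);
    srels[1] = smgropen(locator_b, INVALID_PROC_NUMBER);
    FlushRelationsAllBuffers(srels, 2);   /* one scan flushes both */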

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 427 of file bufmgr.c.

428 {
429  Assert(ref->refcount == 0);
430 
431  if (ref >= &PrivateRefCountArray[0] &&
432  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
433  {
434  ref->buffer = InvalidBuffer;
435 
436  /*
437  * Mark the just used entry as reserved - in many scenarios that
438  * allows us to avoid ever having to search the array/hash for free
439  * entries.
440  */
441  ReservedRefCountEntry = ref;
442  }
443  else
444  {
445  bool found;
446  Buffer buffer = ref->buffer;
447 
448  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
449  Assert(found);
450  Assert(PrivateRefCountOverflowed > 0);
451  PrivateRefCountOverflowed--;
452  }
453 }
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:200
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_REMOVE
Definition: hsearch.h:115

References Assert(), PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountEntry.

Referenced by UnpinBufferNoOwner().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 404 of file bufmgr.c.

405 {
406  PrivateRefCountEntry *ref;
407 
408  Assert(BufferIsValid(buffer));
409  Assert(!BufferIsLocal(buffer));
410 
411  /*
412  * Not moving the entry - that's ok for the current users, but we might
413  * want to change this one day.
414  */
415  ref = GetPrivateRefCountEntry(buffer, false);
416 
417  if (ref == NULL)
418  return 0;
419  return ref->refcount;
420 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:330

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), MarkBufferDirtyHint(), and ReadRecentBuffer().
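
A small sketch of the common "exactly one local pin" test (compare IsBufferCleanupOK() below):

    /* refuse cleanup if this backend holds zero or multiple pins */
    if (GetPrivateRefCount(buffer) != 1)
        return false;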

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 330 of file bufmgr.c.

331 {
332  PrivateRefCountEntry *res;
333  int i;
334 
335  Assert(BufferIsValid(buffer));
336  Assert(!BufferIsLocal(buffer));
337 
338  /*
339  * First search for references in the array, that'll be sufficient in the
340  * majority of cases.
341  */
342  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
343  {
344  res = &PrivateRefCountArray[i];
345 
346  if (res->buffer == buffer)
347  return res;
348  }
349 
350  /*
351  * By here we know that the buffer, if already pinned, isn't residing in
352  * the array.
353  *
354  * Only look up the buffer in the hashtable if we've previously overflowed
355  * into it.
356  */
357  if (PrivateRefCountOverflowed == 0)
358  return NULL;
359 
360  res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL);
361 
362  if (res == NULL)
363  return NULL;
364  else if (!do_move)
365  {
366  /* caller doesn't want us to move the hash entry into the array */
367  return res;
368  }
369  else
370  {
371  /* move buffer from hashtable into the free array slot */
372  bool found;
373  PrivateRefCountEntry *free;
374 
375  /* Ensure there's a free array slot */
376  ReservePrivateRefCountEntry();
377 
378  /* Use up the reserved slot */
379  Assert(ReservedRefCountEntry != NULL);
380  free = ReservedRefCountEntry;
381  ReservedRefCountEntry = NULL;
382  Assert(free->buffer == InvalidBuffer);
383 
384  /* and fill it */
385  free->buffer = buffer;
386  free->refcount = res->refcount;
387 
388  /* delete from hashtable */
389  hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
390  Assert(found);
391  Assert(PrivateRefCountOverflowed > 0);
392  PrivateRefCountOverflowed--;
393 
394  return free;
395  }
396 }
@ HASH_FIND
Definition: hsearch.h:113

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBufferNoOwner().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 1607 of file bufmgr.c.

1608 {
1609  BufferDesc *buf_hdr;
1610  Buffer buf;
1611  uint32 buf_state;
1612  bool from_ring;
1613 
1614  /*
1615  * Ensure, while the spinlock's not yet held, that there's a free refcount
1616  * entry, and a resource owner slot for the pin.
1617  */
1618  ReservePrivateRefCountEntry();
1619  ResourceOwnerEnlarge(CurrentResourceOwner);
1620 
1621  /* we return here if a prospective victim buffer gets used concurrently */
1622 again:
1623 
1624  /*
1625  * Select a victim buffer. The buffer is returned with its header
1626  * spinlock still held!
1627  */
1628  buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
1629  buf = BufferDescriptorGetBuffer(buf_hdr);
1630 
1631  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1632 
1633  /* Pin the buffer and then release the buffer spinlock */
1634  PinBuffer_Locked(buf_hdr);
1635 
1636  /*
1637  * We shouldn't have any other pins for this buffer.
1638  */
1639  CheckBufferIsPinnedOnce(buf);
1640 
1641  /*
1642  * If the buffer was dirty, try to write it out. There is a race
1643  * condition here, in that someone might dirty it after we released the
1644  * buffer header lock above, or even while we are writing it out (since
1645  * our share-lock won't prevent hint-bit updates). We will recheck the
1646  * dirty bit after re-locking the buffer header.
1647  */
1648  if (buf_state & BM_DIRTY)
1649  {
1650  LWLock *content_lock;
1651 
1652  Assert(buf_state & BM_TAG_VALID);
1653  Assert(buf_state & BM_VALID);
1654 
1655  /*
1656  * We need a share-lock on the buffer contents to write it out (else
1657  * we might write invalid data, eg because someone else is compacting
1658  * the page contents while we write). We must use a conditional lock
1659  * acquisition here to avoid deadlock. Even though the buffer was not
1660  * pinned (and therefore surely not locked) when StrategyGetBuffer
1661  * returned it, someone else could have pinned and exclusive-locked it
1662  * by the time we get here. If we try to get the lock unconditionally,
1663  * we'd block waiting for them; if they later block waiting for us,
1664  * deadlock ensues. (This has been observed to happen when two
1665  * backends are both trying to split btree index pages, and the second
1666  * one just happens to be trying to split the page the first one got
1667  * from StrategyGetBuffer.)
1668  */
1669  content_lock = BufferDescriptorGetContentLock(buf_hdr);
1670  if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
1671  {
1672  /*
1673  * Someone else has locked the buffer, so give it up and loop back
1674  * to get another one.
1675  */
1676  UnpinBuffer(buf_hdr);
1677  goto again;
1678  }
1679 
1680  /*
1681  * If using a nondefault strategy, and writing the buffer would
1682  * require a WAL flush, let the strategy decide whether to go ahead
1683  * and write/reuse the buffer or to choose another victim. We need a
1684  * lock to inspect the page LSN, so this can't be done inside
1685  * StrategyGetBuffer.
1686  */
1687  if (strategy != NULL)
1688  {
1689  XLogRecPtr lsn;
1690 
1691  /* Read the LSN while holding buffer header lock */
1692  buf_state = LockBufHdr(buf_hdr);
1693  lsn = BufferGetLSN(buf_hdr);
1694  UnlockBufHdr(buf_hdr, buf_state);
1695 
1696  if (XLogNeedsFlush(lsn)
1697  && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
1698  {
1699  LWLockRelease(content_lock);
1700  UnpinBuffer(buf_hdr);
1701  goto again;
1702  }
1703  }
1704 
1705  /* OK, do the I/O */
1706  FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
1707  LWLockRelease(content_lock);
1708 
1709  ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
1710  &buf_hdr->tag);
1711  }
1712 
1713 
1714  if (buf_state & BM_VALID)
1715  {
1716  /*
1717  * When a BufferAccessStrategy is in use, blocks evicted from shared
1718  * buffers are counted as IOOP_EVICT in the corresponding context
1719  * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
1720  * strategy in two cases: 1) while initially claiming buffers for the
1721  * strategy ring 2) to replace an existing strategy ring buffer
1722  * because it is pinned or in use and cannot be reused.
1723  *
1724  * Blocks evicted from buffers already in the strategy ring are
1725  * counted as IOOP_REUSE in the corresponding strategy context.
1726  *
1727  * At this point, we can accurately count evictions and reuses,
1728  * because we have successfully claimed the valid buffer. Previously,
1729  * we may have been forced to release the buffer due to concurrent
1730  * pinners or erroring out.
1731  */
1732  pgstat_count_io_op(IOOBJECT_RELATION, io_context,
1733  from_ring ? IOOP_REUSE : IOOP_EVICT);
1734  }
1735 
1736  /*
1737  * If the buffer has an entry in the buffer mapping table, delete it. This
1738  * can fail because another backend could have pinned or dirtied the
1739  * buffer.
1740  */
1741  if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
1742  {
1743  UnpinBuffer(buf_hdr);
1744  goto again;
1745  }
1746 
1747  /* a final set of sanity checks */
1748 #ifdef USE_ASSERT_CHECKING
1749  buf_state = pg_atomic_read_u32(&buf_hdr->state);
1750 
1751  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
1752  Assert(!(buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY)));
1753 
1754  CheckBufferIsPinnedOnce(buf);
1755 #endif
1756 
1757  return buf;
1758 }
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:4842
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition: bufmgr.c:1539
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition: bufmgr.c:5544
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
Definition: freelist.c:196
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition: freelist.c:756
@ IOOP_EVICT
Definition: pgstat.h:298
@ IOOP_REUSE
Definition: pgstat.h:303
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
Definition: pgstat_io.c:77
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3061

References Assert(), BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferGetLSN, CheckBufferIsPinnedOnce(), CurrentResourceOwner, FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBufHdr(), LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), pg_atomic_read_u32(), pgstat_count_io_op(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::state, StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5010 of file bufmgr.c.

5011 {
5012  int bufid = GetStartupBufferPinWaitBufId();
5013 
5014  /*
5015  * If we get woken slowly then it's possible that the Startup process was
5016  * already woken by other backends before we got here. Also possible that
5017  * we get here by multiple interrupts or interrupts at inappropriate
5018  * times, so make sure we do nothing if the bufid is not set.
5019  */
5020  if (bufid < 0)
5021  return false;
5022 
5023  if (GetPrivateRefCount(bufid + 1) > 0)
5024  return true;
5025 
5026  return false;
5027 }
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:671

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().
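
Sketch of the recovery-conflict usage; the ereport() wording is illustrative, not quoted from the real interrupt handler:

    if (HoldingBufferPinThatDelaysRecovery())
        ereport(ERROR,
                (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
                 errmsg("canceling statement due to conflict with recovery")));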

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 4592 of file bufmgr.c.

4593 {
4594  Assert(BufferIsPinned(buffer));
4595  ResourceOwnerEnlarge(CurrentResourceOwner);
4596  if (BufferIsLocal(buffer))
4597  LocalRefCount[-buffer - 1]++;
4598  else
4599  {
4600  PrivateRefCountEntry *ref;
4601 
4602  ref = GetPrivateRefCountEntry(buffer, true);
4603  Assert(ref != NULL);
4604  ref->refcount++;
4605  }
4606  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
4607 }
void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().
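
Sketch: handing an already-pinned page to a second reference holder ("slot" is a hypothetical holder, in the spirit of tts_buffer_heap_store_tuple()):

    IncrBufferRefCount(buffer);   /* caller must already hold a pin */
    slot->buffer = buffer;        /* the extra pin travels with the slot */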

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 3229 of file bufmgr.c.

3230 {
3231  HASHCTL hash_ctl;
3232 
3233  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
3234 
3235  hash_ctl.keysize = sizeof(int32);
3236  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
3237 
3238  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
3239  HASH_ELEM | HASH_BLOBS);
3240 
3241  /*
3242  * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
3243  * the corresponding phase of backend shutdown.
3244  */
3245  Assert(MyProc != NULL);
3246  on_shmem_exit(AtProcExit_Buffers, 0);
3247 }
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:3254
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
PGPROC * MyProc
Definition: proc.c:66
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert(), AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1441 of file bufmgr.c.

1442 {
1443  BufferTag oldTag;
1444  uint32 oldHash; /* hash value for oldTag */
1445  LWLock *oldPartitionLock; /* buffer partition lock for it */
1446  uint32 oldFlags;
1447  uint32 buf_state;
1448 
1449  /* Save the original buffer tag before dropping the spinlock */
1450  oldTag = buf->tag;
1451 
1452  buf_state = pg_atomic_read_u32(&buf->state);
1453  Assert(buf_state & BM_LOCKED);
1454  UnlockBufHdr(buf, buf_state);
1455 
1456  /*
1457  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1458  * worth storing the hashcode in BufferDesc so we need not recompute it
1459  * here? Probably not.
1460  */
1461  oldHash = BufTableHashCode(&oldTag);
1462  oldPartitionLock = BufMappingPartitionLock(oldHash);
1463 
1464 retry:
1465 
1466  /*
1467  * Acquire exclusive mapping lock in preparation for changing the buffer's
1468  * association.
1469  */
1470  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1471 
1472  /* Re-lock the buffer header */
1473  buf_state = LockBufHdr(buf);
1474 
1475  /* If it's changed while we were waiting for lock, do nothing */
1476  if (!BufferTagsEqual(&buf->tag, &oldTag))
1477  {
1478  UnlockBufHdr(buf, buf_state);
1479  LWLockRelease(oldPartitionLock);
1480  return;
1481  }
1482 
1483  /*
1484  * We assume the only reason for it to be pinned is that someone else is
1485  * flushing the page out. Wait for them to finish. (This could be an
1486  * infinite loop if the refcount is messed up... it would be nice to time
1487  * out after awhile, but there seems no way to be sure how many loops may
1488  * be needed. Note that if the other guy has pinned the buffer but not
1489  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1490  * be busy-looping here.)
1491  */
1492  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1493  {
1494  UnlockBufHdr(buf, buf_state);
1495  LWLockRelease(oldPartitionLock);
1496  /* safety check: should definitely not be our *own* pin */
1497  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1498  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1499  WaitIO(buf);
1500  goto retry;
1501  }
1502 
1503  /*
1504  * Clear out the buffer's tag and flags. We must do this to ensure that
1505  * linear scans of the buffer array don't think the buffer is valid.
1506  */
1507  oldFlags = buf_state & BUF_FLAG_MASK;
1508  ClearBufferTag(&buf->tag);
1509  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1510  UnlockBufHdr(buf, buf_state);
1511 
1512  /*
1513  * Remove the buffer from the lookup hashtable, if it was in there.
1514  */
1515  if (oldFlags & BM_TAG_VALID)
1516  BufTableDelete(&oldTag, oldHash);
1517 
1518  /*
1519  * Done with mapping lock.
1520  */
1521  LWLockRelease(oldPartitionLock);
1522 
1523  /*
1524  * Insert the buffer at the head of the list of free buffers.
1525  */
1526  StrategyFreeBuffer(buf);
1527 }
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:45
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
#define BM_LOCKED
Definition: buf_internals.h:60
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:5146

References Assert(), BM_LOCKED, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), StrategyFreeBuffer(), UnlockBufHdr(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 1539 of file bufmgr.c.

1540 {
1541  uint32 buf_state;
1542  uint32 hash;
1543  LWLock *partition_lock;
1544  BufferTag tag;
1545 
1546  Assert(GetPrivateRefCount(BufferDescriptorGetBuffer(buf_hdr)) > 0);
1547 
1548  /* have buffer pinned, so it's safe to read tag without lock */
1549  tag = buf_hdr->tag;
1550 
1551  hash = BufTableHashCode(&tag);
1552  partition_lock = BufMappingPartitionLock(hash);
1553 
1554  LWLockAcquire(partition_lock, LW_EXCLUSIVE);
1555 
1556  /* lock the buffer header */
1557  buf_state = LockBufHdr(buf_hdr);
1558 
1559  /*
1560  * We have the buffer pinned nobody else should have been able to unset
1561  * this concurrently.
1562  */
1563  Assert(buf_state & BM_TAG_VALID);
1564  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1565  Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
1566 
1567  /*
1568  * If somebody else pinned the buffer since, or even worse, dirtied it,
1569  * give up on this buffer: It's clearly in use.
1570  */
1571  if (BUF_STATE_GET_REFCOUNT(buf_state) != 1 || (buf_state & BM_DIRTY))
1572  {
1573  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1574 
1575  UnlockBufHdr(buf_hdr, buf_state);
1576  LWLockRelease(partition_lock);
1577 
1578  return false;
1579  }
1580 
1581  /*
1582  * Clear out the buffer's tag and flags and usagecount. This is not
1583  * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
1584  * doing anything with the buffer. But currently it's beneficial, as the
1585  * cheaper pre-check for several linear scans of shared buffers use the
1586  * tag (see e.g. FlushDatabaseBuffers()).
1587  */
1588  ClearBufferTag(&buf_hdr->tag);
1589  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1590  UnlockBufHdr(buf_hdr, buf_state);
1591 
1592  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1593 
1594  /* finally delete buffer from the buffer mapping table */
1595  BufTableDelete(&tag, hash);
1596 
1597  LWLockRelease(partition_lock);
1598 
1599  Assert(!(buf_state & (BM_DIRTY | BM_VALID | BM_TAG_VALID)));
1600  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1601  Assert(BUF_STATE_GET_REFCOUNT(pg_atomic_read_u32(&buf_hdr->state)) > 0);
1602 
1603  return true;
1604 }

References Assert(), BM_DIRTY, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 5092 of file bufmgr.c.

5093 {
5094  BufferDesc *bufHdr;
5095  uint32 buf_state;
5096 
5097  Assert(BufferIsValid(buffer));
5098 
5099  if (BufferIsLocal(buffer))
5100  {
5101  /* There should be exactly one pin */
5102  if (LocalRefCount[-buffer - 1] != 1)
5103  return false;
5104  /* Nobody else to wait for */
5105  return true;
5106  }
5107 
5108  /* There should be exactly one local pin */
5109  if (GetPrivateRefCount(buffer) != 1)
5110  return false;
5111 
5112  bufHdr = GetBufferDescriptor(buffer - 1);
5113 
5114  /* caller must hold exclusive lock on buffer */
5115  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
5116  LW_EXCLUSIVE));
5117 
5118  buf_state = LockBufHdr(bufHdr);
5119 
5120  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5121  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5122  {
5123  /* pincount is OK. */
5124  UnlockBufHdr(bufHdr, buf_state);
5125  return true;
5126  }
5127 
5128  UnlockBufHdr(bufHdr, buf_state);
5129  return false;
5130 }

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
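
Sketch of guarding a page cleanup; the exclusive content lock must already be held, as the Assert requires:

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    if (IsBufferCleanupOK(buf))
    {
        /* ours is the only pin: safe to compact or prune the page */
    }
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);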

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 5589 of file bufmgr.c.

5590 {
5591  instr_time io_start;
5592  int i;
5593 
5594  if (wb_context->nr_pending == 0)
5595  return;
5596 
5597  /*
5598  * Executing the writes in-order can make them a lot faster, and allows to
5599  * merge writeback requests to consecutive blocks into larger writebacks.
5600  */
5601  sort_pending_writebacks(wb_context->pending_writebacks,
5602  wb_context->nr_pending);
5603 
5604  io_start = pgstat_prepare_io_time(track_io_timing);
5605 
5606  /*
5607  * Coalesce neighbouring writes, but nothing else. For that we iterate
5608  * through the, now sorted, array of pending flushes, and look forward to
5609  * find all neighbouring (or identical) writes.
5610  */
5611  for (i = 0; i < wb_context->nr_pending; i++)
5612  {
5615  SMgrRelation reln;
5616  int ahead;
5617  BufferTag tag;
5618  RelFileLocator currlocator;
5619  Size nblocks = 1;
5620 
5621  cur = &wb_context->pending_writebacks[i];
5622  tag = cur->tag;
5623  currlocator = BufTagGetRelFileLocator(&tag);
5624 
5625  /*
5626  * Peek ahead, into following writeback requests, to see if they can
5627  * be combined with the current one.
5628  */
5629  for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
5630  {
5631 
5632  next = &wb_context->pending_writebacks[i + ahead + 1];
5633 
5634  /* different file, stop */
5635  if (!RelFileLocatorEquals(currlocator,
5636  BufTagGetRelFileLocator(&next->tag)) ||
5637  BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
5638  break;
5639 
5640  /* ok, block queued twice, skip */
5641  if (cur->tag.blockNum == next->tag.blockNum)
5642  continue;
5643 
5644  /* only merge consecutive writes */
5645  if (cur->tag.blockNum + 1 != next->tag.blockNum)
5646  break;
5647 
5648  nblocks++;
5649  cur = next;
5650  }
5651 
5652  i += ahead;
5653 
5654  /* and finally tell the kernel to write the data to storage */
5655  reln = smgropen(currlocator, INVALID_PROC_NUMBER);
5656  smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
5657  }
5658 
5659  /*
5660  * Assume that writeback requests are only issued for buffers containing
5661  * blocks of permanent relations.
5662  */
5663  pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
5664  IOOP_WRITEBACK, io_start, wb_context->nr_pending);
5665 
5666  wb_context->nr_pending = 0;
5667 }
@ IOOP_WRITEBACK
Definition: pgstat.h:305
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:643
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
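
Sketch of a writeback context's life cycle, checkpoint-style (the placement of the calls is illustrative):

    WritebackContext wb_context;

    WritebackContextInit(&wb_context, &checkpoint_flush_after);
    /* ... each buffer write calls ScheduleBufferTagForWriteback() ... */
    IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);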

◆ LimitAdditionalPins()

static void LimitAdditionalPins ( uint32 additional_pins)
static

Definition at line 1773 of file bufmgr.c.

1774 {
1775  uint32 max_backends;
1776  int max_proportional_pins;
1777 
1778  if (*additional_pins <= 1)
1779  return;
1780 
1781  max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
1782  max_proportional_pins = NBuffers / max_backends;
1783 
1784  /*
1785  * Subtract the approximate number of buffers already pinned by this
1786  * backend. We get the number of "overflowed" pins for free, but don't
1787  * know the number of pins in PrivateRefCountArray. The cost of
1788  * calculating that exactly doesn't seem worth it, so just assume the max.
1789  */
1790  max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
1791 
1792  if (max_proportional_pins <= 0)
1793  max_proportional_pins = 1;
1794 
1795  if (*additional_pins > max_proportional_pins)
1796  *additional_pins = max_proportional_pins;
1797 }
int MaxBackends
Definition: globals.c:143
#define NUM_AUXILIARY_PROCS
Definition: proc.h:440

References MaxBackends, NBuffers, NUM_AUXILIARY_PROCS, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by ExtendBufferedRelShared().

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 5343 of file bufmgr.c.

5344 {
5345  BufferDesc *bufHdr = (BufferDesc *) arg;
5346 
5347  if (bufHdr != NULL)
5348  {
5349  char *path = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
5350  MyProcNumber,
5351  BufTagGetForkNum(&bufHdr->tag));
5352 
5353  errcontext("writing block %u of relation %s",
5354  bufHdr->tag.blockNum, path);
5355  pfree(path);
5356  }
5357 }
#define errcontext
Definition: elog.h:196
void * arg

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyProcNumber, pfree(), relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 4795 of file bufmgr.c.

4796 {
4797  BufferDesc *buf;
4798 
4799  Assert(BufferIsPinned(buffer));
4800  if (BufferIsLocal(buffer))
4801  return; /* local buffers need no lock */
4802 
4803  buf = GetBufferDescriptor(buffer - 1);
4804 
4805  if (mode == BUFFER_LOCK_UNLOCK)
4806  LWLockRelease(BufferDescriptorGetContentLock(buf));
4807  else if (mode == BUFFER_LOCK_SHARE)
4808  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
4809  else if (mode == BUFFER_LOCK_EXCLUSIVE)
4810  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
4811  else
4812  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
4813 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159

References Assert(), buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().
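
The canonical read sequence, as a sketch (rel and blkno assumed):

    Buffer      buf = ReadBuffer(rel, blkno);   /* pin */

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* ... examine BufferGetPage(buf) ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);                         /* unpin */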

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 4875 of file bufmgr.c.

4876 {
4877  BufferDesc *bufHdr;
4878  TimestampTz waitStart = 0;
4879  bool waiting = false;
4880  bool logged_recovery_conflict = false;
4881 
4882  Assert(BufferIsPinned(buffer));
4883  Assert(PinCountWaitBuf == NULL);
4884 
4885  CheckBufferIsPinnedOnce(buffer);
4886 
4887  /* Nobody else to wait for */
4888  if (BufferIsLocal(buffer))
4889  return;
4890 
4891  bufHdr = GetBufferDescriptor(buffer - 1);
4892 
4893  for (;;)
4894  {
4895  uint32 buf_state;
4896 
4897  /* Try to acquire lock */
4898  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4899  buf_state = LockBufHdr(bufHdr);
4900 
4901  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4902  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4903  {
4904  /* Successfully acquired exclusive lock with pincount 1 */
4905  UnlockBufHdr(bufHdr, buf_state);
4906 
4907  /*
4908  * Emit the log message if recovery conflict on buffer pin was
4909  * resolved but the startup process waited longer than
4910  * deadlock_timeout for it.
4911  */
4912  if (logged_recovery_conflict)
4913  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4914  waitStart, GetCurrentTimestamp(),
4915  NULL, false);
4916 
4917  if (waiting)
4918  {
4919  /* reset ps display to remove the suffix if we added one */
4920  set_ps_display_remove_suffix();
4921  waiting = false;
4922  }
4923  return;
4924  }
4925  /* Failed, so mark myself as waiting for pincount 1 */
4926  if (buf_state & BM_PIN_COUNT_WAITER)
4927  {
4928  UnlockBufHdr(bufHdr, buf_state);
4929  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4930  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4931  }
4932  bufHdr->wait_backend_pgprocno = MyProcNumber;
4933  PinCountWaitBuf = bufHdr;
4934  buf_state |= BM_PIN_COUNT_WAITER;
4935  UnlockBufHdr(bufHdr, buf_state);
4936  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4937 
4938  /* Wait to be signaled by UnpinBuffer() */
4939  if (InHotStandby)
4940  {
4941  if (!waiting)
4942  {
4943  /* adjust the process title to indicate that it's waiting */
4944  set_ps_display_suffix("waiting");
4945  waiting = true;
4946  }
4947 
4948  /*
4949  * Emit the log message if the startup process is waiting longer
4950  * than deadlock_timeout for recovery conflict on buffer pin.
4951  *
4952  * Skip this if first time through because the startup process has
4953  * not started waiting yet in this case. So, the wait start
4954  * timestamp is set after this logic.
4955  */
4956  if (waitStart != 0 && !logged_recovery_conflict)
4957  {
4958  TimestampTz now = GetCurrentTimestamp();
4959 
4960  if (TimestampDifferenceExceeds(waitStart, now,
4961  DeadlockTimeout))
4962  {
4963  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4964  waitStart, now, NULL, true);
4965  logged_recovery_conflict = true;
4966  }
4967  }
4968 
4969  /*
4970  * Set the wait start timestamp if logging is enabled and first
4971  * time through.
4972  */
4973  if (log_recovery_conflict_waits && waitStart == 0)
4974  waitStart = GetCurrentTimestamp();
4975 
4976  /* Publish the bufid that Startup process waits on */
4977  SetStartupBufferPinWaitBufId(buffer - 1);
4978  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4979  ResolveRecoveryConflictWithBufferPin();
4980  /* Reset the published bufid */
4981  SetStartupBufferPinWaitBufId(-1);
4982  }
4983  else
4984  ProcWaitForSignal(WAIT_EVENT_BUFFER_PIN);
4985 
4986  /*
4987  * Remove flag marking us as waiter. Normally this will not be set
4988  * anymore, but ProcWaitForSignal() can return for other signals as
4989  * well. We take care to only reset the flag if we're the waiter, as
4990  * theoretically another backend could have started waiting. That's
4991  * impossible with the current usages due to table level locking, but
4992  * better be safe.
4993  */
4994  buf_state = LockBufHdr(bufHdr);
4995  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4996  bufHdr->wait_backend_pgprocno == MyProcNumber)
4997  buf_state &= ~BM_PIN_COUNT_WAITER;
4998  UnlockBufHdr(bufHdr, buf_state);
4999 
5000  PinCountWaitBuf = NULL;
5001  /* Loop back and try again */
5002  }
5003 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:67
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:164
int64 TimestampTz
Definition: timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:394
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:342
int DeadlockTimeout
Definition: proc.c:57
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:659
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1866
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:792
bool log_recovery_conflict_waits
Definition: standby.c:41
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:273
int wait_backend_pgprocno
#define InHotStandby
Definition: xlogutils.h:57

References Assert(), BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().
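
Vacuum-style sketch (vac_strategy is an assumed BufferAccessStrategy):

    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, vac_strategy);

    LockBufferForCleanup(buf);   /* may wait until ours is the only pin */
    /* ... prune and defragment the page ... */
    UnlockReleaseBuffer(buf);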

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 5390 of file bufmgr.c.

5391 {
5392  SpinDelayStatus delayStatus;
5393  uint32 old_buf_state;
5394 
5395  Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
5396 
5397  init_local_spin_delay(&delayStatus);
5398 
5399  while (true)
5400  {
5401  /* set BM_LOCKED flag */
5402  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
5403  /* if it wasn't set before we're OK */
5404  if (!(old_buf_state & BM_LOCKED))
5405  break;
5406  perform_spin_delay(&delayStatus);
5407  }
5408  finish_spin_delay(&delayStatus);
5409  return old_buf_state | BM_LOCKED;
5410 }
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:405
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:132
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:192
#define init_local_spin_delay(status)
Definition: s_lock.h:843

References Assert(), BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBufferNoOwner(), and WaitIO().
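
Sketch of the lock/inspect/unlock pattern; UnlockBufHdr() stores the passed-in state word, so changes made to buf_state while holding the spinlock take effect on release:

    uint32      buf_state = LockBufHdr(bufHdr);

    if (buf_state & BM_DIRTY)
    {
        /* ... act on a dirty buffer ... */
    }
    UnlockBufHdr(bufHdr, buf_state);   /* also clears BM_LOCKED */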

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 2189 of file bufmgr.c.

2190 {
2191  BufferDesc *bufHdr;
2192  uint32 buf_state;
2193  uint32 old_buf_state;
2194 
2195  if (!BufferIsValid(buffer))
2196  elog(ERROR, "bad buffer ID: %d", buffer);
2197 
2198  if (BufferIsLocal(buffer))
2199  {
2200  MarkLocalBufferDirty(buffer);
2201  return;
2202  }
2203 
2204  bufHdr = GetBufferDescriptor(buffer - 1);
2205 
2206  Assert(BufferIsPinned(buffer));
2207  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2208  LW_EXCLUSIVE));
2209 
2210  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
2211  for (;;)
2212  {
2213  if (old_buf_state & BM_LOCKED)
2214  old_buf_state = WaitBufHdrUnlocked(bufHdr);
2215 
2216  buf_state = old_buf_state;
2217 
2218  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2219  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
2220 
2221  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
2222  buf_state))
2223  break;
2224  }
2225 
2226  /*
2227  * If the buffer was not dirty already, do vacuum accounting.
2228  */
2229  if (!(old_buf_state & BM_DIRTY))
2230  {
2231  VacuumPageDirty++;
2232  pgBufferUsage.shared_blks_dirtied++;
2233  if (VacuumCostActive)
2234  VacuumCostBalance += VacuumCostPageDirty;
2235  }
2236 }
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:344
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:5420
bool VacuumCostActive
Definition: globals.c:159
int64 VacuumPageDirty
Definition: globals.c:156
int VacuumCostBalance
Definition: globals.c:158
int VacuumCostPageDirty
Definition: globals.c:150
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:449
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), do_setval(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_freeze_execute_prepared(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().
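
Sketch of the standard WAL-logged modification protocol around this call (XLogInsert() details elided):

    START_CRIT_SECTION();
    /* ... apply the change to BufferGetPage(buf) ... */
    MarkBufferDirty(buf);
    /* ... XLogInsert() the record, then PageSetLSN(page, recptr) ... */
    END_CRIT_SECTION();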

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 4624 of file bufmgr.c.

4625 {
4626  BufferDesc *bufHdr;
4627  Page page = BufferGetPage(buffer);
4628 
4629  if (!BufferIsValid(buffer))
4630  elog(ERROR, "bad buffer ID: %d", buffer);
4631 
4632  if (BufferIsLocal(buffer))
4633  {
4634  MarkLocalBufferDirty(buffer);
4635  return;
4636  }
4637 
4638  bufHdr = GetBufferDescriptor(buffer - 1);
4639 
4640  Assert(GetPrivateRefCount(buffer) > 0);
4641  /* here, either share or exclusive lock is OK */
4642  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4643 
4644  /*
4645  * This routine might get called many times on the same page, if we are
4646  * making the first scan after commit of an xact that added/deleted many
4647  * tuples. So, be as quick as we can if the buffer is already dirty. We
4648  * do this by not acquiring spinlock if it looks like the status bits are
4649  * already set. Since we make this test unlocked, there's a chance we
4650  * might fail to notice that the flags have just been cleared, and failed
4651  * to reset them, due to memory-ordering issues. But since this function
4652  * is only intended to be used in cases where failing to write out the
4653  * data would be harmless anyway, it doesn't really matter.
4654  */
4655  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
4656  (BM_DIRTY | BM_JUST_DIRTIED))
4657  {
4658  XLogRecPtr lsn = InvalidXLogRecPtr;
4659  bool dirtied = false;
4660  bool delayChkptFlags = false;
4661  uint32 buf_state;
4662 
4663  /*
4664  * If we need to protect hint bit updates from torn writes, WAL-log a
4665  * full page image of the page. This full page image is only necessary
4666  * if the hint bit update is the first change to the page since the
4667  * last checkpoint.
4668  *
4669  * We don't check full_page_writes here because that logic is included
4670  * when we call XLogInsert() since the value changes dynamically.
4671  */
4672  if (XLogHintBitIsNeeded() &&
4673  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
4674  {
4675  /*
4676  * If we must not write WAL, due to a relfilelocator-specific
4677  * condition or being in recovery, don't dirty the page. We can
4678  * set the hint, just not dirty the page as a result so the hint
4679  * is lost when we evict the page or shutdown.
4680  *
4681  * See src/backend/storage/page/README for longer discussion.
4682  */
4683  if (RecoveryInProgress() ||
4684  RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
4685  return;
4686 
4687  /*
4688  * If the block is already dirty because we either made a change
4689  * or set a hint already, then we don't need to write a full page
4690  * image. Note that aggressive cleaning of blocks dirtied by hint
4691  * bit setting would increase the call rate. Bulk setting of hint
4692  * bits would reduce the call rate...
4693  *
4694  * We must issue the WAL record before we mark the buffer dirty.
4695  * Otherwise we might write the page before we write the WAL. That
4696  * causes a race condition, since a checkpoint might occur between
4697  * writing the WAL record and marking the buffer dirty. We solve
4698  * that with a kluge, but one that is already in use during
4699  * transaction commit to prevent race conditions. Basically, we
4700  * simply prevent the checkpoint WAL record from being written
4701  * until we have marked the buffer dirty. We don't start the
4702  * checkpoint flush until we have marked dirty, so our checkpoint
4703  * must flush the change to disk successfully or the checkpoint
4704  * never gets written, so crash recovery will fix.
4705  *
4706  * It's possible we may enter here without an xid, so it is
4707  * essential that CreateCheckPoint waits for virtual transactions
4708  * rather than full transactionids.
4709  */
4710  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
4711  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
4712  delayChkptFlags = true;
4713  lsn = XLogSaveBufferForHint(buffer, buffer_std);
4714  }
4715 
4716  buf_state = LockBufHdr(bufHdr);
4717 
4718  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4719 
4720  if (!(buf_state & BM_DIRTY))
4721  {
4722  dirtied = true; /* Means "will be dirtied by this action" */
4723 
4724  /*
4725  * Set the page LSN if we wrote a backup block. We aren't supposed
4726  * to set this when only holding a share lock but as long as we
4727  * serialise it somehow we're OK. We choose to set LSN while
4728  * holding the buffer header lock, which causes any reader of an
4729  * LSN who holds only a share lock to also obtain a buffer header
4730  * lock before using PageGetLSN(), which is enforced in
4731  * BufferGetLSNAtomic().
4732  *
4733  * If checksums are enabled, you might think we should reset the
4734  * checksum here. That will happen when the page is written
4735  * sometime later in this checkpoint cycle.
4736  */
4737  if (!XLogRecPtrIsInvalid(lsn))
4738  PageSetLSN(page, lsn);
4739  }
4740 
4741  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
4742  UnlockBufHdr(bufHdr, buf_state);
4743 
4744  if (delayChkptFlags)
4745  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
4746 
4747  if (dirtied)
4748  {
4749  VacuumPageDirty++;
4750  pgBufferUsage.shared_blks_dirtied++;
4751  if (VacuumCostActive)
4752  VacuumCostBalance += VacuumCostPageDirty;
4753  }
4754  }
4755 }
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define DELAY_CHKPT_START
Definition: proc.h:114
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:532
int delayChkptFlags
Definition: proc.h:236
bool RecoveryInProgress(void)
Definition: xlog.c:6201
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1065

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
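
Sketch in the style of the SetHintBits() caller (the tuple update shown is illustrative):

    tuple->t_infomask |= HEAP_XMIN_COMMITTED;   /* a losable hint bit */
    MarkBufferDirtyHint(buffer, true);          /* true: standard page layout */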

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 304 of file bufmgr.c.

305 {
306  PrivateRefCountEntry *res;
307 
308  /* only allowed to be called when a reservation has been made */
309  Assert(ReservedRefCountEntry != NULL);
310 
311  /* use up the reserved entry */
312  res = ReservedRefCountEntry;
313  ReservedRefCountEntry = NULL;
314 
315  /* and fill it */
316  res->buffer = buffer;
317  res->refcount = 0;
318 
319  return res;
320 }

References Assert(), PrivateRefCountEntry::buffer, res, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 2310 of file bufmgr.c.

2311 {
2312  Buffer b = BufferDescriptorGetBuffer(buf);
2313  bool result;
2314  PrivateRefCountEntry *ref;
2315 
2316  Assert(!BufferIsLocal(b));
2317  Assert(ReservedRefCountEntry != NULL);
2318 
2319  ref = GetPrivateRefCountEntry(b, true);
2320 
2321  if (ref == NULL)
2322  {
2323  uint32 buf_state;
2324  uint32 old_buf_state;
2325 
2326  ref = NewPrivateRefCountEntry(b);
2327 
2328  old_buf_state = pg_atomic_read_u32(&buf->state);
2329  for (;;)
2330  {
2331  if (old_buf_state & BM_LOCKED)
2332  old_buf_state = WaitBufHdrUnlocked(buf);
2333 
2334  buf_state = old_buf_state;
2335 
2336  /* increase refcount */
2337  buf_state += BUF_REFCOUNT_ONE;
2338 
2339  if (strategy == NULL)
2340  {
2341  /* Default case: increase usagecount unless already max. */
2342  if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
2343  buf_state += BUF_USAGECOUNT_ONE;
2344  }
2345  else
2346  {
2347  /*
2348  * Ring buffers shouldn't evict others from pool. Thus we
2349  * don't make usagecount more than 1.
2350  */
2351  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
2352  buf_state += BUF_USAGECOUNT_ONE;
2353  }
2354 
2355  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2356  buf_state))
2357  {
2358  result = (buf_state & BM_VALID) != 0;
2359 
2360  /*
2361  * Assume that we acquired a buffer pin for the purposes of
2362  * Valgrind buffer client checks (even in !result case) to
2363  * keep things simple. Buffers that are unsafe to access are
2364  * not generally guaranteed to be marked undefined or
2365  * non-accessible in any case.
2366  */
2367  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2368  break;
2369  }
2370  }
2371  }
2372  else
2373  {
2374  /*
2375  * If we previously pinned the buffer, it must surely be valid.
2376  *
2377  * Note: We deliberately avoid a Valgrind client request here.
2378  * Individual access methods can optionally superimpose buffer page
2379  * client requests on top of our client requests to enforce that
2380  * buffers are only accessed while locked (and pinned). It's possible
2381  * that the buffer page is legitimately non-accessible here. We
2382  * cannot meddle with that.
2383  */
2384  result = true;
2385  }
2386 
2387  ref->refcount++;
2388  Assert(ref->refcount > 0);
2389  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2390  return result;
2391 }
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:78
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:43
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:52
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:304
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26

References Assert(), b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservedRefCountEntry, ResourceOwnerRememberBuffer(), VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 2416 of file bufmgr.c.

2417 {
2418  Buffer b;
2419  PrivateRefCountEntry *ref;
2420  uint32 buf_state;
2421 
2422  /*
2423  * As explained, We don't expect any preexisting pins. That allows us to
2424  * manipulate the PrivateRefCount after releasing the spinlock
2425  */
2426  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
2427 
2428  /*
2429  * Buffer can't have a preexisting pin, so mark its page as defined to
2430  * Valgrind (this is similar to the PinBuffer() case where the backend
2431  * doesn't already have a buffer pin)
2432  */
2433  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
2434 
2435  /*
2436  * Since we hold the buffer spinlock, we can update the buffer state and
2437  * release the lock in one operation.
2438  */
2439  buf_state = pg_atomic_read_u32(&buf->state);
2440  Assert(buf_state & BM_LOCKED);
2441  buf_state += BUF_REFCOUNT_ONE;
2442  UnlockBufHdr(buf, buf_state);
2443 
2444  b = BufferDescriptorGetBuffer(buf);
2445 
2446  ref = NewPrivateRefCountEntry(b);
2447  ref->refcount++;
2448 
2449  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
2450 }

References Assert(), b, BM_LOCKED, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), UnlockBufHdr(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 627 of file bufmgr.c.

628 {
629  Assert(RelationIsValid(reln));
630  Assert(BlockNumberIsValid(blockNum));
631 
632  if (RelationUsesLocalBuffers(reln))
633  {
634  /* see comments in ReadBufferExtended */
635  if (RELATION_IS_OTHER_TEMP(reln))
636  ereport(ERROR,
637  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
638  errmsg("cannot access temporary tables of other sessions")));
639 
640  /* pass it off to localbuf.c */
641  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
642  }
643  else
644  {
645  /* pass it to the shared buffer version */
646  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
647  }
648 }
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:537
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:69
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:658
#define RelationIsValid(relation)
Definition: rel.h:478

References Assert(), BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by acquire_sample_rows(), BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().
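
A minimal usage sketch (assuming a valid Relation rel; the helper and the fixed look-ahead distance of 8 blocks are illustrative):

static Buffer
read_block_with_readahead(Relation rel, BlockNumber blkno, BlockNumber nblocks)
{
    /* hint that a block further ahead in the scan will be needed soon */
    if (blkno + 8 < nblocks)
        (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno + 8);

    /* read (and pin) the block needed right now */
    return ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL);
}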

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 537 of file bufmgr.c.

540 {
541  PrefetchBufferResult result = {InvalidBuffer, false};
542  BufferTag newTag; /* identity of requested block */
543  uint32 newHash; /* hash value for newTag */
544  LWLock *newPartitionLock; /* buffer partition lock for it */
545  int buf_id;
546 
547  Assert(BlockNumberIsValid(blockNum));
548 
549  /* create a tag so we can lookup the buffer */
550  InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
551  forkNum, blockNum);
552 
553  /* determine its hash code and partition lock ID */
554  newHash = BufTableHashCode(&newTag);
555  newPartitionLock = BufMappingPartitionLock(newHash);
556 
557  /* see if the block is in the buffer pool already */
558  LWLockAcquire(newPartitionLock, LW_SHARED);
559  buf_id = BufTableLookup(&newTag, newHash);
560  LWLockRelease(newPartitionLock);
561 
562  /* If not in buffers, initiate prefetch */
563  if (buf_id < 0)
564  {
565 #ifdef USE_PREFETCH
566  /*
567  * Try to initiate an asynchronous read. This returns false in
568  * recovery if the relation file doesn't exist.
569  */
570  if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
571  smgrprefetch(smgr_reln, forkNum, blockNum, 1))
572  {
573  result.initiated_io = true;
574  }
575 #endif /* USE_PREFETCH */
576  }
577  else
578  {
579  /*
580  * Report the buffer it was in at that time. The caller may be able
581  * to avoid a buffer table lookup, but it's not pinned and it must be
582  * rechecked!
583  */
584  result.recent_buffer = buf_id + 1;
585  }
586 
587  /*
588  * If the block *is* in buffers, we do nothing. This is not really ideal:
589  * the block might be just about to be evicted, which would be stupid
590  * since we know we are going to need it soon. But the only easy answer
591  * is to bump the usage_count, which does not seem like a great solution:
592  * when the caller does ultimately touch the block, usage_count would get
593  * bumped again, resulting in too much favoritism for blocks that are
594  * involved in a prefetch sequence. A real fix would involve some
595  * additional per-buffer state, and it's not clear that there's enough of
596  * a problem to justify that.
597  */
598 
599  return result;
600 }
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:585
Buffer recent_buffer
Definition: bufmgr.h:59

References Assert(), BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 734 of file bufmgr.c.

735 {
736  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
737 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:781
@ RBM_NORMAL
Definition: bufmgr.h:44

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
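
The canonical access pattern, as a sketch (rel and blkno are assumed context): ReadBuffer() only pins the page; the caller must still take the content lock before examining it:

Buffer      buf;
Page        page;

buf = ReadBuffer(rel, blkno);
LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
/* ... examine the page while the share lock is held ... */
UnlockReleaseBuffer(buf);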

◆ ReadBuffer_common()

static Buffer ReadBuffer_common ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 1014 of file bufmgr.c.

1017 {
1018  BufferDesc *bufHdr;
1019  Block bufBlock;
1020  bool found;
1021  IOContext io_context;
1022  IOObject io_object;
1023  bool isLocalBuf = SmgrIsTemp(smgr);
1024 
1025  *hit = false;
1026 
1027  /*
1028  * Backward compatibility path, most code should use ExtendBufferedRel()
1029  * instead, as acquiring the extension lock inside ExtendBufferedRel()
1030  * scales a lot better.
1031  */
1032  if (unlikely(blockNum == P_NEW))
1033  {
1034  uint32 flags = EB_SKIP_EXTENSION_LOCK;
1035 
1036  /*
1037  * Since no-one else can be looking at the page contents yet, there is
1038  * no difference between an exclusive lock and a cleanup-strength
1039  * lock.
1040  */
1041  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1042  flags |= EB_LOCK_FIRST;
1043 
1044  return ExtendBufferedRel(BMR_SMGR(smgr, relpersistence),
1045  forkNum, strategy, flags);
1046  }
1047 
1048  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1049  smgr->smgr_rlocator.locator.spcOid,
1050  smgr->smgr_rlocator.locator.dbOid,
1051  smgr->smgr_rlocator.locator.relNumber,
1052  smgr->smgr_rlocator.backend);
1053 
1054  if (isLocalBuf)
1055  {
1056  /*
1057  * We do not use a BufferAccessStrategy for I/O of temporary tables.
1058  * However, in some cases, the "strategy" may not be NULL, so we can't
1059  * rely on IOContextForStrategy() to set the right IOContext for us.
1060  * This may happen in cases like CREATE TEMPORARY TABLE AS...
1061  */
1062  io_context = IOCONTEXT_NORMAL;
1063  io_object = IOOBJECT_TEMP_RELATION;
1064  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
1065  if (found)
1066  pgBufferUsage.local_blks_hit++;
1067  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
1068  mode == RBM_ZERO_ON_ERROR)
1069  pgBufferUsage.local_blks_read++;
1070  }
1071  else
1072  {
1073  /*
1074  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
1075  * not currently in memory.
1076  */
1077  io_context = IOContextForStrategy(strategy);
1078  io_object = IOOBJECT_RELATION;
1079  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
1080  strategy, &found, io_context);
1081  if (found)
1082  pgBufferUsage.shared_blks_hit++;
1083  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
1084  mode == RBM_ZERO_ON_ERROR)
1085  pgBufferUsage.shared_blks_read++;
1086  }
1087 
1088  /* At this point we do NOT hold any locks. */
1089 
1090  /* if it was already in the buffer pool, we're done */
1091  if (found)
1092  {
1093  /* Just need to update stats before we exit */
1094  *hit = true;
1095  VacuumPageHit++;
1096  pgstat_count_io_op(io_object, io_context, IOOP_HIT);
1097 
1098  if (VacuumCostActive)
1099  VacuumCostBalance += VacuumCostPageHit;
1100 
1101  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1102  smgr->smgr_rlocator.locator.spcOid,
1103  smgr->smgr_rlocator.locator.dbOid,
1104  smgr->smgr_rlocator.locator.relNumber,
1105  smgr->smgr_rlocator.backend,
1106  found);
1107 
1108  /*
1109  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be locked
1110  * on return.
1111  */
1112  if (!isLocalBuf)
1113  {
1114  if (mode == RBM_ZERO_AND_LOCK)
1115  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
1116  LW_EXCLUSIVE);
1117  else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
1118  LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
1119  }
1120 
1121  return BufferDescriptorGetBuffer(bufHdr);
1122  }
1123 
1124  /*
1125  * if we have gotten to this point, we have allocated a buffer for the
1126  * page but its contents are not yet valid. IO_IN_PROGRESS is set for it,
1127  * if it's a shared buffer.
1128  */
1129  Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
1130 
1131  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
1132 
1133  /*
1134  * Read in the page, unless the caller intends to overwrite it and just
1135  * wants us to allocate a buffer.
1136  */
1137  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1138  MemSet((char *) bufBlock, 0, BLCKSZ);
1139  else
1140  {
1141  instr_time io_start = pgstat_prepare_io_time(track_io_timing);
1142 
1143  smgrread(smgr, forkNum, blockNum, bufBlock);
1144 
1145  pgstat_count_io_op_time(io_object, io_context,
1146  IOOP_READ, io_start, 1);
1147 
1148  /* check for garbage data */
1149  if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
1150  PIV_LOG_WARNING | PIV_REPORT_STAT))
1151  {
1152  if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
1153  {
1154  ereport(WARNING,
1155  (errcode(ERRCODE_DATA_CORRUPTED),
1156  errmsg("invalid page in block %u of relation %s; zeroing out page",
1157  blockNum,
1158  relpath(smgr->smgr_rlocator, forkNum))));
1159  MemSet((char *) bufBlock, 0, BLCKSZ);
1160  }
1161  else
1162  ereport(ERROR,
1163  (errcode(ERRCODE_DATA_CORRUPTED),
1164  errmsg("invalid page in block %u of relation %s",
1165  blockNum,
1166  relpath(smgr->smgr_rlocator, forkNum))));
1167  }
1168  }
1169 
1170  /*
1171  * In RBM_ZERO_AND_LOCK / RBM_ZERO_AND_CLEANUP_LOCK mode, grab the buffer
1172  * content lock before marking the page as valid, to make sure that no
1173  * other backend sees the zeroed page before the caller has had a chance
1174  * to initialize it.
1175  *
1176  * Since no-one else can be looking at the page contents yet, there is no
1177  * difference between an exclusive lock and a cleanup-strength lock. (Note
1178  * that we cannot use LockBuffer() or LockBufferForCleanup() here, because
1179  * they assert that the buffer is already valid.)
1180  */
1181  if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
1182  !isLocalBuf)
1183  {
1184  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
1185  }
1186 
1187  if (isLocalBuf)
1188  {
1189  /* Only need to adjust flags */
1190  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1191 
1192  buf_state |= BM_VALID;
1193  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1194  }
1195  else
1196  {
1197  /* Set BM_VALID, terminate IO, and wake up any waiters */
1198  TerminateBufferIO(bufHdr, false, BM_VALID, true);
1199  }
1200 
1201  VacuumPageMiss++;
1202  if (VacuumCostActive)
1203  VacuumCostBalance += VacuumCostPageMiss;
1204 
1205  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1206  smgr->smgr_rlocator.locator.spcOid,
1207  smgr->smgr_rlocator.locator.dbOid,
1208  smgr->smgr_rlocator.locator.relNumber,
1209  smgr->smgr_rlocator.backend,
1210  found);
1211 
1212  return BufferDescriptorGetBuffer(bufHdr);
1213 }
bool zero_damaged_pages
Definition: bufmgr.c:135
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition: bufmgr.c:1239
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:838
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4875
#define P_NEW
Definition: bufmgr.h:152
#define BMR_SMGR(p_smgr, p_relpersistence)
Definition: bufmgr.h:107
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:49
@ RBM_NORMAL_NO_LOG
Definition: bufmgr.h:50
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
#define PIV_LOG_WARNING
Definition: bufpage.h:465
#define PIV_REPORT_STAT
Definition: bufpage.h:466
#define unlikely(x)
Definition: c.h:298
int64 VacuumPageHit
Definition: globals.c:154
int VacuumCostPageMiss
Definition: globals.c:149
int64 VacuumPageMiss
Definition: globals.c:155
int VacuumCostPageHit
Definition: globals.c:148
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:117
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
IOObject
Definition: pgstat.h:279
@ IOOP_READ
Definition: pgstat.h:302
@ IOOP_HIT
Definition: pgstat.h:301
#define SmgrIsTemp(smgr)
Definition: smgr.h:73
static void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer)
Definition: smgr.h:114
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 shared_blks_hit
Definition: instrument.h:26

References Assert(), RelFileLocatorBackend::backend, BM_VALID, BMR_SMGR, BufferAlloc(), BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, RelFileLocator::dbOid, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), ERROR, ExtendBufferedRel(), IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, IOOP_READ, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LocalBufferAlloc(), LocalBufHdrGetBlock, RelFileLocatorBackend::locator, LockBufferForCleanup(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, mode, P_NEW, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_io_op(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileLocator::relNumber, relpath, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, SMgrRelationData::smgr_rlocator, SmgrIsTemp, smgrread(), RelFileLocator::spcOid, BufferDesc::state, TerminateBufferIO(), track_io_timing, unlikely, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().
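
A sketch of the zeroing modes described above (illustrative; rel and blkno are assumed context): with RBM_ZERO_AND_LOCK the caller receives a zeroed page that is already exclusively locked, and initializes it before any other backend can see it:

Buffer      buf;
Page        page;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK, NULL);
page = BufferGetPage(buf);
PageInit(page, BufferGetPageSize(buf), 0);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);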

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 781 of file bufmgr.c.

783 {
784  bool hit;
785  Buffer buf;
786 
787  /*
788  * Reject attempts to read non-local temporary relations; we would be
789  * likely to get wrong data since we have no visibility into the owning
790  * session's local buffers.
791  */
792  if (RELATION_IS_OTHER_TEMP(reln))
793  ereport(ERROR,
794  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
795  errmsg("cannot access temporary tables of other sessions")));
796 
797  /*
798  * Read the buffer, and update pgstat counters to reflect a cache hit or
799  * miss.
800  */
801  pgstat_count_buffer_read(reln);
802  buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
803  forkNum, blockNum, mode, strategy, &hit);
804  if (hit)
805  pgstat_count_buffer_hit(reln);
806  return buf;
807 }
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:635
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:640

References buf, ereport, errcode(), errmsg(), ERROR, mode, pgstat_count_buffer_hit, pgstat_count_buffer_read, RelationData::rd_rel, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), autoprewarm_database_main(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), brin_vacuum_scan(), bt_recheck_sibling_links(), btvacuumpage(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_analyze_next_block(), heapgetpage(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_newpage_range(), palloc_btree_page(), pg_prewarm(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), verify_heapam(), and vm_readbuf().
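
A bulk-scan sketch (assuming a valid Relation rel): a BAS_BULKREAD ring strategy keeps a large sequential read from evicting the rest of shared buffers:

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
BlockNumber blkno;

for (blkno = 0; blkno < nblocks; blkno++)
{
    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, strategy);

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* ... process BufferGetPage(buf) ... */
    UnlockReleaseBuffer(buf);
}
FreeAccessStrategy(strategy);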

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 821 of file bufmgr.c.

824 {
825  bool hit;
826 
827  SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
828 
829  return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
830  RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
831  mode, strategy, &hit);
832 }

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 658 of file bufmgr.c.

660 {
661  BufferDesc *bufHdr;
662  BufferTag tag;
663  uint32 buf_state;
664  bool have_private_ref;
665 
666  Assert(BufferIsValid(recent_buffer));
667 
668  ResourceOwnerEnlarge(CurrentResourceOwner);
669  ReservePrivateRefCountEntry();
670  InitBufferTag(&tag, &rlocator, forkNum, blockNum);
671 
672  if (BufferIsLocal(recent_buffer))
673  {
674  int b = -recent_buffer - 1;
675 
676  bufHdr = GetLocalBufferDescriptor(b);
677  buf_state = pg_atomic_read_u32(&bufHdr->state);
678 
679  /* Is it still valid and holding the right tag? */
680  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
681  {
682  PinLocalBuffer(bufHdr, true);
683 
684  pgBufferUsage.local_blks_hit++;
685 
686  return true;
687  }
688  }
689  else
690  {
691  bufHdr = GetBufferDescriptor(recent_buffer - 1);
692  have_private_ref = GetPrivateRefCount(recent_buffer) > 0;
693 
694  /*
695  * Do we already have this buffer pinned with a private reference? If
696  * so, it must be valid and it is safe to check the tag without
697  * locking. If not, we have to lock the header first and then check.
698  */
699  if (have_private_ref)
700  buf_state = pg_atomic_read_u32(&bufHdr->state);
701  else
702  buf_state = LockBufHdr(bufHdr);
703 
704  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
705  {
706  /*
707  * It's now safe to pin the buffer. We can't pin first and ask
708  * questions later, because it might confuse code paths like
709  * InvalidateBuffer() if we pinned a random non-matching buffer.
710  */
711  if (have_private_ref)
712  PinBuffer(bufHdr, NULL); /* bump pin count */
713  else
714  PinBuffer_Locked(bufHdr); /* pin for first time */
715 
716  pgBufferUsage.shared_blks_hit++;
717 
718  return true;
719  }
720 
721  /* If we locked the header above, now unlock. */
722  if (!have_private_ref)
723  UnlockBufHdr(bufHdr, buf_state);
724  }
725 
726  return false;
727 }
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:655

References Assert(), b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), InitBufferTag(), BufferUsage::local_blks_hit, LockBufHdr(), pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinBuffer_Locked(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by XLogReadBufferExtended().
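
A sketch of the intended pairing with PrefetchSharedBuffer() (illustrative; smgr and blkno are assumed context, and the fallback uses ReadBufferWithoutRelcache() as the recovery code does): remember recent_buffer from the prefetch and try the cheap re-pin path before a full lookup:

PrefetchBufferResult pre = PrefetchSharedBuffer(smgr, MAIN_FORKNUM, blkno);
Buffer      buf;

if (BufferIsValid(pre.recent_buffer) &&
    ReadRecentBuffer(smgr->smgr_rlocator.locator, MAIN_FORKNUM, blkno,
                     pre.recent_buffer))
    buf = pre.recent_buffer;    /* pinned again without a mapping lookup */
else
    buf = ReadBufferWithoutRelcache(smgr->smgr_rlocator.locator, MAIN_FORKNUM,
                                    blkno, RBM_NORMAL, NULL, true);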

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 4343 of file bufmgr.c.

4346 {
4347  Buffer srcBuf;
4348  Buffer dstBuf;
4349  Page srcPage;
4350  Page dstPage;
4351  bool use_wal;
4352  BlockNumber nblocks;
4353  BlockNumber blkno;
4354  PGIOAlignedBlock buf;
4355  BufferAccessStrategy bstrategy_src;
4356  BufferAccessStrategy bstrategy_dst;
4357 
4358  /*
4359  * In general, we want to write WAL whenever wal_level > 'minimal', but we
4360  * can skip it when copying any fork of an unlogged relation other than
4361  * the init fork.
4362  */
4363  use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
4364 
4365  /* Get number of blocks in the source relation. */
4366  nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
4367  forkNum);
4368 
4369  /* Nothing to copy; just return. */
4370  if (nblocks == 0)
4371  return;
4372 
4373  /*
4374  * Bulk extend the destination relation of the same size as the source
4375  * relation before starting to copy block by block.
4376  */
4377  memset(buf.data, 0, BLCKSZ);
4378  smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
4379  buf.data, true);
4380 
4381  /* This is a bulk operation, so use buffer access strategies. */
4382  bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
4383  bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
4384 
4385  /* Iterate over each block of the source relation file. */
4386  for (blkno = 0; blkno < nblocks; blkno++)
4387  {
4388  CHECK_FOR_INTERRUPTS();
4389 
4390  /* Read block from source relation. */
4391  srcBuf = ReadBufferWithoutRelcache(srclocator, forkNum, blkno,
4392  RBM_NORMAL, bstrategy_src,
4393  permanent);
4394  LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
4395  srcPage = BufferGetPage(srcBuf);
4396 
4397  dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum, blkno,
4398  RBM_ZERO_AND_LOCK, bstrategy_dst,
4399  permanent);
4400  dstPage = BufferGetPage(dstBuf);
4401 
4402  START_CRIT_SECTION();
4403 
4404  /* Copy page data from the source to the destination. */
4405  memcpy(dstPage, srcPage, BLCKSZ);
4406  MarkBufferDirty(dstBuf);
4407 
4408  /* WAL-log the copied page. */
4409  if (use_wal)
4410  log_newpage_buffer(dstBuf, true);
4411 
4412  END_CRIT_SECTION();
4413 
4414  UnlockReleaseBuffer(dstBuf);
4415  UnlockReleaseBuffer(srcBuf);
4416  }
4417 
4418  FreeAccessStrategy(bstrategy_src);
4419  FreeAccessStrategy(bstrategy_dst);
4420 }
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2189
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition: bufmgr.c:821
@ BAS_BULKREAD
Definition: bufmgr.h:35
@ BAS_BULKWRITE
Definition: bufmgr.h:37
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:639
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:535
#define XLogIsNeeded()
Definition: xlog.h:107
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237

References BAS_BULKREAD, BAS_BULKWRITE, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, END_CRIT_SECTION, FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_NORMAL, RBM_ZERO_AND_LOCK, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 3576 of file bufmgr.c.

3577 {
3578  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3579  {
3580  /*
3581  * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
3582  * tableam returns the size in bytes - but for the purpose of this
3583  * routine, we want the number of blocks. Therefore divide, rounding
3584  * up.
3585  */
3586  uint64 szbytes;
3587 
3588  szbytes = table_relation_size(relation, forkNum);
3589 
3590  return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
3591  }
3592  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
3593  {
3594  return smgrnblocks(RelationGetSmgr(relation), forkNum);
3595  }
3596  else
3597  Assert(false);
3598 
3599  return 0; /* keep compiler quiet */
3600 }
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1860

References Assert(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
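
A worked instance of the round-up division (assuming the default BLCKSZ of 8192): an exact multiple costs no extra block, while one additional byte costs a whole one:

/* (szbytes + (BLCKSZ - 1)) / BLCKSZ, spelled out with concrete numbers */
StaticAssertDecl((16384 + (8192 - 1)) / 8192 == 2, "exact multiple of BLCKSZ");
StaticAssertDecl((16385 + (8192 - 1)) / 8192 == 3, "one extra byte rounds up");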

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 2252 of file bufmgr.c.

2255 {
2256  ForkNumber forkNum = MAIN_FORKNUM;
2257  BufferDesc *bufHdr;
2258 
2259  if (BufferIsValid(buffer))
2260  {
2261  Assert(BufferIsPinned(buffer));
2262  if (BufferIsLocal(buffer))
2263  {
2264  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2265  if (bufHdr->tag.blockNum == blockNum &&
2266  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2267  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2268  return buffer;
2269  UnpinLocalBuffer(buffer);
2270  }
2271  else
2272  {
2273  bufHdr = GetBufferDescriptor(buffer - 1);
2274  /* we have pin, so it's ok to examine tag without spinlock */
2275  if (bufHdr->tag.blockNum == blockNum &&
2276  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
2277  BufTagGetForkNum(&bufHdr->tag) == forkNum)
2278  return buffer;
2279  UnpinBuffer(bufHdr);
2280  }
2281  }
2282 
2283  return ReadBuffer(relation, blockNum);
2284 }
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:734
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:681

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
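
A traversal sketch (illustrative; rel, start and end are assumed context): at most one pin is held at any time, and stepping to the block already held is nearly free:

Buffer      buf = InvalidBuffer;
BlockNumber blkno;

for (blkno = start; blkno <= end; blkno++)
{
    buf = ReleaseAndReadBuffer(buf, rel, blkno);
    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* ... examine BufferGetPage(buf) ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
if (BufferIsValid(buf))
    ReleaseBuffer(buf);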

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 4560 of file bufmgr.c.

4561 {
4562  if (!BufferIsValid(buffer))
4563  elog(ERROR, "bad buffer ID: %d", buffer);
4564 
4565  if (BufferIsLocal(buffer))
4566  UnpinLocalBuffer(buffer);
4567  else
4568  UnpinBuffer(GetBufferDescriptor(buffer - 1));
4569 }

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_drop_lock_and_maybe_pin(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndBitmapHeapScan(), ExecEndIndexOnlyScan(), ExecReScanBitmapHeapScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgetpage(), heapgettup(), heapgettup_pagemode(), lazy_scan_heap(), lazy_vacuum_heap_rel(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 238 of file bufmgr.c.

239 {
240  /* Already reserved (or freed), nothing to do */
241  if (ReservedRefCountEntry != NULL)
242  return;
243 
244  /*
245  * First search for a free entry the array, that'll be sufficient in the
246  * majority of cases.
247  */
248  {
249  int i;
250 
251  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
252  {
253  PrivateRefCountEntry *res;
254 
255  res = &PrivateRefCountArray[i];
256 
257  if (res->buffer == InvalidBuffer)
258  {
259  ReservedRefCountEntry = res;
260  return;
261  }
262  }
263  }
264 
265  /*
266  * No luck. All array entries are full. Move one array entry into the hash
267  * table.
268  */
269  {
270  /*
271  * Move entry from the current clock position in the array into the
272  * hashtable. Use that slot.
273  */
274  PrivateRefCountEntry *hashent;
275  bool found;
276 
277  /* select victim slot */
278  ReservedRefCountEntry =
279  &PrivateRefCountArray[PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
280 
281  /* Better be used, otherwise we shouldn't get here. */
282  Assert(ReservedRefCountEntry->buffer != InvalidBuffer);
283 
284  /* enter victim array entry into hashtable */
285  hashent = hash_search(PrivateRefCountHash,
286  &(ReservedRefCountEntry->buffer),
287  HASH_ENTER,
288  &found);
289  Assert(!found);
290  hashent->refcount = ReservedRefCountEntry->refcount;
291 
292  /* clear the now free array slot */
293  ReservedRefCountEntry->buffer = InvalidBuffer;
294  ReservedRefCountEntry->refcount = 0;
295 
296  PrivateRefCountOverflowed++;
297  }
298 }
static uint32 PrivateRefCountClock
Definition: bufmgr.c:199
@ HASH_ENTER
Definition: hsearch.h:114

References Assert(), PrivateRefCountEntry::buffer, HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, res, and ReservedRefCountEntry.

Referenced by BufferAlloc(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntry(), GetVictimBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 5680 of file bufmgr.c.

5681 {
5682  Buffer buffer = DatumGetInt32(res);
5683 
5684  return psprintf("lost track of buffer IO on buffer %d", buffer);
5685 }
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202

References PrivateRefCountEntry::buffer, DatumGetInt32(), psprintf(), and res.

◆ ResOwnerPrintBufferPin()

static char * ResOwnerPrintBufferPin ( Datum  res)
static

Definition at line 5703 of file bufmgr.c.

5704 {
5705  return DebugPrintBufferRefcount(DatumGetInt32(res));
5706 }

References DatumGetInt32(), DebugPrintBufferRefcount(), and res.

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 5672 of file bufmgr.c.

5673 {
5674  Buffer buffer = DatumGetInt32(res);
5675 
5676  AbortBufferIO(buffer);
5677 }
static void AbortBufferIO(Buffer buffer)
Definition: bufmgr.c:5281

References AbortBufferIO(), PrivateRefCountEntry::buffer, DatumGetInt32(), and res.

◆ ResOwnerReleaseBufferPin()

static void ResOwnerReleaseBufferPin ( Datum  res)
static

Definition at line 5688 of file bufmgr.c.

5689 {
5690  Buffer buffer = DatumGetInt32(res);
5691 
5692  /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
5693  if (!BufferIsValid(buffer))
5694  elog(ERROR, "bad buffer ID: %d", buffer);
5695 
5696  if (BufferIsLocal(buffer))
5697  UnpinLocalBufferNoOwner(buffer);
5698  else
5699  UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
5700 }
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition: bufmgr.c:2468
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition: localbuf.c:688

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), DatumGetInt32(), elog, ERROR, GetBufferDescriptor(), res, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ rlocator_comparator()

static int rlocator_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 5363 of file bufmgr.c.

5364 {
5365  RelFileLocator n1 = *(const RelFileLocator *) p1;
5366  RelFileLocator n2 = *(const RelFileLocator *) p2;
5367 
5368  if (n1.relNumber < n2.relNumber)
5369  return -1;
5370  else if (n1.relNumber > n2.relNumber)
5371  return 1;
5372 
5373  if (n1.dbOid < n2.dbOid)
5374  return -1;
5375  else if (n1.dbOid > n2.dbOid)
5376  return 1;
5377 
5378  if (n1.spcOid < n2.spcOid)
5379  return -1;
5380  else if (n1.spcOid > n2.spcOid)
5381  return 1;
5382  else
5383  return 0;
5384 }

References RelFileLocator::dbOid, p2, RelFileLocator::relNumber, and RelFileLocator::spcOid.

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().
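
Usage is plain qsort()/bsearch() over RelFileLocator arrays, as in DropRelationsAllBuffers(); a sketch (locators, nlocators and bufHdr are assumed context):

RelFileLocator key = BufTagGetRelFileLocator(&bufHdr->tag);

qsort(locators, nlocators, sizeof(RelFileLocator), rlocator_comparator);
/* ... then, per buffer header: */
if (bsearch(&key, locators, nlocators,
            sizeof(RelFileLocator), rlocator_comparator) != NULL)
{
    /* this buffer belongs to one of the relations being dropped */
}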

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 5544 of file bufmgr.c.

5546 {
5547  PendingWriteback *pending;
5548 
5549  if (io_direct_flags & IO_DIRECT_DATA)
5550  return;
5551 
5552  /*
5553  * Add buffer to the pending writeback array, unless writeback control is
5554  * disabled.
5555  */
5556  if (*wb_context->max_pending > 0)
5557  {
5558  Assert(*wb_context->max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
5559 
5560  pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
5561 
5562  pending->tag = *tag;
5563  }
5564 
5565  /*
5566  * Perform pending flushes if the writeback limit is exceeded. This
5567  * includes the case where previously an item has been added, but control
5568  * is now disabled.
5569  */
5570  if (wb_context->nr_pending >= *wb_context->max_pending)
5571  IssuePendingWritebacks(wb_context, io_context);
5572 }
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert(), IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), WritebackContext::max_pending, WritebackContext::nr_pending, WritebackContext::pending_writebacks, PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().
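
A sketch of the overall writeback-control pattern (illustrative; mirrors BufferSync(), with the per-buffer tag assumed context; see also WritebackContextInit() below):

WritebackContext wb_context;

WritebackContextInit(&wb_context, &checkpoint_flush_after);

/* ... then, after each buffer write: */
ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL, &tag);
/* batched flush requests go out once *max_pending entries accumulate */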

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void *  arg)
static

Definition at line 5323 of file bufmgr.c.

5324 {
5325  BufferDesc *bufHdr = (BufferDesc *) arg;
5326 
5327  /* Buffer is pinned, so we can read the tag without locking the spinlock */
5328  if (bufHdr != NULL)
5329  {
5330  char *path = relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
5331  BufTagGetForkNum(&bufHdr->tag));
5332 
5333  errcontext("writing block %u of relation %s",
5334  bufHdr->tag.blockNum, path);
5335  pfree(path);
5336  }
5337 }

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, pfree(), relpathperm, and BufferDesc::tag.

Referenced by FlushBuffer().

◆ StartBufferIO()

static bool StartBufferIO ( BufferDesc buf,
bool  forInput 
)
static

Definition at line 5189 of file bufmgr.c.

5190 {
5191  uint32 buf_state;
5192 
5193  ResourceOwnerEnlarge(CurrentResourceOwner);
5194 
5195  for (;;)
5196  {
5197  buf_state = LockBufHdr(buf);
5198 
5199  if (!(buf_state & BM_IO_IN_PROGRESS))
5200  break;
5201  UnlockBufHdr(buf, buf_state);
5202  WaitIO(buf);
5203  }
5204 
5205  /* Once we get here, there is definitely no I/O active on this buffer */
5206 
5207  if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
5208  {
5209  /* someone else already did the I/O */
5210  UnlockBufHdr(buf, buf_state);
5211  return false;
5212  }
5213 
5214  buf_state |= BM_IO_IN_PROGRESS;
5215  UnlockBufHdr(buf, buf_state);
5216 
5217  ResourceOwnerRememberBufferIO(CurrentResourceOwner,
5218  BufferDescriptorGetBuffer(buf));
5219 
5220  return true;
5221 }
void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), and WaitIO().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and FlushBuffer().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 3139 of file bufmgr.c.

3140 {
3141  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
3142  int result = 0;
3143  uint32 buf_state;
3144  BufferTag tag;
3145 
3146  /* Make sure we can handle the pin */
3147  ReservePrivateRefCountEntry();
3148  ResourceOwnerEnlarge(CurrentResourceOwner);
3149 
3150  /*
3151  * Check whether buffer needs writing.
3152  *
3153  * We can make this check without taking the buffer content lock so long
3154  * as we mark pages dirty in access methods *before* logging changes with
3155  * XLogInsert(): if someone marks the buffer dirty just after our check we
3156  * don't worry because our checkpoint.redo points before log record for
3157  * upcoming changes and so we are not required to write such dirty buffer.
3158  */
3159  buf_state = LockBufHdr(bufHdr);
3160 
3161  if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
3162  BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
3163  {
3164  result |= BUF_REUSABLE;
3165  }
3166  else if (skip_recently_used)
3167  {
3168  /* Caller told us not to write recently-used buffers */
3169  UnlockBufHdr(bufHdr, buf_state);
3170  return result;
3171  }
3172 
3173  if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
3174  {
3175  /* It's clean, so nothing to do */
3176  UnlockBufHdr(bufHdr, buf_state);
3177  return result;
3178  }
3179 
3180  /*
3181  * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
3182  * buffer is clean by the time we've locked it.)
3183  */
3184  PinBuffer_Locked(bufHdr);
3185  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3186 
3187  FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
3188 
3189  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3190 
3191  tag = bufHdr->tag;
3192 
3193  UnpinBuffer(bufHdr);
3194 
3195  /*
3196  * SyncOneBuffer() is only called by checkpointer and bgwriter, so
3197  * IOContext will always be IOCONTEXT_NORMAL.
3198  */
3199  ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
3200 
3201  return result | BUF_WRITTEN;
3202 }

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, BufferDescriptorGetContentLock(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().
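
How a caller interprets the result flags, as a sketch (illustrative, after BgBufferSync(); the counters and wb_context are assumed context):

int         sync_state = SyncOneBuffer(buf_id, true, &wb_context);

if (sync_state & BUF_WRITTEN)
    num_written++;          /* a dirty page was flushed to the kernel */
if (sync_state & BUF_REUSABLE)
    reusable_buffers++;     /* unpinned and usage_count == 0 */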

◆ TerminateBufferIO()

static void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint32  set_flag_bits,
bool  forget_owner 
)
static

Definition at line 5244 of file bufmgr.c.

5246 {
5247  uint32 buf_state;
5248 
5249  buf_state = LockBufHdr(buf);
5250 
5251  Assert(buf_state & BM_IO_IN_PROGRESS);
5252 
5253  buf_state &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
5254  if (clear_dirty && !(buf_state & BM_JUST_DIRTIED))
5255  buf_state &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
5256 
5257  buf_state |= set_flag_bits;
5258  UnlockBufHdr(buf, buf_state);
5259 
5260  if (forget_owner)
5261  ResourceOwnerForgetBufferIO(CurrentResourceOwner,
5262  BufferDescriptorGetBuffer(buf));
5263 
5264  ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
5265 }
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)

References Assert(), BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, buf, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerForgetBufferIO(), and UnlockBufHdr().

Referenced by AbortBufferIO(), ExtendBufferedRelShared(), FlushBuffer(), and ReadBuffer_common().
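
The surrounding I/O protocol, as a sketch (illustrative; the real read path is in ReadBuffer_common() above): StartBufferIO() returns false when another backend completed the same I/O first, in which case TerminateBufferIO() must not be called:

if (StartBufferIO(bufHdr, true))    /* true = reading the page in */
{
    smgrread(smgr, forkNum, blockNum, BufHdrGetBlock(bufHdr));

    /* set BM_VALID, clear BM_IO_IN_PROGRESS, wake any WaitIO() sleepers */
    TerminateBufferIO(bufHdr, false, BM_VALID, true);
}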

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void *  arg 
)
static

Definition at line 5509 of file bufmgr.c.

5510 {
5511  CkptTsStatus *sa = (CkptTsStatus *) a;
5512  CkptTsStatus *sb = (CkptTsStatus *) b;
5513 
5514  /* we want a min-heap, so return 1 for the a < b */
5515  if (sa->progress < sb->progress)
5516  return 1;
5517  else if (sa->progress == sb->progress)
5518  return 0;
5519  else
5520  return -1;
5521 }

References a, b, and CkptTsStatus::progress.

Referenced by BufferSync().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 4767 of file bufmgr.c.

4768 {
4769  BufferDesc *buf = PinCountWaitBuf;
4770 
4771  if (buf)
4772  {
4773  uint32 buf_state;
4774 
4775  buf_state = LockBufHdr(buf);
4776 
4777  /*
4778  * Don't complain if flag bit not set; it could have been reset but we
4779  * got a cancel/die interrupt before getting the signal.
4780  */
4781  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4782  buf->wait_backend_pgprocno == MyProcNumber)
4783  buf_state &= ~BM_PIN_COUNT_WAITER;
4784 
4785  UnlockBufHdr(buf, buf_state);
4786 
4787  PinCountWaitBuf = NULL;
4788  }
4789 }

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdr().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 4577 of file bufmgr.c.

4578 {
4579  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4580  ReleaseBuffer(buffer);
4581 }

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), do_setval(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), 
spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 2468 of file bufmgr.c.

2469 {
2470  PrivateRefCountEntry *ref;
2471  Buffer b = BufferDescriptorGetBuffer(buf);
2472 
2473  Assert(!BufferIsLocal(b));
2474 
2475  /* not moving as we're likely deleting it soon anyway */
2476  ref = GetPrivateRefCountEntry(b, false);
2477  Assert(ref != NULL);
2478  Assert(ref->refcount > 0);
2479  ref->refcount--;
2480  if (ref->refcount == 0)
2481  {
2482  uint32 buf_state;
2483  uint32 old_buf_state;
2484 
2485  /*
2486  * Mark buffer non-accessible to Valgrind.
2487  *
2488  * Note that the buffer may have already been marked non-accessible
2489  * within access method code that enforces that buffers are only
2490  * accessed while a buffer lock is held.
2491  */
2492  VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
2493 
2494  /* I'd better not still hold the buffer content lock */
2495  Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
2496 
2497  /*
2498  * Decrement the shared reference count.
2499  *
2500  * Since buffer spinlock holder can update status using just write,
2501  * it's not safe to use atomic decrement here; thus use a CAS loop.
2502  */
2503  old_buf_state = pg_atomic_read_u32(&buf->state);
2504  for (;;)
2505  {
2506  if (old_buf_state & BM_LOCKED)
2507  old_buf_state = WaitBufHdrUnlocked(buf);
2508 
2509  buf_state = old_buf_state;
2510 
2511  buf_state -= BUF_REFCOUNT_ONE;
2512 
2513  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
2514  buf_state))
2515  break;
2516  }
2517 
2518  /* Support LockBufferForCleanup() */
2519  if (buf_state & BM_PIN_COUNT_WAITER)
2520  {
2521  /*
2522  * Acquire the buffer header lock, re-check that there's a waiter.
2523  * Another backend could have unpinned this buffer, and already
2524  * woken up the waiter. There's no danger of the buffer being
2525  * replaced after we unpinned it above, as it's pinned by the
2526  * waiter.
2527  */
2528  buf_state = LockBufHdr(buf);
2529 
2530  if ((buf_state & BM_PIN_COUNT_WAITER) &&
2531  BUF_STATE_GET_REFCOUNT(buf_state) == 1)
2532  {
2533  /* we just released the last pin other than the waiter's */
2534  int wait_backend_pgprocno = buf->wait_backend_pgprocno;
2535 
2536  buf_state &= ~BM_PIN_COUNT_WAITER;
2537  UnlockBufHdr(buf, buf_state);
2538  ProcSendSignal(wait_backend_pgprocno);
2539  }
2540  else
2541  UnlockBufHdr(buf, buf_state);
2542  }
2543  ForgetPrivateRefCountEntry(ref);
2544  }
2545 }
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition: bufmgr.c:427
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
void ProcSendSignal(ProcNumber procNumber)
Definition: proc.c:1878

References Assert(), b, BM_LOCKED, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferIsLocal, BufHdrGetBlock, ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), LockBufHdr(), LWLockHeldByMe(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), ProcSendSignal(), PrivateRefCountEntry::refcount, UnlockBufHdr(), VALGRIND_MAKE_MEM_NOACCESS, and WaitBufHdrUnlocked().

Referenced by ResOwnerReleaseBufferPin(), and UnpinBuffer().

◆ WaitBufHdrUnlocked()

static uint32 WaitBufHdrUnlocked ( BufferDesc buf)
static

Definition at line 5420 of file bufmgr.c.

5421 {
5422  SpinDelayStatus delayStatus;
5423  uint32 buf_state;
5424 
5425  init_local_spin_delay(&delayStatus);
5426 
5427  buf_state = pg_atomic_read_u32(&buf->state);
5428 
5429  while (buf_state & BM_LOCKED)
5430  {
5431  perform_spin_delay(&delayStatus);
5432  buf_state = pg_atomic_read_u32(&buf->state);
5433  }
5434 
5435  finish_spin_delay(&delayStatus);
5436 
5437  return buf_state;
5438 }

References BM_LOCKED, buf, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), and pg_atomic_read_u32().

Referenced by MarkBufferDirty(), PinBuffer(), and UnpinBufferNoOwner().

◆ WaitIO()

static void WaitIO ( BufferDesc buf)
static

Definition at line 5146 of file bufmgr.c.

5147 {
5148  ConditionVariable *cv = BufferDescriptorGetIOCV(buf);
5149 
5150  ConditionVariablePrepareToSleep(cv);
5151  for (;;)
5152  {
5153  uint32 buf_state;
5154 
5155  /*
5156  * It may not be necessary to acquire the spinlock to check the flag
5157  * here, but since this test is essential for correctness, we'd better
5158  * play it safe.
5159  */
5160  buf_state = LockBufHdr(buf);
5161  UnlockBufHdr(buf, buf_state);
5162 
5163  if (!(buf_state & BM_IO_IN_PROGRESS))
5164  break;
5165  ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
5166  }
5167  ConditionVariableCancelSleep();
5168 }
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), LockBufHdr(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext context,
int *  max_pending 
)

Definition at line 5532 of file bufmgr.c.

5533 {
5534  Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
5535 
5536  context->max_pending = max_pending;
5537  context->nr_pending = 0;
5538 }

References Assert(), WritebackContext::max_pending, WritebackContext::nr_pending, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferSync(), and InitBufferPool().

Variable Documentation

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 161 of file bufmgr.c.

Referenced by InitBufferPool().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 160 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 136 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 137 of file bufmgr.c.

Referenced by BgBufferSync().

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
.name = "buffer io",
.release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
.release_priority = RELEASE_PRIO_BUFFER_IOS,
.ReleaseResource = ResOwnerReleaseBufferIO,
.DebugPrint = ResOwnerPrintBufferIO
}
static void ResOwnerReleaseBufferIO(Datum res)
Definition: bufmgr.c:5672
static char * ResOwnerPrintBufferIO(Datum res)
Definition: bufmgr.c:5680
#define RELEASE_PRIO_BUFFER_IOS
Definition: resowner.h:62
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition: resowner.h:54

Definition at line 214 of file bufmgr.c.

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_pin_resowner_desc

const ResourceOwnerDesc buffer_pin_resowner_desc
Initial value:
=
{
.name = "buffer pin",
.release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
.release_priority = RELEASE_PRIO_BUFFER_PINS,
.ReleaseResource = ResOwnerReleaseBufferPin,
.DebugPrint = ResOwnerPrintBufferPin
}
static char * ResOwnerPrintBufferPin(Datum res)
Definition: bufmgr.c:5703
static void ResOwnerReleaseBufferPin(Datum res)
Definition: bufmgr.c:5688
#define RELEASE_PRIO_BUFFER_PINS
Definition: resowner.h:63

Definition at line 223 of file bufmgr.c.

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 159 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY

Definition at line 146 of file bufmgr.c.

Referenced by get_tablespace_io_concurrency(), and tablespace_reloptions().

◆ maintenance_io_concurrency

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 164 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 199 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountHash

HTAB* PrivateRefCountHash = NULL
static

◆ PrivateRefCountOverflowed

◆ ReservedRefCountEntry

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 135 of file bufmgr.c.

Referenced by mdreadv(), and ReadBuffer_common().