PostgreSQL Source Code (git master)
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void FindAndDropRelFileNodeBuffers (RelFileNode rnode, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *a, const BufferTag *b)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void InitBufferPoolBackend (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelFileNodesAllBuffers (SMgrRelation *smgr_reln, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int maintenance_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 79 of file bufmgr.c.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().
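
A condensed sketch of the heuristic (hypothetical helper, not part of bufmgr.c): when few enough blocks are being invalidated, the callers probe the buffer mapping table per block via FindAndDropRelFileNodeBuffers() instead of scanning all of shared_buffers.

/*
 * Sketch only: true when a full buffer-pool scan is used instead of
 * per-block lookups, per the threshold above.
 */
static inline bool
drop_should_scan_all(uint64 nBlocksToInvalidate)
{
    return nBlocksToInvalidate >= BUF_DROP_FULL_SCAN_THRESHOLD;
}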

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 69 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 68 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

◆ BufferGetLSN

#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 61 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

◆ BufferIsPinned

#define BufferIsPinned(bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)

Definition at line 448 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), ConditionalLockBuffer(), FlushOneBuffer(), IncrBufferRefCount(), LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), and ReleaseAndReadBuffer().
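
The macro is cheap enough to use in assertions. A minimal caller sketch (hypothetical function; Assert, BufferGetPage and Page are the real APIs):

/*
 * Sketch: code that examines a shared page asserts the local pin first,
 * as MarkBufferDirty() and BufferGetBlockNumber() do.
 */
static Page
get_pinned_page(Buffer buf)
{
    Assert(BufferIsPinned(buf));
    return BufferGetPage(buf);  /* pin keeps the buffer from being evicted */
}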

◆ BufHdrGetBlock

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 60 of file bufmgr.c.

Referenced by FlushBuffer(), PinBuffer(), PinBuffer_Locked(), ReadBuffer_common(), and UnpinBuffer().

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 64 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 71 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers(), and FlushRelationsAllBuffers().

◆ ST_COMPARE [1/2]

#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)

Definition at line 4766 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)

Definition at line 4766 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 4768 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 4768 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 4765 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 4765 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 4767 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 4767 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 4764 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 4764 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

void AbortBufferIO ( void  )

Definition at line 4481 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, buf, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

4482 {
4483  BufferDesc *buf = InProgressBuf;
4484 
4485  if (buf)
4486  {
4487  uint32 buf_state;
4488 
4489  buf_state = LockBufHdr(buf);
4490  Assert(buf_state & BM_IO_IN_PROGRESS);
4491  if (IsForInput)
4492  {
4493  Assert(!(buf_state & BM_DIRTY));
4494 
4495  /* We'd better not think buffer is valid yet */
4496  Assert(!(buf_state & BM_VALID));
4497  UnlockBufHdr(buf, buf_state);
4498  }
4499  else
4500  {
4501  Assert(buf_state & BM_DIRTY);
4502  UnlockBufHdr(buf, buf_state);
4503  /* Issue notice if this is not the first failure... */
4504  if (buf_state & BM_IO_ERROR)
4505  {
4506  /* Buffer is pinned, so we can read tag without spinlock */
4507  char *path;
4508 
4509  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
4510  ereport(WARNING,
4511  (errcode(ERRCODE_IO_ERROR),
4512  errmsg("could not write block %u of %s",
4513  buf->tag.blockNum, path),
4514  errdetail("Multiple failures --- write error might be permanent.")));
4515  pfree(path);
4516  }
4517  }
4518  TerminateBufferIO(buf, false, BM_IO_ERROR);
4519  }
4520 }

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2570 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2571 {
2572  CheckForBufferLeaks();
2573 
2574  AtEOXact_LocalBuffers(isCommit);
2575 
2576  Assert(PrivateRefCountOverflowed == 0);
2577 }

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int code, Datum arg )

Definition at line 2625 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolBackend().

2626 {
2627  AbortBufferIO();
2628  UnlockBuffers();
2629 
2630  CheckForBufferLeaks();
2631 
2632  /* localbuf.c needs a chance too */
2633  AtProcExit_LocalBuffers();
2634 }

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *wb_context )

Definition at line 2200 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2201 {
2202  /* info obtained from freelist.c */
2203  int strategy_buf_id;
2204  uint32 strategy_passes;
2205  uint32 recent_alloc;
2206 
2207  /*
2208  * Information saved between calls so we can determine the strategy
2209  * point's advance rate and avoid scanning already-cleaned buffers.
2210  */
2211  static bool saved_info_valid = false;
2212  static int prev_strategy_buf_id;
2213  static uint32 prev_strategy_passes;
2214  static int next_to_clean;
2215  static uint32 next_passes;
2216 
2217  /* Moving averages of allocation rate and clean-buffer density */
2218  static float smoothed_alloc = 0;
2219  static float smoothed_density = 10.0;
2220 
2221  /* Potentially these could be tunables, but for now, not */
2222  float smoothing_samples = 16;
2223  float scan_whole_pool_milliseconds = 120000.0;
2224 
2225  /* Used to compute how far we scan ahead */
2226  long strategy_delta;
2227  int bufs_to_lap;
2228  int bufs_ahead;
2229  float scans_per_alloc;
2230  int reusable_buffers_est;
2231  int upcoming_alloc_est;
2232  int min_scan_buffers;
2233 
2234  /* Variables for the scanning loop proper */
2235  int num_to_scan;
2236  int num_written;
2237  int reusable_buffers;
2238 
2239  /* Variables for final smoothed_density update */
2240  long new_strategy_delta;
2241  uint32 new_recent_alloc;
2242 
2243  /*
2244  * Find out where the freelist clock sweep currently is, and how many
2245  * buffer allocations have happened since our last call.
2246  */
2247  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2248 
2249  /* Report buffer alloc counts to pgstat */
2250  BgWriterStats.m_buf_alloc += recent_alloc;
2251 
2252  /*
2253  * If we're not running the LRU scan, just stop after doing the stats
2254  * stuff. We mark the saved state invalid so that we can recover sanely
2255  * if LRU scan is turned back on later.
2256  */
2257  if (bgwriter_lru_maxpages <= 0)
2258  {
2259  saved_info_valid = false;
2260  return true;
2261  }
2262 
2263  /*
2264  * Compute strategy_delta = how many buffers have been scanned by the
2265  * clock sweep since last time. If first time through, assume none. Then
2266  * see if we are still ahead of the clock sweep, and if so, how many
2267  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2268  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2269  * behavior when the passes counts wrap around.
2270  */
2271  if (saved_info_valid)
2272  {
2273  int32 passes_delta = strategy_passes - prev_strategy_passes;
2274 
2275  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2276  strategy_delta += (long) passes_delta * NBuffers;
2277 
2278  Assert(strategy_delta >= 0);
2279 
2280  if ((int32) (next_passes - strategy_passes) > 0)
2281  {
2282  /* we're one pass ahead of the strategy point */
2283  bufs_to_lap = strategy_buf_id - next_to_clean;
2284 #ifdef BGW_DEBUG
2285  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2286  next_passes, next_to_clean,
2287  strategy_passes, strategy_buf_id,
2288  strategy_delta, bufs_to_lap);
2289 #endif
2290  }
2291  else if (next_passes == strategy_passes &&
2292  next_to_clean >= strategy_buf_id)
2293  {
2294  /* on same pass, but ahead or at least not behind */
2295  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2296 #ifdef BGW_DEBUG
2297  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2298  next_passes, next_to_clean,
2299  strategy_passes, strategy_buf_id,
2300  strategy_delta, bufs_to_lap);
2301 #endif
2302  }
2303  else
2304  {
2305  /*
2306  * We're behind, so skip forward to the strategy point and start
2307  * cleaning from there.
2308  */
2309 #ifdef BGW_DEBUG
2310  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2311  next_passes, next_to_clean,
2312  strategy_passes, strategy_buf_id,
2313  strategy_delta);
2314 #endif
2315  next_to_clean = strategy_buf_id;
2316  next_passes = strategy_passes;
2317  bufs_to_lap = NBuffers;
2318  }
2319  }
2320  else
2321  {
2322  /*
2323  * Initializing at startup or after LRU scanning had been off. Always
2324  * start at the strategy point.
2325  */
2326 #ifdef BGW_DEBUG
2327  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2328  strategy_passes, strategy_buf_id);
2329 #endif
2330  strategy_delta = 0;
2331  next_to_clean = strategy_buf_id;
2332  next_passes = strategy_passes;
2333  bufs_to_lap = NBuffers;
2334  }
2335 
2336  /* Update saved info for next time */
2337  prev_strategy_buf_id = strategy_buf_id;
2338  prev_strategy_passes = strategy_passes;
2339  saved_info_valid = true;
2340 
2341  /*
2342  * Compute how many buffers had to be scanned for each new allocation, ie,
2343  * 1/density of reusable buffers, and track a moving average of that.
2344  *
2345  * If the strategy point didn't move, we don't update the density estimate
2346  */
2347  if (strategy_delta > 0 && recent_alloc > 0)
2348  {
2349  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2350  smoothed_density += (scans_per_alloc - smoothed_density) /
2351  smoothing_samples;
2352  }
2353 
2354  /*
2355  * Estimate how many reusable buffers there are between the current
2356  * strategy point and where we've scanned ahead to, based on the smoothed
2357  * density estimate.
2358  */
2359  bufs_ahead = NBuffers - bufs_to_lap;
2360  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2361 
2362  /*
2363  * Track a moving average of recent buffer allocations. Here, rather than
2364  * a true average we want a fast-attack, slow-decline behavior: we
2365  * immediately follow any increase.
2366  */
2367  if (smoothed_alloc <= (float) recent_alloc)
2368  smoothed_alloc = recent_alloc;
2369  else
2370  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2371  smoothing_samples;
2372 
2373  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2374  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2375 
2376  /*
2377  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2378  * eventually underflow to zero, and the underflows produce annoying
2379  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2380  * zero, there's no point in tracking smaller and smaller values of
2381  * smoothed_alloc, so just reset it to exactly zero to avoid this
2382  * syndrome. It will pop back up as soon as recent_alloc increases.
2383  */
2384  if (upcoming_alloc_est == 0)
2385  smoothed_alloc = 0;
2386 
2387  /*
2388  * Even in cases where there's been little or no buffer allocation
2389  * activity, we want to make a small amount of progress through the buffer
2390  * cache so that as many reusable buffers as possible are clean after an
2391  * idle period.
2392  *
2393  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2394  * the BGW will be called during the scan_whole_pool time; slice the
2395  * buffer pool into that many sections.
2396  */
2397  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2398 
2399  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2400  {
2401 #ifdef BGW_DEBUG
2402  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2403  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2404 #endif
2405  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2406  }
2407 
2408  /*
2409  * Now write out dirty reusable buffers, working forward from the
2410  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2411  * enough buffers to match our estimate of the next cycle's allocation
2412  * requirements, or hit the bgwriter_lru_maxpages limit.
2413  */
2414 
2415  /* Make sure we can handle the pin inside SyncOneBuffer */
2416  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2417 
2418  num_to_scan = bufs_to_lap;
2419  num_written = 0;
2420  reusable_buffers = reusable_buffers_est;
2421 
2422  /* Execute the LRU scan */
2423  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2424  {
2425  int sync_state = SyncOneBuffer(next_to_clean, true,
2426  wb_context);
2427 
2428  if (++next_to_clean >= NBuffers)
2429  {
2430  next_to_clean = 0;
2431  next_passes++;
2432  }
2433  num_to_scan--;
2434 
2435  if (sync_state & BUF_WRITTEN)
2436  {
2437  reusable_buffers++;
2438  if (++num_written >= bgwriter_lru_maxpages)
2439  {
2440  BgWriterStats.m_maxwritten_clean++;
2441  break;
2442  }
2443  }
2444  else if (sync_state & BUF_REUSABLE)
2445  reusable_buffers++;
2446  }
2447 
2448  BgWriterStats.m_buf_written_clean += num_written;
2449 
2450 #ifdef BGW_DEBUG
2451  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2452  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2453  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2454  bufs_to_lap - num_to_scan,
2455  num_written,
2456  reusable_buffers - reusable_buffers_est);
2457 #endif
2458 
2459  /*
2460  * Consider the above scan as being like a new allocation scan.
2461  * Characterize its density and update the smoothed one based on it. This
2462  * effectively halves the moving average period in cases where both the
2463  * strategy and the background writer are doing some useful scanning,
2464  * which is helpful because a long memory isn't as desirable on the
2465  * density estimates.
2466  */
2467  new_strategy_delta = bufs_to_lap - num_to_scan;
2468  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2469  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2470  {
2471  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2472  smoothed_density += (scans_per_alloc - smoothed_density) /
2473  smoothing_samples;
2474 
2475 #ifdef BGW_DEBUG
2476  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2477  new_recent_alloc, new_strategy_delta,
2478  scans_per_alloc, smoothed_density);
2479 #endif
2480  }
2481 
2482  /* Return true if OK to hibernate */
2483  return (bufs_to_lap == 0 && recent_alloc == 0);
2484 }
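
The smoothed_alloc update above implements a fast-attack, slow-decline moving average. A stand-alone restatement of that arithmetic (hypothetical helper, same formula as lines 2367-2371):

/*
 * Sketch: follow increases immediately; decay decreases with a period of
 * smoothing_samples calls.
 */
static float
smooth_allocation_rate(float smoothed, float recent, float smoothing_samples)
{
    if (smoothed <= recent)
        return recent;          /* fast attack */
    return smoothed + (recent - smoothed) / smoothing_samples;  /* slow decline */
}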

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr )

Definition at line 1092 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, INIT_FORKNUM, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

1096 {
1097  BufferTag newTag; /* identity of requested block */
1098  uint32 newHash; /* hash value for newTag */
1099  LWLock *newPartitionLock; /* buffer partition lock for it */
1100  BufferTag oldTag; /* previous identity of selected buffer */
1101  uint32 oldHash; /* hash value for oldTag */
1102  LWLock *oldPartitionLock; /* buffer partition lock for it */
1103  uint32 oldFlags;
1104  int buf_id;
1105  BufferDesc *buf;
1106  bool valid;
1107  uint32 buf_state;
1108 
1109  /* create a tag so we can lookup the buffer */
1110  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1111 
1112  /* determine its hash code and partition lock ID */
1113  newHash = BufTableHashCode(&newTag);
1114  newPartitionLock = BufMappingPartitionLock(newHash);
1115 
1116  /* see if the block is in the buffer pool already */
1117  LWLockAcquire(newPartitionLock, LW_SHARED);
1118  buf_id = BufTableLookup(&newTag, newHash);
1119  if (buf_id >= 0)
1120  {
1121  /*
1122  * Found it. Now, pin the buffer so no one can steal it from the
1123  * buffer pool, and check to see if the correct data has been loaded
1124  * into the buffer.
1125  */
1126  buf = GetBufferDescriptor(buf_id);
1127 
1128  valid = PinBuffer(buf, strategy);
1129 
1130  /* Can release the mapping lock as soon as we've pinned it */
1131  LWLockRelease(newPartitionLock);
1132 
1133  *foundPtr = true;
1134 
1135  if (!valid)
1136  {
1137  /*
1138  * We can only get here if (a) someone else is still reading in
1139  * the page, or (b) a previous read attempt failed. We have to
1140  * wait for any active read attempt to finish, and then set up our
1141  * own read attempt if the page is still not BM_VALID.
1142  * StartBufferIO does it all.
1143  */
1144  if (StartBufferIO(buf, true))
1145  {
1146  /*
1147  * If we get here, previous attempts to read the buffer must
1148  * have failed ... but we shall bravely try again.
1149  */
1150  *foundPtr = false;
1151  }
1152  }
1153 
1154  return buf;
1155  }
1156 
1157  /*
1158  * Didn't find it in the buffer pool. We'll have to initialize a new
1159  * buffer. Remember to unlock the mapping lock while doing the work.
1160  */
1161  LWLockRelease(newPartitionLock);
1162 
1163  /* Loop here in case we have to try another victim buffer */
1164  for (;;)
1165  {
1166  /*
1167  * Ensure, while the spinlock's not yet held, that there's a free
1168  * refcount entry.
1169  */
1170  ReservePrivateRefCountEntry();
1171 
1172  /*
1173  * Select a victim buffer. The buffer is returned with its header
1174  * spinlock still held!
1175  */
1176  buf = StrategyGetBuffer(strategy, &buf_state);
1177 
1178  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1179 
1180  /* Must copy buffer flags while we still hold the spinlock */
1181  oldFlags = buf_state & BUF_FLAG_MASK;
1182 
1183  /* Pin the buffer and then release the buffer spinlock */
1184  PinBuffer_Locked(buf);
1185 
1186  /*
1187  * If the buffer was dirty, try to write it out. There is a race
1188  * condition here, in that someone might dirty it after we released it
1189  * above, or even while we are writing it out (since our share-lock
1190  * won't prevent hint-bit updates). We will recheck the dirty bit
1191  * after re-locking the buffer header.
1192  */
1193  if (oldFlags & BM_DIRTY)
1194  {
1195  /*
1196  * We need a share-lock on the buffer contents to write it out
1197  * (else we might write invalid data, eg because someone else is
1198  * compacting the page contents while we write). We must use a
1199  * conditional lock acquisition here to avoid deadlock. Even
1200  * though the buffer was not pinned (and therefore surely not
1201  * locked) when StrategyGetBuffer returned it, someone else could
1202  * have pinned and exclusive-locked it by the time we get here. If
1203  * we try to get the lock unconditionally, we'd block waiting for
1204  * them; if they later block waiting for us, deadlock ensues.
1205  * (This has been observed to happen when two backends are both
1206  * trying to split btree index pages, and the second one just
1207  * happens to be trying to split the page the first one got from
1208  * StrategyGetBuffer.)
1209  */
1210  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1211  LW_SHARED))
1212  {
1213  /*
1214  * If using a nondefault strategy, and writing the buffer
1215  * would require a WAL flush, let the strategy decide whether
1216  * to go ahead and write/reuse the buffer or to choose another
1217  * victim. We need lock to inspect the page LSN, so this
1218  * can't be done inside StrategyGetBuffer.
1219  */
1220  if (strategy != NULL)
1221  {
1222  XLogRecPtr lsn;
1223 
1224  /* Read the LSN while holding buffer header lock */
1225  buf_state = LockBufHdr(buf);
1226  lsn = BufferGetLSN(buf);
1227  UnlockBufHdr(buf, buf_state);
1228 
1229  if (XLogNeedsFlush(lsn) &&
1230  StrategyRejectBuffer(strategy, buf))
1231  {
1232  /* Drop lock/pin and loop around for another buffer */
1233  LWLockRelease(BufferDescriptorGetContentLock(buf));
1234  UnpinBuffer(buf, true);
1235  continue;
1236  }
1237  }
1238 
1239  /* OK, do the I/O */
1240  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1241  smgr->smgr_rnode.node.spcNode,
1242  smgr->smgr_rnode.node.dbNode,
1243  smgr->smgr_rnode.node.relNode);
1244 
1245  FlushBuffer(buf, NULL);
1246  LWLockRelease(BufferDescriptorGetContentLock(buf));
1247 
1248  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1249  &buf->tag);
1250 
1251  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1252  smgr->smgr_rnode.node.spcNode,
1253  smgr->smgr_rnode.node.dbNode,
1254  smgr->smgr_rnode.node.relNode);
1255  }
1256  else
1257  {
1258  /*
1259  * Someone else has locked the buffer, so give it up and loop
1260  * back to get another one.
1261  */
1262  UnpinBuffer(buf, true);
1263  continue;
1264  }
1265  }
1266 
1267  /*
1268  * To change the association of a valid buffer, we'll need to have
1269  * exclusive lock on both the old and new mapping partitions.
1270  */
1271  if (oldFlags & BM_TAG_VALID)
1272  {
1273  /*
1274  * Need to compute the old tag's hashcode and partition lock ID.
1275  * XXX is it worth storing the hashcode in BufferDesc so we need
1276  * not recompute it here? Probably not.
1277  */
1278  oldTag = buf->tag;
1279  oldHash = BufTableHashCode(&oldTag);
1280  oldPartitionLock = BufMappingPartitionLock(oldHash);
1281 
1282  /*
1283  * Must lock the lower-numbered partition first to avoid
1284  * deadlocks.
1285  */
1286  if (oldPartitionLock < newPartitionLock)
1287  {
1288  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1289  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1290  }
1291  else if (oldPartitionLock > newPartitionLock)
1292  {
1293  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1294  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1295  }
1296  else
1297  {
1298  /* only one partition, only one lock */
1299  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1300  }
1301  }
1302  else
1303  {
1304  /* if it wasn't valid, we need only the new partition */
1305  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1306  /* remember we have no old-partition lock or tag */
1307  oldPartitionLock = NULL;
1308  /* keep the compiler quiet about uninitialized variables */
1309  oldHash = 0;
1310  }
1311 
1312  /*
1313  * Try to make a hashtable entry for the buffer under its new tag.
1314  * This could fail because while we were writing someone else
1315  * allocated another buffer for the same block we want to read in.
1316  * Note that we have not yet removed the hashtable entry for the old
1317  * tag.
1318  */
1319  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1320 
1321  if (buf_id >= 0)
1322  {
1323  /*
1324  * Got a collision. Someone has already done what we were about to
1325  * do. We'll just handle this as if it were found in the buffer
1326  * pool in the first place. First, give up the buffer we were
1327  * planning to use.
1328  */
1329  UnpinBuffer(buf, true);
1330 
1331  /* Can give up that buffer's mapping partition lock now */
1332  if (oldPartitionLock != NULL &&
1333  oldPartitionLock != newPartitionLock)
1334  LWLockRelease(oldPartitionLock);
1335 
1336  /* remaining code should match code at top of routine */
1337 
1338  buf = GetBufferDescriptor(buf_id);
1339 
1340  valid = PinBuffer(buf, strategy);
1341 
1342  /* Can release the mapping lock as soon as we've pinned it */
1343  LWLockRelease(newPartitionLock);
1344 
1345  *foundPtr = true;
1346 
1347  if (!valid)
1348  {
1349  /*
1350  * We can only get here if (a) someone else is still reading
1351  * in the page, or (b) a previous read attempt failed. We
1352  * have to wait for any active read attempt to finish, and
1353  * then set up our own read attempt if the page is still not
1354  * BM_VALID. StartBufferIO does it all.
1355  */
1356  if (StartBufferIO(buf, true))
1357  {
1358  /*
1359  * If we get here, previous attempts to read the buffer
1360  * must have failed ... but we shall bravely try again.
1361  */
1362  *foundPtr = false;
1363  }
1364  }
1365 
1366  return buf;
1367  }
1368 
1369  /*
1370  * Need to lock the buffer header too in order to change its tag.
1371  */
1372  buf_state = LockBufHdr(buf);
1373 
1374  /*
1375  * Somebody could have pinned or re-dirtied the buffer while we were
1376  * doing the I/O and making the new hashtable entry. If so, we can't
1377  * recycle this buffer; we must undo everything we've done and start
1378  * over with a new victim buffer.
1379  */
1380  oldFlags = buf_state & BUF_FLAG_MASK;
1381  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1382  break;
1383 
1384  UnlockBufHdr(buf, buf_state);
1385  BufTableDelete(&newTag, newHash);
1386  if (oldPartitionLock != NULL &&
1387  oldPartitionLock != newPartitionLock)
1388  LWLockRelease(oldPartitionLock);
1389  LWLockRelease(newPartitionLock);
1390  UnpinBuffer(buf, true);
1391  }
1392 
1393  /*
1394  * Okay, it's finally safe to rename the buffer.
1395  *
1396  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1397  * paranoia. We also reset the usage_count since any recency of use of
1398  * the old content is no longer relevant. (The usage_count starts out at
1399  * 1 so that the buffer can survive one clock-sweep pass.)
1400  *
1401  * Make sure BM_PERMANENT is set for buffers that must be written at every
1402  * checkpoint. Unlogged buffers only need to be written at shutdown
1403  * checkpoints, except for their "init" forks, which need to be treated
1404  * just like permanent relations.
1405  */
1406  buf->tag = newTag;
1407  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1408  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1409  BUF_USAGECOUNT_MASK);
1410  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1411  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1412  else
1413  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1414 
1415  UnlockBufHdr(buf, buf_state);
1416 
1417  if (oldPartitionLock != NULL)
1418  {
1419  BufTableDelete(&oldTag, oldHash);
1420  if (oldPartitionLock != newPartitionLock)
1421  LWLockRelease(oldPartitionLock);
1422  }
1423 
1424  LWLockRelease(newPartitionLock);
1425 
1426  /*
1427  * Buffer contents are currently invalid. Try to obtain the right to
1428  * start I/O. If StartBufferIO returns false, then someone else managed
1429  * to read it before we did, so there's nothing left for BufferAlloc() to
1430  * do.
1431  */
1432  if (StartBufferIO(buf, true))
1433  *foundPtr = false;
1434  else
1435  *foundPtr = true;
1436 
1437  return buf;
1438 }
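
The partition-lock dance at lines 1286-1300 follows a simple deadlock-avoidance rule. Restated as a sketch (hypothetical helper; LWLockAcquire and LW_EXCLUSIVE are the real APIs):

/*
 * Sketch: when two buffer-mapping partitions must be held at once, always
 * acquire the lower-addressed lock first so concurrent backends cannot
 * lock the pair in opposite orders.
 */
static void
lock_two_partitions(LWLock *l1, LWLock *l2)
{
    if (l1 < l2)
    {
        LWLockAcquire(l1, LW_EXCLUSIVE);
        LWLockAcquire(l2, LW_EXCLUSIVE);
    }
    else if (l2 < l1)
    {
        LWLockAcquire(l2, LW_EXCLUSIVE);
        LWLockAcquire(l1, LW_EXCLUSIVE);
    }
    else
        LWLockAcquire(l1, LW_EXCLUSIVE);    /* same partition, one lock */
}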

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2752 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2753 {
2754  BufferDesc *bufHdr;
2755 
2756  Assert(BufferIsPinned(buffer));
2757 
2758  if (BufferIsLocal(buffer))
2759  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2760  else
2761  bufHdr = GetBufferDescriptor(buffer - 1);
2762 
2763  /* pinned, so OK to read tag without spinlock */
2764  return bufHdr->tag.blockNum;
2765 }
◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3012 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

3013 {
3014  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3015  char *page = BufferGetPage(buffer);
3016  XLogRecPtr lsn;
3017  uint32 buf_state;
3018 
3019  /*
3020  * If we don't need locking for correctness, fastpath out.
3021  */
3022  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3023  return PageGetLSN(page);
3024 
3025  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3026  Assert(BufferIsValid(buffer));
3027  Assert(BufferIsPinned(buffer));
3028 
3029  buf_state = LockBufHdr(bufHdr);
3030  lsn = PageGetLSN(page);
3031  UnlockBufHdr(bufHdr, buf_state);
3032 
3033  return lsn;
3034 }
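
A caller sketch (hypothetical helper, modeled loosely on the SetHintBits() usage): only a pin is required, not a content lock.

static bool
page_lsn_flushed(Buffer buf, XLogRecPtr flushed_upto)
{
    /*
     * Safe under a pin; BufferGetLSNAtomic takes the header spinlock only
     * when hint bits must be WAL-logged.
     */
    return BufferGetLSNAtomic(buf) <= flushed_upto;
}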

◆ BufferGetTag()

void BufferGetTag ( Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum )

Definition at line 2773 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2775 {
2776  BufferDesc *bufHdr;
2777 
2778  /* Do the same checks as BufferGetBlockNumber. */
2779  Assert(BufferIsPinned(buffer));
2780 
2781  if (BufferIsLocal(buffer))
2782  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2783  else
2784  bufHdr = GetBufferDescriptor(buffer - 1);
2785 
2786  /* pinned, so OK to read tag without spinlock */
2787  *rnode = bufHdr->tag.rnode;
2788  *forknum = bufHdr->tag.forkNum;
2789  *blknum = bufHdr->tag.blockNum;
2790 }
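
A minimal caller sketch (hypothetical function; BufferGetTag and elog are the real APIs):

static void
report_buffer_identity(Buffer buf)
{
    RelFileNode rnode;
    ForkNumber  forknum;
    BlockNumber blkno;

    BufferGetTag(buf, &rnode, &forknum, &blkno);        /* pin required */
    elog(DEBUG2, "buffer %d holds block %u of relfilenode %u",
         buf, blkno, rnode.relNode);
}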

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2982 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2983 {
2984  BufferDesc *bufHdr;
2985 
2986  /* Local buffers are used only for temp relations. */
2987  if (BufferIsLocal(buffer))
2988  return false;
2989 
2990  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2991  Assert(BufferIsValid(buffer));
2992  Assert(BufferIsPinned(buffer));
2993 
2994  /*
2995  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2996  * need not bother with the buffer header spinlock. Even if someone else
2997  * changes the buffer header state while we're doing this, the state is
2998  * changed atomically, so we'll read the old value or the new value, but
2999  * not random garbage.
3000  */
3001  bufHdr = GetBufferDescriptor(buffer - 1);
3002  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3003 }
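
A caller sketch of the SetHintBits()-style test (hypothetical helper; XLogHintBitIsNeeded is the real macro):

static bool
hint_bit_needs_wal(Buffer buf)
{
    /*
     * A hint-bit change needs a WAL-logged page image only when checksums
     * or wal_log_hints are enabled and the buffer belongs to a permanent
     * relation.
     */
    return XLogHintBitIsNeeded() && BufferIsPermanent(buf);
}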

◆ BufferSync()

static void BufferSync ( int flags )

Definition at line 1924 of file bufmgr.c.

References Assert, BgWriterStats, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgBgWriter::m_buf_written_checkpoints, NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), pfree(), pg_atomic_read_u32(), PointerGetDatum, ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1925 {
1926  uint32 buf_state;
1927  int buf_id;
1928  int num_to_scan;
1929  int num_spaces;
1930  int num_processed;
1931  int num_written;
1932  CkptTsStatus *per_ts_stat = NULL;
1933  Oid last_tsid;
1934  binaryheap *ts_heap;
1935  int i;
1936  int mask = BM_DIRTY;
1937  WritebackContext wb_context;
1938 
1939  /* Make sure we can handle the pin inside SyncOneBuffer */
1940  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1941 
1942  /*
1943  * Unless this is a shutdown checkpoint or we have been explicitly told,
1944  * we write only permanent, dirty buffers. But at shutdown or end of
1945  * recovery, we write all dirty buffers.
1946  */
1947  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
1948  CHECKPOINT_FLUSH_ALL))))
1949  mask |= BM_PERMANENT;
1950 
1951  /*
1952  * Loop over all buffers, and mark the ones that need to be written with
1953  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1954  * can estimate how much work needs to be done.
1955  *
1956  * This allows us to write only those pages that were dirty when the
1957  * checkpoint began, and not those that get dirtied while it proceeds.
1958  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1959  * later in this function, or by normal backends or the bgwriter cleaning
1960  * scan, the flag is cleared. Any buffer dirtied after this point won't
1961  * have the flag set.
1962  *
1963  * Note that if we fail to write some buffer, we may leave buffers with
1964  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1965  * certainly need to be written for the next checkpoint attempt, too.
1966  */
1967  num_to_scan = 0;
1968  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1969  {
1970  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1971 
1972  /*
1973  * Header spinlock is enough to examine BM_DIRTY, see comment in
1974  * SyncOneBuffer.
1975  */
1976  buf_state = LockBufHdr(bufHdr);
1977 
1978  if ((buf_state & mask) == mask)
1979  {
1980  CkptSortItem *item;
1981 
1982  buf_state |= BM_CHECKPOINT_NEEDED;
1983 
1984  item = &CkptBufferIds[num_to_scan++];
1985  item->buf_id = buf_id;
1986  item->tsId = bufHdr->tag.rnode.spcNode;
1987  item->relNode = bufHdr->tag.rnode.relNode;
1988  item->forkNum = bufHdr->tag.forkNum;
1989  item->blockNum = bufHdr->tag.blockNum;
1990  }
1991 
1992  UnlockBufHdr(bufHdr, buf_state);
1993 
1994  /* Check for barrier events in case NBuffers is large. */
1995  if (ProcSignalBarrierPending)
1996  ProcessProcSignalBarrier();
1997  }
1998 
1999  if (num_to_scan == 0)
2000  return; /* nothing to do */
2001 
2002  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2003 
2003 
2004  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2005 
2006  /*
2007  * Sort buffers that need to be written to reduce the likelihood of random
2008  * IO. The sorting is also important for the implementation of balancing
2009  * writes between tablespaces. Without balancing writes we'd potentially
2010  * end up writing to the tablespaces one-by-one; possibly overloading the
2011  * underlying system.
2012  */
2013  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2014 
2015  num_spaces = 0;
2016 
2017  /*
2018  * Allocate progress status for each tablespace with buffers that need to
2019  * be flushed. This requires the to-be-flushed array to be sorted.
2020  */
2021  last_tsid = InvalidOid;
2022  for (i = 0; i < num_to_scan; i++)
2023  {
2024  CkptTsStatus *s;
2025  Oid cur_tsid;
2026 
2027  cur_tsid = CkptBufferIds[i].tsId;
2028 
2029  /*
2030  * Grow array of per-tablespace status structs, every time a new
2031  * tablespace is found.
2032  */
2033  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2034  {
2035  Size sz;
2036 
2037  num_spaces++;
2038 
2039  /*
2040  * Not worth adding grow-by-power-of-2 logic here - even with a
2041  * few hundred tablespaces this should be fine.
2042  */
2043  sz = sizeof(CkptTsStatus) * num_spaces;
2044 
2045  if (per_ts_stat == NULL)
2046  per_ts_stat = (CkptTsStatus *) palloc(sz);
2047  else
2048  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2049 
2050  s = &per_ts_stat[num_spaces - 1];
2051  memset(s, 0, sizeof(*s));
2052  s->tsId = cur_tsid;
2053 
2054  /*
2055  * The first buffer in this tablespace. As CkptBufferIds is sorted
2056  * by tablespace all (s->num_to_scan) buffers in this tablespace
2057  * will follow afterwards.
2058  */
2059  s->index = i;
2060 
2061  /*
2062  * progress_slice will be determined once we know how many buffers
2063  * are in each tablespace, i.e. after this loop.
2064  */
2065 
2066  last_tsid = cur_tsid;
2067  }
2068  else
2069  {
2070  s = &per_ts_stat[num_spaces - 1];
2071  }
2072 
2073  s->num_to_scan++;
2074 
2075  /* Check for barrier events. */
2076  if (ProcSignalBarrierPending)
2077  ProcessProcSignalBarrier();
2078  }
2079 
2080  Assert(num_spaces > 0);
2081 
2082  /*
2083  * Build a min-heap over the write-progress in the individual tablespaces,
2084  * and compute how large a portion of the total progress a single
2085  * processed buffer is.
2086  */
2087  ts_heap = binaryheap_allocate(num_spaces,
2088  ts_ckpt_progress_comparator,
2089  NULL);
2090 
2091  for (i = 0; i < num_spaces; i++)
2092  {
2093  CkptTsStatus *ts_stat = &per_ts_stat[i];
2094 
2095  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2096 
2097  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2098  }
2099 
2100  binaryheap_build(ts_heap);
2101 
2102  /*
2103  * Iterate through to-be-checkpointed buffers and write the ones (still)
2104  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2105  * tablespaces; otherwise the sorting would lead to only one tablespace
2106  * receiving writes at a time, making inefficient use of the hardware.
2107  */
2108  num_processed = 0;
2109  num_written = 0;
2110  while (!binaryheap_empty(ts_heap))
2111  {
2112  BufferDesc *bufHdr = NULL;
2113  CkptTsStatus *ts_stat = (CkptTsStatus *)
2114  DatumGetPointer(binaryheap_first(ts_heap));
2115 
2116  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2117  Assert(buf_id != -1);
2118 
2119  bufHdr = GetBufferDescriptor(buf_id);
2120 
2121  num_processed++;
2122 
2123  /*
2124  * We don't need to acquire the lock here, because we're only looking
2125  * at a single bit. It's possible that someone else writes the buffer
2126  * and clears the flag right after we check, but that doesn't matter
2127  * since SyncOneBuffer will then do nothing. However, there is a
2128  * further race condition: it's conceivable that between the time we
2129  * examine the bit here and the time SyncOneBuffer acquires the lock,
2130  * someone else not only wrote the buffer but replaced it with another
2131  * page and dirtied it. In that improbable case, SyncOneBuffer will
2132  * write the buffer though we didn't need to. It doesn't seem worth
2133  * guarding against this, though.
2134  */
2135  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2136  {
2137  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2138  {
2139  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2140  BgWriterStats.m_buf_written_checkpoints++;
2141  num_written++;
2142  }
2143  }
2144 
2145  /*
2146  * Measure progress independent of actually having to flush the buffer
2147  * - otherwise writing becomes unbalanced.
2148  */
2149  ts_stat->progress += ts_stat->progress_slice;
2150  ts_stat->num_scanned++;
2151  ts_stat->index++;
2152 
2153  /* Have all the buffers from the tablespace been processed? */
2154  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2155  {
2156  binaryheap_remove_first(ts_heap);
2157  }
2158  else
2159  {
2160  /* update heap with the new progress */
2161  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2162  }
2163 
2164  /*
2165  * Sleep to throttle our I/O rate.
2166  *
2167  * (This will check for barrier events even if it doesn't sleep.)
2168  */
2169  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2170  }
2171 
2172  /* issue all pending flushes */
2173  IssuePendingWritebacks(&wb_context);
2174 
2175  pfree(per_ts_stat);
2176  per_ts_stat = NULL;
2177  binaryheap_free(ts_heap);
2178 
2179  /*
2180  * Update checkpoint statistics. As noted above, this doesn't include
2181  * buffers written by other backends or bgwriter scan.
2182  */
2183  CheckpointStats.ckpt_bufs_written += num_written;
2184 
2185  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2186 }
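
The progress_slice arithmetic above is easiest to follow with concrete numbers: with 10 buffers to write in total, a tablespace holding 2 of them gets a slice of 10/2 = 5, so every tablespace reaches the common finish line (a progress of 10) exactly when its own buffers run out. The standalone sketch below (simplified stand-in types, not from bufmgr.c; a linear minimum scan stands in for the binary heap) simulates how always picking the least-advanced tablespace interleaves the writes:

    #include <stdio.h>

    /* Simplified stand-in for CkptTsStatus: only the balancing fields. */
    typedef struct
    {
        const char *name;
        int         num_to_scan;    /* buffers to write in this tablespace */
        int         num_scanned;
        double      progress;       /* num_scanned * progress_slice */
        double      progress_slice; /* total num_to_scan / local num_to_scan */
    } TsStatus;

    int
    main(void)
    {
        TsStatus    ts[3] = {
            {"ts_a", 6, 0, 0.0, 0.0},
            {"ts_b", 3, 0, 0.0, 0.0},
            {"ts_c", 1, 0, 0.0, 0.0},
        };
        int         total = 6 + 3 + 1;

        for (int i = 0; i < 3; i++)
            ts[i].progress_slice = (double) total / ts[i].num_to_scan;

        /* Linear minimum scan standing in for BufferSync()'s min-heap. */
        for (int n = 0; n < total; n++)
        {
            TsStatus   *min = NULL;

            for (int i = 0; i < 3; i++)
                if (ts[i].num_scanned < ts[i].num_to_scan &&
                    (min == NULL || ts[i].progress < min->progress))
                    min = &ts[i];

            min->progress += min->progress_slice;
            min->num_scanned++;
            printf("write %2d -> %s\n", n + 1, min->name);
        }
        return 0;
    }

Running it shows the larger tablespace absorbing proportionally more of the early writes instead of being drained only after the smaller ones finish.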

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag * ba,
const BufferTag * bb 
)
inline static

Definition at line 4643 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

4644 {
4645  int ret;
4646 
4647  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4648 
4649  if (ret != 0)
4650  return ret;
4651 
4652  if (ba->forkNum < bb->forkNum)
4653  return -1;
4654  if (ba->forkNum > bb->forkNum)
4655  return 1;
4656 
4657  if (ba->blockNum < bb->blockNum)
4658  return -1;
4659  if (ba->blockNum > bb->blockNum)
4660  return 1;
4661 
4662  return 0;
4663 }
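
This comparator (instantiated through sort_template.h) orders buffer tags relation-first, then by fork, then by block, so that pending writebacks on neighboring blocks end up adjacent and can be merged into larger I/Os. A standalone sketch of the same three-level ordering with plain qsort() (the Tag struct here is a hypothetical simplification; the real BufferTag key is a RelFileNode compared by rnode_comparator()):

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical simplification of BufferTag: one relation id instead
     * of a full RelFileNode. */
    typedef struct { unsigned rel; int fork; unsigned block; } Tag;

    /* Same three-level order as buffertag_comparator(). */
    static int
    tag_cmp(const void *pa, const void *pb)
    {
        const Tag  *a = pa;
        const Tag  *b = pb;

        if (a->rel != b->rel)
            return a->rel < b->rel ? -1 : 1;
        if (a->fork != b->fork)
            return a->fork < b->fork ? -1 : 1;
        if (a->block != b->block)
            return a->block < b->block ? -1 : 1;
        return 0;
    }

    int
    main(void)
    {
        Tag         t[] = {{16384, 0, 7}, {16384, 0, 2},
                           {16385, 0, 0}, {16384, 1, 0}};

        qsort(t, 4, sizeof(Tag), tag_cmp);
        for (int i = 0; i < 4; i++)
            printf("rel=%u fork=%d block=%u\n", t[i].rel, t[i].fork, t[i].block);
        return 0;
    }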

◆ BufmgrCommit()

void BufmgrCommit ( void  )

Definition at line 2738 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2739 {
2740  /* Nothing to do in bufmgr anymore... */
2741 }

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 2644 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2645 {
2646 #ifdef USE_ASSERT_CHECKING
2647  int RefCountErrors = 0;
2648  PrivateRefCountEntry *res;
2649  int i;
2650 
2651  /* check the array */
2652  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2653  {
2654  res = &PrivateRefCountArray[i];
2655 
2656  if (res->buffer != InvalidBuffer)
2657  {
2658  PrintBufferLeakWarning(res->buffer);
2659  RefCountErrors++;
2660  }
2661  }
2662 
2663  /* if necessary search the hash */
2664  if (PrivateRefCountOverflowed)
2665  {
2666  HASH_SEQ_STATUS hstat;
2667 
2668  hash_seq_init(&hstat, PrivateRefCountHash);
2669  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2670  {
2671  PrintBufferLeakWarning(res->buffer);
2672  RefCountErrors++;
2673  }
2674 
2675  }
2676 
2677  Assert(RefCountErrors == 0);
2678 #endif
2679 }

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 2728 of file bufmgr.c.

References BufferSync().

Referenced by CheckPointGuts().

2729 {
2730  BufferSync(flags);
2731 }

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem * a,
const CkptSortItem * b 
)
inline static

Definition at line 4672 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

4673 {
4674  /* compare tablespace */
4675  if (a->tsId < b->tsId)
4676  return -1;
4677  else if (a->tsId > b->tsId)
4678  return 1;
4679  /* compare relation */
4680  if (a->relNode < b->relNode)
4681  return -1;
4682  else if (a->relNode > b->relNode)
4683  return 1;
4684  /* compare fork */
4685  else if (a->forkNum < b->forkNum)
4686  return -1;
4687  else if (a->forkNum > b->forkNum)
4688  return 1;
4689  /* compare block number */
4690  else if (a->blockNum < b->blockNum)
4691  return -1;
4692  else if (a->blockNum > b->blockNum)
4693  return 1;
4694  /* equal page IDs are unlikely, but not impossible */
4695  return 0;
4696 }

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 4037 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

4038 {
4039  BufferDesc *buf;
4040 
4041  Assert(BufferIsPinned(buffer));
4042  if (BufferIsLocal(buffer))
4043  return true; /* act as though we got it */
4044 
4045  buf = GetBufferDescriptor(buffer - 1);
4046 
4047  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
4048  LW_EXCLUSIVE);
4049 }
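
A typical caller already holds a pin and tries the lock opportunistically, falling back to blocking (or to other work) when it is contended. A minimal sketch of that pattern (hypothetical function name; assumes backend compilation and a pinned shared buffer; only the bufmgr.h calls shown are real):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Hypothetical caller: take the content lock if free, otherwise block. */
    static void
    lock_now_or_later(Buffer buf)
    {
        if (!ConditionalLockBuffer(buf))
        {
            /* ... could do other useful work before blocking ... */
            LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        }

        /* ... modify the page under the exclusive lock ... */

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }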

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 4245 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().

4246 {
4247  BufferDesc *bufHdr;
4248  uint32 buf_state,
4249  refcount;
4250 
4251  Assert(BufferIsValid(buffer));
4252 
4253  if (BufferIsLocal(buffer))
4254  {
4255  refcount = LocalRefCount[-buffer - 1];
4256  /* There should be exactly one pin */
4257  Assert(refcount > 0);
4258  if (refcount != 1)
4259  return false;
4260  /* Nobody else to wait for */
4261  return true;
4262  }
4263 
4264  /* There should be exactly one local pin */
4265  refcount = GetPrivateRefCount(buffer);
4266  Assert(refcount);
4267  if (refcount != 1)
4268  return false;
4269 
4270  /* Try to acquire lock */
4271  if (!ConditionalLockBuffer(buffer))
4272  return false;
4273 
4274  bufHdr = GetBufferDescriptor(buffer - 1);
4275  buf_state = LockBufHdr(bufHdr);
4276  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
4277 
4278  Assert(refcount > 0);
4279  if (refcount == 1)
4280  {
4281  /* Successfully acquired exclusive lock with pincount 1 */
4282  UnlockBufHdr(bufHdr, buf_state);
4283  return true;
4284  }
4285 
4286  /* Failed, so release the lock */
4287  UnlockBufHdr(bufHdr, buf_state);
4288  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4289  return false;
4290 }
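
Callers such as heap_page_prune_opt() use this for opportunistic cleanup: if the pin count is not exactly one or the lock is busy, the page is simply skipped rather than waited on. A minimal sketch (hypothetical function name; assumes backend compilation and that the caller holds exactly one pin on buf):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Hypothetical opportunistic-cleanup caller. */
    static void
    maybe_cleanup_page(Buffer buf)
    {
        /* Returns false instead of blocking if pins or locks interfere. */
        if (!ConditionalLockBufferForCleanup(buf))
            return;             /* page is busy, try again some other time */

        /* ... prune/defragment the page under the cleanup lock ... */

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }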

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3412 of file bufmgr.c.

References buftag::blockNum, buf, BufferDescriptorGetBuffer, RelFileNode::dbNode, elog, buftag::forkNum, BufferDesc::freeNext, GetBufferDescriptor, GetPrivateRefCount(), i, InvalidateBuffer(), InvalidBackendId, LockBufHdr(), LOG, NBuffers, relpathbackend, relpathperm, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3413 {
3414  int i;
3415 
3416  /*
3417  * We needn't consider local buffers, since by assumption the target
3418  * database isn't our own.
3419  */
3420 
3421  for (i = 0; i < NBuffers; i++)
3422  {
3423  BufferDesc *bufHdr = GetBufferDescriptor(i);
3424  uint32 buf_state;
3425 
3426  /*
3427  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3428  * and saves some cycles.
3429  */
3430  if (bufHdr->tag.rnode.dbNode != dbid)
3431  continue;
3432 
3433  buf_state = LockBufHdr(bufHdr);
3434  if (bufHdr->tag.rnode.dbNode == dbid)
3435  InvalidateBuffer(bufHdr); /* releases spinlock */
3436  else
3437  UnlockBufHdr(bufHdr, buf_state);
3438  }
3439 }

◆ DropRelFileNodeBuffers()

void DropRelFileNodeBuffers ( SMgrRelation  smgr_reln,
ForkNumber * forkNum,
int  nforks,
BlockNumber * firstDelBlock 
)

Definition at line 3058 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeLocalBuffers(), FindAndDropRelFileNodeBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, SMgrRelationData::smgr_rnode, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrtruncate().

3060 {
3061  int i;
3062  int j;
3063  RelFileNodeBackend rnode;
3064  BlockNumber nForkBlock[MAX_FORKNUM];
3065  uint64 nBlocksToInvalidate = 0;
3066 
3067  rnode = smgr_reln->smgr_rnode;
3068 
3069  /* If it's a local relation, it's localbuf.c's problem. */
3070  if (RelFileNodeBackendIsTemp(rnode))
3071  {
3072  if (rnode.backend == MyBackendId)
3073  {
3074  for (j = 0; j < nforks; j++)
3075  DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
3076  firstDelBlock[j]);
3077  }
3078  return;
3079  }
3080 
3081  /*
3082  * To remove all the pages of the specified relation forks from the buffer
3083  * pool, we need to scan the entire buffer pool but we can optimize it by
3084  * finding the buffers from BufMapping table provided we know the exact
3085  * size of each fork of the relation. The exact size is required to ensure
3086  * that we don't leave any buffer for the relation being dropped as
3087  * otherwise the background writer or checkpointer can lead to a PANIC
3088  * error while flushing buffers corresponding to files that don't exist.
3089  *
3090  * To know the exact size, we rely on the size cached for each fork by us
3091  * during recovery which limits the optimization to recovery and on
3092  * standbys but we can easily extend it once we have shared cache for
3093  * relation size.
3094  *
3095  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3096  * and future writes keep the cached value up-to-date. See
3097  * smgrextend. It is possible that the value of the first lseek is smaller
3098  * than the actual number of existing blocks in the file due to buggy
3099  * Linux kernels that might not have accounted for the recent write. But
3100  * that should be fine because there must not be any buffers after that
3101  * file size.
3102  */
3103  for (i = 0; i < nforks; i++)
3104  {
3105  /* Get the number of blocks for a relation's fork */
3106  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3107 
3108  if (nForkBlock[i] == InvalidBlockNumber)
3109  {
3110  nBlocksToInvalidate = InvalidBlockNumber;
3111  break;
3112  }
3113 
3114  /* calculate the number of blocks to be invalidated */
3115  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3116  }
3117 
3118  /*
3119  * We apply the optimization iff the total number of blocks to invalidate
3120  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3121  */
3122  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3123  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3124  {
3125  for (j = 0; j < nforks; j++)
3126  FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
3127  nForkBlock[j], firstDelBlock[j]);
3128  return;
3129  }
3130 
3131  for (i = 0; i < NBuffers; i++)
3132  {
3133  BufferDesc *bufHdr = GetBufferDescriptor(i);
3134  uint32 buf_state;
3135 
3136  /*
3137  * We can make this a tad faster by prechecking the buffer tag before
3138  * we attempt to lock the buffer; this saves a lot of lock
3139  * acquisitions in typical cases. It should be safe because the
3140  * caller must have AccessExclusiveLock on the relation, or some other
3141  * reason to be certain that no one is loading new pages of the rel
3142  * into the buffer pool. (Otherwise we might well miss such pages
3143  * entirely.) Therefore, while the tag might be changing while we
3144  * look at it, it can't be changing *to* a value we care about, only
3145  * *away* from such a value. So false negatives are impossible, and
3146  * false positives are safe because we'll recheck after getting the
3147  * buffer lock.
3148  *
3149  * We could check forkNum and blockNum as well as the rnode, but the
3150  * incremental win from doing so seems small.
3151  */
3152  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
3153  continue;
3154 
3155  buf_state = LockBufHdr(bufHdr);
3156 
3157  for (j = 0; j < nforks; j++)
3158  {
3159  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
3160  bufHdr->tag.forkNum == forkNum[j] &&
3161  bufHdr->tag.blockNum >= firstDelBlock[j])
3162  {
3163  InvalidateBuffer(bufHdr); /* releases spinlock */
3164  break;
3165  }
3166  }
3167  if (j >= nforks)
3168  UnlockBufHdr(bufHdr, buf_state);
3169  }
3170 }
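
BUF_DROP_FULL_SCAN_THRESHOLD is defined as NBuffers / 32, so with 16384 shared buffers (128MB at the default 8kB block size) the per-block BufTable lookups are used only when fewer than 512 blocks must be invalidated; anything larger falls through to the full-pool scan above. A tiny standalone illustration of that decision (the buffer counts are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        int         nbuffers = 16384;           /* hypothetical NBuffers */
        uint64_t    threshold = nbuffers / 32;  /* BUF_DROP_FULL_SCAN_THRESHOLD */
        uint64_t    to_invalidate[] = {100, 512, 4000};

        for (int i = 0; i < 3; i++)
            printf("%4llu blocks -> %s\n",
                   (unsigned long long) to_invalidate[i],
                   to_invalidate[i] < threshold ? "per-block BufTable lookups"
                                                : "full buffer-pool scan");
        return 0;
    }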

◆ DropRelFileNodesAllBuffers()

void DropRelFileNodesAllBuffers ( SMgrRelation * smgr_reln,
int  nnodes 
)

Definition at line 3182 of file bufmgr.c.

References BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeAllLocalBuffers(), FindAndDropRelFileNodeBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, SMgrRelationData::node, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, buftag::rnode, rnode_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkall().

3183 {
3184  int i;
3185  int j;
3186  int n = 0;
3187  SMgrRelation *rels;
3188  BlockNumber (*block)[MAX_FORKNUM + 1];
3189  uint64 nBlocksToInvalidate = 0;
3190  RelFileNode *nodes;
3191  bool cached = true;
3192  bool use_bsearch;
3193 
3194  if (nnodes == 0)
3195  return;
3196 
3197  rels = palloc(sizeof(SMgrRelation) * nnodes); /* non-local relations */
3198 
3199  /* If it's a local relation, it's localbuf.c's problem. */
3200  for (i = 0; i < nnodes; i++)
3201  {
3202  if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
3203  {
3204  if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
3205  DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
3206  }
3207  else
3208  rels[n++] = smgr_reln[i];
3209  }
3210 
3211  /*
3212  * If there are no non-local relations, then we're done. Release the
3213  * memory and return.
3214  */
3215  if (n == 0)
3216  {
3217  pfree(rels);
3218  return;
3219  }
3220 
3221  /*
3222  * This is used to remember the number of blocks for all the relations
3223  * forks.
3224  */
3225  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3226  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3227 
3228  /*
3229  * We can avoid scanning the entire buffer pool if we know the exact size
3230  * of each of the given relation forks. See DropRelFileNodeBuffers.
3231  */
3232  for (i = 0; i < n && cached; i++)
3233  {
3234  for (j = 0; j <= MAX_FORKNUM; j++)
3235  {
3236  /* Get the number of blocks for a relation's fork. */
3237  block[i][j] = smgrnblocks_cached(rels[i], j);
3238 
3239  /* We need to only consider the relation forks that exist. */
3240  if (block[i][j] == InvalidBlockNumber)
3241  {
3242  if (!smgrexists(rels[i], j))
3243  continue;
3244  cached = false;
3245  break;
3246  }
3247 
3248  /* calculate the total number of blocks to be invalidated */
3249  nBlocksToInvalidate += block[i][j];
3250  }
3251  }
3252 
3253  /*
3254  * We apply the optimization iff the total number of blocks to invalidate
3255  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3256  */
3257  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3258  {
3259  for (i = 0; i < n; i++)
3260  {
3261  for (j = 0; j <= MAX_FORKNUM; j++)
3262  {
3263  /* ignore relation forks that don't exist */
3264  if (!BlockNumberIsValid(block[i][j]))
3265  continue;
3266 
3267  /* drop all the buffers for a particular relation fork */
3268  FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node,
3269  j, block[i][j], 0);
3270  }
3271  }
3272 
3273  pfree(block);
3274  pfree(rels);
3275  return;
3276  }
3277 
3278  pfree(block);
3279  nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
3280  for (i = 0; i < n; i++)
3281  nodes[i] = rels[i]->smgr_rnode.node;
3282 
3283  /*
3284  * For low number of relations to drop just use a simple walk through, to
3285  * save the bsearch overhead. The threshold to use is rather a guess than
3286  * an exactly determined value, as it depends on many factors (CPU and RAM
3287  * speeds, amount of shared buffers etc.).
3288  */
3289  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3290 
3291  /* sort the list of rnodes if necessary */
3292  if (use_bsearch)
3293  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
3294 
3295  for (i = 0; i < NBuffers; i++)
3296  {
3297  RelFileNode *rnode = NULL;
3298  BufferDesc *bufHdr = GetBufferDescriptor(i);
3299  uint32 buf_state;
3300 
3301  /*
3302  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3303  * and saves some cycles.
3304  */
3305 
3306  if (!use_bsearch)
3307  {
3308  int j;
3309 
3310  for (j = 0; j < n; j++)
3311  {
3312  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
3313  {
3314  rnode = &nodes[j];
3315  break;
3316  }
3317  }
3318  }
3319  else
3320  {
3321  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
3322  nodes, n, sizeof(RelFileNode),
3323  rnode_comparator);
3324 
3325 
3326  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3327  if (rnode == NULL)
3328  continue;
3329 
3330  buf_state = LockBufHdr(bufHdr);
3331  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3332  InvalidateBuffer(bufHdr); /* releases spinlock */
3333  else
3334  UnlockBufHdr(bufHdr, buf_state);
3335  }
3336 
3337  pfree(nodes);
3338  pfree(rels);
3339 }
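
The linear-versus-binary-search choice above follows ordinary C library usage: beyond RELS_BSEARCH_THRESHOLD (20) relations, the rnode array is sorted once and each buffer tag is probed with bsearch(). A standalone sketch of that shape (a one-field Node struct is a hypothetical stand-in for RelFileNode and rnode_comparator()):

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical one-field stand-in for RelFileNode. */
    typedef struct { unsigned relNode; } Node;

    /* Shaped like rnode_comparator(): usable by both qsort and bsearch. */
    static int
    node_cmp(const void *a, const void *b)
    {
        unsigned    x = ((const Node *) a)->relNode;
        unsigned    y = ((const Node *) b)->relNode;

        return (x > y) - (x < y);
    }

    int
    main(void)
    {
        Node        nodes[] = {{16391}, {16384}, {16388}};
        Node        probe = {16388};

        qsort(nodes, 3, sizeof(Node), node_cmp);    /* cf. pg_qsort */
        if (bsearch(&probe, nodes, 3, sizeof(Node), node_cmp) != NULL)
            printf("buffer belongs to one of the dropped relations\n");
        return 0;
    }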

◆ FindAndDropRelFileNodeBuffers()

static void FindAndDropRelFileNodeBuffers ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3351 of file bufmgr.c.

References buftag::blockNum, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), buftag::forkNum, GetBufferDescriptor, INIT_BUFFERTAG, InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

3354 {
3355  BlockNumber curBlock;
3356 
3357  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3358  {
3359  uint32 bufHash; /* hash value for tag */
3360  BufferTag bufTag; /* identity of requested block */
3361  LWLock *bufPartitionLock; /* buffer partition lock for it */
3362  int buf_id;
3363  BufferDesc *bufHdr;
3364  uint32 buf_state;
3365 
3366  /* create a tag so we can lookup the buffer */
3367  INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock);
3368 
3369  /* determine its hash code and partition lock ID */
3370  bufHash = BufTableHashCode(&bufTag);
3371  bufPartitionLock = BufMappingPartitionLock(bufHash);
3372 
3373  /* Check that it is in the buffer pool. If not, do nothing. */
3374  LWLockAcquire(bufPartitionLock, LW_SHARED);
3375  buf_id = BufTableLookup(&bufTag, bufHash);
3376  LWLockRelease(bufPartitionLock);
3377 
3378  if (buf_id < 0)
3379  continue;
3380 
3381  bufHdr = GetBufferDescriptor(buf_id);
3382 
3383  /*
3384  * We need to lock the buffer header and recheck if the buffer is
3385  * still associated with the same block because the buffer could be
3386  * evicted by some other backend loading blocks for a different
3387  * relation after we release lock on the BufMapping table.
3388  */
3389  buf_state = LockBufHdr(bufHdr);
3390 
3391  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
3392  bufHdr->tag.forkNum == forkNum &&
3393  bufHdr->tag.blockNum >= firstDelBlock)
3394  InvalidateBuffer(bufHdr); /* releases spinlock */
3395  else
3396  UnlockBufHdr(bufHdr, buf_state);
3397  }
3398 }

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc * buf,
SMgrRelation  reln 
)
static

Definition at line 2812 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

2813 {
2814  XLogRecPtr recptr;
2815  ErrorContextCallback errcallback;
2816  instr_time io_start,
2817  io_time;
2818  Block bufBlock;
2819  char *bufToWrite;
2820  uint32 buf_state;
2821 
2822  /*
2823  * Try to start an I/O operation. If StartBufferIO returns false, then
2824  * someone else flushed the buffer before we could, so we need not do
2825  * anything.
2826  */
2827  if (!StartBufferIO(buf, false))
2828  return;
2829 
2830  /* Setup error traceback support for ereport() */
2831  errcallback.callback = shared_buffer_write_error_callback;
2832  errcallback.arg = (void *) buf;
2833  errcallback.previous = error_context_stack;
2834  error_context_stack = &errcallback;
2835 
2836  /* Find smgr relation for buffer */
2837  if (reln == NULL)
2838  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2839 
2840  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2841  buf->tag.blockNum,
2842  reln->smgr_rnode.node.spcNode,
2843  reln->smgr_rnode.node.dbNode,
2844  reln->smgr_rnode.node.relNode);
2845 
2846  buf_state = LockBufHdr(buf);
2847 
2848  /*
2849  * Run PageGetLSN while holding header lock, since we don't have the
2850  * buffer locked exclusively in all cases.
2851  */
2852  recptr = BufferGetLSN(buf);
2853 
2854  /* To check if block content changes while flushing. - vadim 01/17/97 */
2855  buf_state &= ~BM_JUST_DIRTIED;
2856  UnlockBufHdr(buf, buf_state);
2857 
2858  /*
2859  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2860  * rule that log updates must hit disk before any of the data-file changes
2861  * they describe do.
2862  *
2863  * However, this rule does not apply to unlogged relations, which will be
2864  * lost after a crash anyway. Most unlogged relation pages do not bear
2865  * LSNs since we never emit WAL records for them, and therefore flushing
2866  * up through the buffer LSN would be useless, but harmless. However,
2867  * GiST indexes use LSNs internally to track page-splits, and therefore
2868  * unlogged GiST pages bear "fake" LSNs generated by
2869  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2870  * LSN counter could advance past the WAL insertion point; and if it did
2871  * happen, attempting to flush WAL through that location would fail, with
2872  * disastrous system-wide consequences. To make sure that can't happen,
2873  * skip the flush if the buffer isn't permanent.
2874  */
2875  if (buf_state & BM_PERMANENT)
2876  XLogFlush(recptr);
2877 
2878  /*
2879  * Now it's safe to write buffer to disk. Note that no one else should
2880  * have been able to write it while we were busy with log flushing because
2881  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
2882  */
2883  bufBlock = BufHdrGetBlock(buf);
2884 
2885  /*
2886  * Update page checksum if desired. Since we have only shared lock on the
2887  * buffer, other processes might be updating hint bits in it, so we must
2888  * copy the page to private storage if we do checksumming.
2889  */
2890  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2891 
2892  if (track_io_timing)
2893  INSTR_TIME_SET_CURRENT(io_start);
2894 
2895  /*
2896  * bufToWrite is either the shared buffer or a copy, as appropriate.
2897  */
2898  smgrwrite(reln,
2899  buf->tag.forkNum,
2900  buf->tag.blockNum,
2901  bufToWrite,
2902  false);
2903 
2904  if (track_io_timing)
2905  {
2906  INSTR_TIME_SET_CURRENT(io_time);
2907  INSTR_TIME_SUBTRACT(io_time, io_start);
2908  pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
2909  INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
2910  }
2911 
2912  pgBufferUsage.shared_blks_written++;
2913 
2914  /*
2915  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2916  * end the BM_IO_IN_PROGRESS state.
2917  */
2918  TerminateBufferIO(buf, true, 0);
2919 
2920  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2921  buf->tag.blockNum,
2922  reln->smgr_rnode.node.spcNode,
2923  reln->smgr_rnode.node.dbNode,
2924  reln->smgr_rnode.node.relNode);
2925 
2926  /* Pop the error context stack */
2927  error_context_stack = errcallback.previous;
2928 }
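
The XLogFlush() call ahead of smgrwrite() is the write-ahead-log rule in miniature: the WAL describing a page's latest change must be durable before the page itself reaches disk. A standalone toy model of that ordering (flush_log() and write_page() are hypothetical stand-ins for XLogFlush() and smgrwrite()):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t LSN;

    static LSN  log_flushed_upto = 0;

    /* Stand-in for XLogFlush(): make WAL durable up to 'upto'. */
    static void
    flush_log(LSN upto)
    {
        if (upto > log_flushed_upto)
            log_flushed_upto = upto;
    }

    /* Stand-in for the FlushBuffer() ordering: WAL first, then the page. */
    static void
    write_page(LSN page_lsn)
    {
        flush_log(page_lsn);        /* never write data ahead of its WAL */
        printf("page (lsn=%llu) written; log flushed to %llu\n",
               (unsigned long long) page_lsn,
               (unsigned long long) log_flushed_upto);
    }

    int
    main(void)
    {
        write_page(42);
        write_page(17);             /* already covered by the earlier flush */
        return 0;
    }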

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3709 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3710 {
3711  int i;
3712  BufferDesc *bufHdr;
3713 
3714  /* Make sure we can handle the pin inside the loop */
3715  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3716 
3717  for (i = 0; i < NBuffers; i++)
3718  {
3719  uint32 buf_state;
3720 
3721  bufHdr = GetBufferDescriptor(i);
3722 
3723  /*
3724  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3725  * and saves some cycles.
3726  */
3727  if (bufHdr->tag.rnode.dbNode != dbid)
3728  continue;
3729 
3730  ReservePrivateRefCountEntry();
3731 
3732  buf_state = LockBufHdr(bufHdr);
3733  if (bufHdr->tag.rnode.dbNode == dbid &&
3734  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3735  {
3736  PinBuffer_Locked(bufHdr);
3737  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3738  FlushBuffer(bufHdr, NULL);
3739  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3740  UnpinBuffer(bufHdr, true);
3741  }
3742  else
3743  UnlockBufHdr(bufHdr, buf_state);
3744  }
3745 }

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 3752 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().

3753 {
3754  BufferDesc *bufHdr;
3755 
3756  /* currently not needed, but no fundamental reason not to support */
3757  Assert(!BufferIsLocal(buffer));
3758 
3759  Assert(BufferIsPinned(buffer));
3760 
3761  bufHdr = GetBufferDescriptor(buffer - 1);
3762 
3763  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3764 
3765  FlushBuffer(bufHdr, NULL);
3766 }

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 3516 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationGetSmgr(), RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by heapam_relation_copy_data(), and index_copy_data().

3517 {
3518  int i;
3519  BufferDesc *bufHdr;
3520 
3521  if (RelationUsesLocalBuffers(rel))
3522  {
3523  for (i = 0; i < NLocBuffer; i++)
3524  {
3525  uint32 buf_state;
3526 
3527  bufHdr = GetLocalBufferDescriptor(i);
3528  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3529  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3530  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3531  {
3532  ErrorContextCallback errcallback;
3533  Page localpage;
3534 
3535  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3536 
3537  /* Setup error traceback support for ereport() */
3538  errcallback.callback = local_buffer_write_error_callback;
3539  errcallback.arg = (void *) bufHdr;
3540  errcallback.previous = error_context_stack;
3541  error_context_stack = &errcallback;
3542 
3543  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3544 
3545  smgrwrite(RelationGetSmgr(rel),
3546  bufHdr->tag.forkNum,
3547  bufHdr->tag.blockNum,
3548  localpage,
3549  false);
3550 
3551  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3552  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3553 
3554  /* Pop the error context stack */
3555  error_context_stack = errcallback.previous;
3556  }
3557  }
3558 
3559  return;
3560  }
3561 
3562  /* Make sure we can handle the pin inside the loop */
3563  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3564 
3565  for (i = 0; i < NBuffers; i++)
3566  {
3567  uint32 buf_state;
3568 
3569  bufHdr = GetBufferDescriptor(i);
3570 
3571  /*
3572  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3573  * and saves some cycles.
3574  */
3575  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3576  continue;
3577 
3578  ReservePrivateRefCountEntry();
3579 
3580  buf_state = LockBufHdr(bufHdr);
3581  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3582  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3583  {
3584  PinBuffer_Locked(bufHdr);
3585  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3586  FlushBuffer(bufHdr, RelationGetSmgr(rel));
3587  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3588  UnpinBuffer(bufHdr, true);
3589  }
3590  else
3591  UnlockBufHdr(bufHdr, buf_state);
3592  }
3593 }

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation * smgrs,
int  nrels 
)

Definition at line 3605 of file bufmgr.c.

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, RelFileNodeBackend::node, palloc(), pfree(), pg_qsort(), PinBuffer_Locked(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, SMgrSortArray::rnode, rnode_comparator(), SMgrRelationData::smgr_rnode, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by smgrdosyncall().

3606 {
3607  int i;
3608  SMgrSortArray *srels;
3609  bool use_bsearch;
3610 
3611  if (nrels == 0)
3612  return;
3613 
3614  /* fill-in array for qsort */
3615  srels = palloc(sizeof(SMgrSortArray) * nrels);
3616 
3617  for (i = 0; i < nrels; i++)
3618  {
3619  Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
3620 
3621  srels[i].rnode = smgrs[i]->smgr_rnode.node;
3622  srels[i].srel = smgrs[i];
3623  }
3624 
3625  /*
3626  * Save the bsearch overhead for low number of relations to sync. See
3627  * DropRelFileNodesAllBuffers for details.
3628  */
3629  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
3630 
3631  /* sort the list of SMgrRelations if necessary */
3632  if (use_bsearch)
3633  pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
3634 
3635  /* Make sure we can handle the pin inside the loop */
3636  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3637 
3638  for (i = 0; i < NBuffers; i++)
3639  {
3640  SMgrSortArray *srelent = NULL;
3641  BufferDesc *bufHdr = GetBufferDescriptor(i);
3642  uint32 buf_state;
3643 
3644  /*
3645  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3646  * and saves some cycles.
3647  */
3648 
3649  if (!use_bsearch)
3650  {
3651  int j;
3652 
3653  for (j = 0; j < nrels; j++)
3654  {
3655  if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
3656  {
3657  srelent = &srels[j];
3658  break;
3659  }
3660  }
3661 
3662  }
3663  else
3664  {
3665  srelent = bsearch((const void *) &(bufHdr->tag.rnode),
3666  srels, nrels, sizeof(SMgrSortArray),
3667  rnode_comparator);
3668  }
3669 
3670  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3671  if (srelent == NULL)
3672  continue;
3673 
3674  ReservePrivateRefCountEntry();
3675 
3676  buf_state = LockBufHdr(bufHdr);
3677  if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
3678  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3679  {
3680  PinBuffer_Locked(bufHdr);
3681  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3682  FlushBuffer(bufHdr, srelent->srel);
3683  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3684  UnpinBuffer(bufHdr, true);
3685  }
3686  else
3687  UnlockBufHdr(bufHdr, buf_state);
3688  }
3689 
3690  pfree(srels);
3691 }

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry * ref)
static

Definition at line 410 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

411 {
412  Assert(ref->refcount == 0);
413 
414  if (ref >= &PrivateRefCountArray[0] &&
415  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
416  {
417  ref->buffer = InvalidBuffer;
418 
419  /*
420  * Mark the just used entry as reserved - in many scenarios that
421  * allows us to avoid ever having to search the array/hash for free
422  * entries.
423  */
424  ReservedRefCountEntry = ref;
425  }
426  else
427  {
428  bool found;
429  Buffer buffer = ref->buffer;
430 
431  hash_search(PrivateRefCountHash,
432  (void *) &buffer,
433  HASH_REMOVE,
434  &found);
435  Assert(found);
436  Assert(PrivateRefCountOverflowed > 0);
437  PrivateRefCountOverflowed--;
438  }
439 }

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inline static

Definition at line 387 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), PrintBufferLeakWarning(), and ReadRecentBuffer().

388 {
389  PrivateRefCountEntry *ref;
390 
391  Assert(BufferIsValid(buffer));
392  Assert(!BufferIsLocal(buffer));
393 
394  /*
395  * Not moving the entry - that's ok for the current users, but we might
396  * want to change this one day.
397  */
398  ref = GetPrivateRefCountEntry(buffer, false);
399 
400  if (ref == NULL)
401  return 0;
402  return ref->refcount;
403 }

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 307 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

308 {
309  PrivateRefCountEntry *res;
310  int i;
311 
312  Assert(BufferIsValid(buffer));
313  Assert(!BufferIsLocal(buffer));
314 
315  /*
316  * First search for references in the array, that'll be sufficient in the
317  * majority of cases.
318  */
319  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
320  {
321  res = &PrivateRefCountArray[i];
322 
323  if (res->buffer == buffer)
324  return res;
325  }
326 
327  /*
328  * By here we know that the buffer, if already pinned, isn't residing in
329  * the array.
330  *
331  * Only look up the buffer in the hashtable if we've previously overflowed
332  * into it.
333  */
334  if (PrivateRefCountOverflowed == 0)
335  return NULL;
336 
337  res = hash_search(PrivateRefCountHash,
338  (void *) &buffer,
339  HASH_FIND,
340  NULL);
341 
342  if (res == NULL)
343  return NULL;
344  else if (!do_move)
345  {
346  /* caller doesn't want us to move the hash entry into the array */
347  return res;
348  }
349  else
350  {
351  /* move buffer from hashtable into the free array slot */
352  bool found;
353  PrivateRefCountEntry *free;
354 
355  /* Ensure there's a free array slot */
356  ReservePrivateRefCountEntry();
357 
358  /* Use up the reserved slot */
359  Assert(ReservedRefCountEntry != NULL);
360  free = ReservedRefCountEntry;
361  ReservedRefCountEntry = NULL;
362  Assert(free->buffer == InvalidBuffer);
363 
364  /* and fill it */
365  free->buffer = buffer;
366  free->refcount = res->refcount;
367 
368  /* delete from hashtable */
369  hash_search(PrivateRefCountHash,
370  (void *) &buffer,
371  HASH_REMOVE,
372  &found);
373  Assert(found);
374  Assert(PrivateRefCountOverflowed > 0);
375  PrivateRefCountOverflowed--;
376 
377  return free;
378  }
379 }
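
The two-level lookup (a fixed eight-entry array searched first, a hash table consulted only after some entry has overflowed into it) keeps the common case of a handful of concurrently pinned buffers free of any hash probes. A standalone model of the lookup order (simplified Entry type; the overflow path is reduced to a comment):

    #include <stdio.h>

    #define ARRAY_ENTRIES 8     /* cf. REFCOUNT_ARRAY_ENTRIES */

    typedef struct { int buffer; int refcount; } Entry;    /* simplified */

    static Entry entries[ARRAY_ENTRIES];    /* buffer == 0 means empty */
    static int   overflowed = 0;            /* cf. PrivateRefCountOverflowed */

    /* Lookup order of GetPrivateRefCountEntry(): scan the small array
     * first; touch the hash table only if an entry ever overflowed. */
    static Entry *
    lookup(int buffer)
    {
        for (int i = 0; i < ARRAY_ENTRIES; i++)
            if (entries[i].buffer == buffer)
                return &entries[i];

        if (overflowed == 0)
            return NULL;        /* common case: skip the hash entirely */

        /* ... hash_search(...) would go here ... */
        return NULL;
    }

    int
    main(void)
    {
        Entry      *e;

        entries[0].buffer = 5;
        entries[0].refcount = 2;
        e = lookup(5);
        printf("buffer 5 refcount = %d\n", e ? e->refcount : 0);
        return 0;
    }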

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 4219 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

4220 {
4221  int bufid = GetStartupBufferPinWaitBufId();
4222 
4223  /*
4224  * If we get woken slowly then it's possible that the Startup process was
4225  * already woken by other backends before we got here. Also possible that
4226  * we get here by multiple interrupts or interrupts at inappropriate
4227  * times, so make sure we do nothing if the bufid is not set.
4228  */
4229  if (bufid < 0)
4230  return false;
4231 
4232  if (GetPrivateRefCount(bufid + 1) > 0)
4233  return true;
4234 
4235  return false;
4236 }

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3810 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

3811 {
3812  Assert(BufferIsPinned(buffer));
3813  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3814  if (BufferIsLocal(buffer))
3815  LocalRefCount[-buffer - 1]++;
3816  else
3817  {
3818  PrivateRefCountEntry *ref;
3819 
3820  ref = GetPrivateRefCountEntry(buffer, true);
3821  Assert(ref != NULL);
3822  ref->refcount++;
3823  }
3824  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
3825 }
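
Callers use this when a second, independently released reference to an already-pinned buffer is needed, for example when a tuple table slot keeps its own pointer into the page. A minimal sketch of the pattern (hypothetical function name; assumes backend compilation and that buf is already pinned, with the extra reference later dropped via ReleaseBuffer()):

    #include "postgres.h"
    #include "storage/bufmgr.h"

    /* Hypothetical: hand out an extra reference to a buffer this backend
     * already pins; the caller must later drop it with ReleaseBuffer(). */
    static Buffer
    share_pinned_buffer(Buffer buf)
    {
        IncrBufferRefCount(buf);    /* cheap: no buffer header lock taken */
        return buf;
    }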

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 2592 of file bufmgr.c.

References HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, and PrivateRefCountArray.

Referenced by BaseInit().

2593 {
2594  HASHCTL hash_ctl;
2595 
2596  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2597 
2598  hash_ctl.keysize = sizeof(int32);
2599  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2600 
2601  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2602  HASH_ELEM | HASH_BLOBS);
2603 }

◆ InitBufferPoolBackend()

void InitBufferPoolBackend ( void  )

Definition at line 2615 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().

2616 {
2617  on_shmem_exit(AtProcExit_Buffers, 0);
2618 }

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1458 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), and FindAndDropRelFileNodeBuffers().

1459 {
1460  BufferTag oldTag;
1461  uint32 oldHash; /* hash value for oldTag */
1462  LWLock *oldPartitionLock; /* buffer partition lock for it */
1463  uint32 oldFlags;
1464  uint32 buf_state;
1465 
1466  /* Save the original buffer tag before dropping the spinlock */
1467  oldTag = buf->tag;
1468 
1469  buf_state = pg_atomic_read_u32(&buf->state);
1470  Assert(buf_state & BM_LOCKED);
1471  UnlockBufHdr(buf, buf_state);
1472 
1473  /*
1474  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1475  * worth storing the hashcode in BufferDesc so we need not recompute it
1476  * here? Probably not.
1477  */
1478  oldHash = BufTableHashCode(&oldTag);
1479  oldPartitionLock = BufMappingPartitionLock(oldHash);
1480 
1481 retry:
1482 
1483  /*
1484  * Acquire exclusive mapping lock in preparation for changing the buffer's
1485  * association.
1486  */
1487  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1488 
1489  /* Re-lock the buffer header */
1490  buf_state = LockBufHdr(buf);
1491 
1492  /* If it's changed while we were waiting for lock, do nothing */
1493  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1494  {
1495  UnlockBufHdr(buf, buf_state);
1496  LWLockRelease(oldPartitionLock);
1497  return;
1498  }
1499 
1500  /*
1501  * We assume the only reason for it to be pinned is that someone else is
1502  * flushing the page out. Wait for them to finish. (This could be an
1503  * infinite loop if the refcount is messed up... it would be nice to time
1504  * out after a while, but there seems no way to be sure how many loops may
1505  * be needed. Note that if the other guy has pinned the buffer but not
1506  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1507  * be busy-looping here.)
1508  */
1509  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1510  {
1511  UnlockBufHdr(buf, buf_state);
1512  LWLockRelease(oldPartitionLock);
1513  /* safety check: should definitely not be our *own* pin */
1514  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1515  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1516  WaitIO(buf);
1517  goto retry;
1518  }
1519 
1520  /*
1521  * Clear out the buffer's tag and flags. We must do this to ensure that
1522  * linear scans of the buffer array don't think the buffer is valid.
1523  */
1524  oldFlags = buf_state & BUF_FLAG_MASK;
1525  CLEAR_BUFFERTAG(buf->tag);
1526  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1527  UnlockBufHdr(buf, buf_state);
1528 
1529  /*
1530  * Remove the buffer from the lookup hashtable, if it was in there.
1531  */
1532  if (oldFlags & BM_TAG_VALID)
1533  BufTableDelete(&oldTag, oldHash);
1534 
1535  /*
1536  * Done with mapping lock.
1537  */
1538  LWLockRelease(oldPartitionLock);
1539 
1540  /*
1541  * Insert the buffer at the head of the list of free buffers.
1542  */
1543  StrategyFreeBuffer(buf);
1544 }

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 4301 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), hash_xlog_split_allocate_page(), and hashbucketcleanup().

4302 {
4303  BufferDesc *bufHdr;
4304  uint32 buf_state;
4305 
4306  Assert(BufferIsValid(buffer));
4307 
4308  if (BufferIsLocal(buffer))
4309  {
4310  /* There should be exactly one pin */
4311  if (LocalRefCount[-buffer - 1] != 1)
4312  return false;
4313  /* Nobody else to wait for */
4314  return true;
4315  }
4316 
4317  /* There should be exactly one local pin */
4318  if (GetPrivateRefCount(buffer) != 1)
4319  return false;
4320 
4321  bufHdr = GetBufferDescriptor(buffer - 1);
4322 
4323  /* caller must hold exclusive lock on buffer */
4324  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
4325  LW_EXCLUSIVE));
4326 
4327  buf_state = LockBufHdr(bufHdr);
4328 
4329  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4330  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4331  {
4332  /* pincount is OK. */
4333  UnlockBufHdr(bufHdr, buf_state);
4334  return true;
4335  }
4336 
4337  UnlockBufHdr(bufHdr, buf_state);
4338  return false;
4339 }
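
A caller-side sketch (compact_page is a hypothetical helper): the hash index code listed above uses this check, while already holding the exclusive content lock, to decide whether it may rearrange the page now or must defer the cleanup.

if (IsBufferCleanupOK(buf))
{
    /* sole pin + exclusive lock: tuples may be moved around on the page */
    compact_page(buf);          /* hypothetical defragmentation step */
}
else
{
    /* another backend still holds a pin; retry the cleanup later */
}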

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4779 of file bufmgr.c.

References buftag::blockNum, cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4780 {
4781  int i;
4782 
4783  if (context->nr_pending == 0)
4784  return;
4785 
4786  /*
4787  * Executing the writes in order can make them a lot faster, and allows us
4788  * to merge writeback requests for consecutive blocks into larger writebacks.
4789  */
4790  sort_pending_writebacks(context->pending_writebacks, context->nr_pending);
4791 
4792  /*
4793  * Coalesce neighbouring writes, but nothing else. For that we iterate
4794  * through the now-sorted array of pending flushes, and look forward to
4795  * find all neighbouring (or identical) writes.
4796  */
4797  for (i = 0; i < context->nr_pending; i++)
4798  {
4799  PendingWriteback *cur;
4800  PendingWriteback *next;
4801  SMgrRelation reln;
4802  int ahead;
4803  BufferTag tag;
4804  Size nblocks = 1;
4805 
4806  cur = &context->pending_writebacks[i];
4807  tag = cur->tag;
4808 
4809  /*
4810  * Peek ahead into the following writeback requests to see if they can
4811  * be combined with the current one.
4812  */
4813  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4814  {
4815  next = &context->pending_writebacks[i + ahead + 1];
4816 
4817  /* different file, stop */
4818  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4819  cur->tag.forkNum != next->tag.forkNum)
4820  break;
4821 
4822  /* ok, block queued twice, skip */
4823  if (cur->tag.blockNum == next->tag.blockNum)
4824  continue;
4825 
4826  /* only merge consecutive writes */
4827  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4828  break;
4829 
4830  nblocks++;
4831  cur = next;
4832  }
4833 
4834  i += ahead;
4835 
4836  /* and finally tell the kernel to write the data to storage */
4837  reln = smgropen(tag.rnode, InvalidBackendId);
4838  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4839  }
4840 
4841  context->nr_pending = 0;
4842 }
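
The merge loop is easier to see in isolation. A self-contained sketch (hypothetical names; sorted block numbers within a single fork stand in for buffer tags): duplicates are skipped and runs of consecutive blocks collapse into one (start, nblocks) request.

#include <stdio.h>

static void
issue(unsigned start, unsigned nblocks)
{
    printf("writeback blocks %u..%u\n", start, start + nblocks - 1);
}

int
main(void)
{
    unsigned blocks[] = {10, 11, 11, 12, 40, 41, 99};   /* already sorted */
    int      n = sizeof(blocks) / sizeof(blocks[0]);

    for (int i = 0; i < n; i++)
    {
        unsigned start = blocks[i];
        unsigned nblocks = 1;
        int      ahead;

        for (ahead = 0; i + ahead + 1 < n; ahead++)
        {
            unsigned next = blocks[i + ahead + 1];

            if (next == blocks[i + ahead])
                continue;               /* block queued twice: skip */
            if (next != blocks[i + ahead] + 1)
                break;                  /* not consecutive: stop merging */
            nblocks++;
        }
        i += ahead;
        issue(start, nblocks);  /* prints 10..12, then 40..41, then 99..99 */
    }
    return 0;
}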

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4545 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4546 {
4547  BufferDesc *bufHdr = (BufferDesc *) arg;
4548 
4549  if (bufHdr != NULL)
4550  {
4551  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4552  bufHdr->tag.forkNum);
4553 
4554  errcontext("writing block %u of relation %s",
4555  bufHdr->tag.blockNum, path);
4556  pfree(path);
4557  }
4558 }

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 4011 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

4012 {
4013  BufferDesc *buf;
4014 
4015  Assert(BufferIsPinned(buffer));
4016  if (BufferIsLocal(buffer))
4017  return; /* local buffers need no lock */
4018 
4019  buf = GetBufferDescriptor(buffer - 1);
4020 
4021  if (mode == BUFFER_LOCK_UNLOCK)
4022  LWLockRelease(BufferDescriptorGetContentLock(buf));
4023  else if (mode == BUFFER_LOCK_SHARE)
4024  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
4025  else if (mode == BUFFER_LOCK_EXCLUSIVE)
4026  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
4027  else
4028  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
4029 }
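
A typical caller, sketched under the assumption that rel is an open relation on which an adequate heavyweight lock is already held and blkno is a valid block number:

Buffer  buf;
Page    page;

buf = ReadBuffer(rel, blkno);
LockBuffer(buf, BUFFER_LOCK_SHARE);

page = BufferGetPage(buf);
/* ... inspect tuples on the page; no modifications under a share lock ... */

UnlockReleaseBuffer(buf);       /* content-lock release plus unpin */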

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 4068 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, DeadlockTimeout, elog, ERROR, get_ps_display(), GetBufferDescriptor, GetCurrentTimestamp(), GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcPid, now(), palloc(), pfree(), PG_WAIT_BUFFER_PIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr, update_process_title, and BufferDesc::wait_backend_pid.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

4069 {
4070  BufferDesc *bufHdr;
4071  char *new_status = NULL;
4072  TimestampTz waitStart = 0;
4073  bool logged_recovery_conflict = false;
4074 
4075  Assert(BufferIsPinned(buffer));
4076  Assert(PinCountWaitBuf == NULL);
4077 
4078  if (BufferIsLocal(buffer))
4079  {
4080  /* There should be exactly one pin */
4081  if (LocalRefCount[-buffer - 1] != 1)
4082  elog(ERROR, "incorrect local pin count: %d",
4083  LocalRefCount[-buffer - 1]);
4084  /* Nobody else to wait for */
4085  return;
4086  }
4087 
4088  /* There should be exactly one local pin */
4089  if (GetPrivateRefCount(buffer) != 1)
4090  elog(ERROR, "incorrect local pin count: %d",
4091  GetPrivateRefCount(buffer));
4092 
4093  bufHdr = GetBufferDescriptor(buffer - 1);
4094 
4095  for (;;)
4096  {
4097  uint32 buf_state;
4098 
4099  /* Try to acquire lock */
4100  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4101  buf_state = LockBufHdr(bufHdr);
4102 
4103  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4104  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4105  {
4106  /* Successfully acquired exclusive lock with pincount 1 */
4107  UnlockBufHdr(bufHdr, buf_state);
4108 
4109  /*
4110  * Emit the log message if recovery conflict on buffer pin was
4111  * resolved but the startup process waited longer than
4112  * deadlock_timeout for it.
4113  */
4114  if (logged_recovery_conflict)
4115  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4116  waitStart, GetCurrentTimestamp(),
4117  NULL, false);
4118 
4119  /* Report change to non-waiting status */
4120  if (new_status)
4121  {
4122  set_ps_display(new_status);
4123  pfree(new_status);
4124  }
4125  return;
4126  }
4127  /* Failed, so mark myself as waiting for pincount 1 */
4128  if (buf_state & BM_PIN_COUNT_WAITER)
4129  {
4130  UnlockBufHdr(bufHdr, buf_state);
4131  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4132  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4133  }
4134  bufHdr->wait_backend_pid = MyProcPid;
4135  PinCountWaitBuf = bufHdr;
4136  buf_state |= BM_PIN_COUNT_WAITER;
4137  UnlockBufHdr(bufHdr, buf_state);
4138  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4139 
4140  /* Wait to be signaled by UnpinBuffer() */
4141  if (InHotStandby)
4142  {
4143  /* Report change to waiting status */
4144  if (update_process_title && new_status == NULL)
4145  {
4146  const char *old_status;
4147  int len;
4148 
4149  old_status = get_ps_display(&len);
4150  new_status = (char *) palloc(len + 8 + 1);
4151  memcpy(new_status, old_status, len);
4152  strcpy(new_status + len, " waiting");
4153  set_ps_display(new_status);
4154  new_status[len] = '\0'; /* truncate off " waiting" */
4155  }
4156 
4157  /*
4158  * Emit the log message if the startup process is waiting longer
4159  * than deadlock_timeout for recovery conflict on buffer pin.
4160  *
4161  * Skip this if first time through because the startup process has
4162  * not started waiting yet in this case. So, the wait start
4163  * timestamp is set after this logic.
4164  */
4165  if (waitStart != 0 && !logged_recovery_conflict)
4166  {
4167  TimestampTz now = GetCurrentTimestamp();
4168 
4169  if (TimestampDifferenceExceeds(waitStart, now,
4170  DeadlockTimeout))
4171  {
4172  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4173  waitStart, now, NULL, true);
4174  logged_recovery_conflict = true;
4175  }
4176  }
4177 
4178  /*
4179  * Set the wait start timestamp if logging is enabled and first
4180  * time through.
4181  */
4182  if (log_recovery_conflict_waits && waitStart == 0)
4183  waitStart = GetCurrentTimestamp();
4184 
4185  /* Publish the bufid that Startup process waits on */
4186  SetStartupBufferPinWaitBufId(buffer - 1);
4187  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4188  ResolveRecoveryConflictWithBufferPin();
4189  /* Reset the published bufid */
4190  SetStartupBufferPinWaitBufId(-1);
4191  }
4192  else
4193  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
4194 
4195  /*
4196  * Remove flag marking us as waiter. Normally this will not be set
4197  * anymore, but ProcWaitForSignal() can return for other signals as
4198  * well. We take care to only reset the flag if we're the waiter, as
4199  * theoretically another backend could have started waiting. That's
4200  * impossible with the current usages due to table level locking, but
4201  * better be safe.
4202  */
4203  buf_state = LockBufHdr(bufHdr);
4204  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4205  bufHdr->wait_backend_pid == MyProcPid)
4206  buf_state &= ~BM_PIN_COUNT_WAITER;
4207  UnlockBufHdr(bufHdr, buf_state);
4208 
4209  PinCountWaitBuf = NULL;
4210  /* Loop back and try again */
4211  }
4212 }
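
A sketch of the cleanup-lock pattern as a vacuum-style caller might use it (rel and blkno are assumed; bstrategy is an optional BufferAccessStrategy, and NULL also works). It assumes the buffer carries exactly one pin, ours, per the checks above.

Buffer  buf;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBufferForCleanup(buf);

/*
 * Exclusive lock with no other pins: safe to prune or defragment items
 * that concurrently running scans might otherwise hold pointers into.
 */

UnlockReleaseBuffer(buf);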

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4591 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FindAndDropRelFileNodeBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4592 {
4593  SpinDelayStatus delayStatus;
4594  uint32 old_buf_state;
4595 
4596  init_local_spin_delay(&delayStatus);
4597 
4598  while (true)
4599  {
4600  /* set BM_LOCKED flag */
4601  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4602  /* if it wasn't set before we're OK */
4603  if (!(old_buf_state & BM_LOCKED))
4604  break;
4605  perform_spin_delay(&delayStatus);
4606  }
4607  finish_spin_delay(&delayStatus);
4608  return old_buf_state | BM_LOCKED;
4609 }
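
The return value is the state word with BM_LOCKED set; the canonical pairing hands a (possibly modified) copy back to UnlockBufHdr(), which stores it with BM_LOCKED cleared again. A sketch (bufHdr is an assumed BufferDesc pointer):

uint32  buf_state;

buf_state = LockBufHdr(bufHdr);
if (BUF_STATE_GET_REFCOUNT(buf_state) == 0)
{
    /* examine or adjust flag bits in buf_state while holding the lock */
}
UnlockBufHdr(bufHdr, buf_state);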

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 1556 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), lazy_scan_heap(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().

1557 {
1558  BufferDesc *bufHdr;
1559  uint32 buf_state;
1560  uint32 old_buf_state;
1561 
1562  if (!BufferIsValid(buffer))
1563  elog(ERROR, "bad buffer ID: %d", buffer);
1564 
1565  if (BufferIsLocal(buffer))
1566  {
1567  MarkLocalBufferDirty(buffer);
1568  return;
1569  }
1570 
1571  bufHdr = GetBufferDescriptor(buffer - 1);
1572 
1573  Assert(BufferIsPinned(buffer));
1574  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1575  LW_EXCLUSIVE));
1576 
1577  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1578  for (;;)
1579  {
1580  if (old_buf_state & BM_LOCKED)
1581  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1582 
1583  buf_state = old_buf_state;
1584 
1585  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1586  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1587 
1588  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1589  buf_state))
1590  break;
1591  }
1592 
1593  /*
1594  * If the buffer was not dirty already, do vacuum accounting.
1595  */
1596  if (!(old_buf_state & BM_DIRTY))
1597  {
1598  VacuumPageDirty++;
1599  pgBufferUsage.shared_blks_dirtied++;
1600  if (VacuumCostActive)
1601  VacuumCostBalance += VacuumCostPageDirty;
1602  }
1603 }
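
A sketch of the surrounding protocol (src/backend/access/transam/README has the authoritative version): the page change and MarkBufferDirty() happen inside a critical section, and the WAL record is inserted before the page's LSN is stamped.

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

START_CRIT_SECTION();
/* ... modify BufferGetPage(buf) in place ... */
MarkBufferDirty(buf);
/* ... recptr = XLogInsert(...); PageSetLSN(page, recptr); ... */
END_CRIT_SECTION();

UnlockReleaseBuffer(buf);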

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3842 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGPROC::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileNodeSkippingWAL(), buftag::rnode, BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3843 {
3844  BufferDesc *bufHdr;
3845  Page page = BufferGetPage(buffer);
3846 
3847  if (!BufferIsValid(buffer))
3848  elog(ERROR, "bad buffer ID: %d", buffer);
3849 
3850  if (BufferIsLocal(buffer))
3851  {
3852  MarkLocalBufferDirty(buffer);
3853  return;
3854  }
3855 
3856  bufHdr = GetBufferDescriptor(buffer - 1);
3857 
3858  Assert(GetPrivateRefCount(buffer) > 0);
3859  /* here, either share or exclusive lock is OK */
3860  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3861 
3862  /*
3863  * This routine might get called many times on the same page, if we are
3864  * making the first scan after commit of an xact that added/deleted many
3865  * tuples. So, be as quick as we can if the buffer is already dirty. We
3866  * do this by not acquiring the spinlock if it looks like the status bits
3867  * are already set. Since we make this test unlocked, there's a chance we
3868  * might fail to notice that the flags have just been cleared, and fail
3869  * to reset them, due to memory-ordering issues. But since this function
3870  * is only intended to be used in cases where failing to write out the
3871  * data would be harmless anyway, it doesn't really matter.
3872  */
3873  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3874  (BM_DIRTY | BM_JUST_DIRTIED))
3875  {
3876  XLogRecPtr lsn = InvalidXLogRecPtr;
3877  bool dirtied = false;
3878  bool delayChkpt = false;
3879  uint32 buf_state;
3880 
3881  /*
3882  * If we need to protect hint bit updates from torn writes, WAL-log a
3883  * full page image of the page. This full page image is only necessary
3884  * if the hint bit update is the first change to the page since the
3885  * last checkpoint.
3886  *
3887  * We don't check full_page_writes here because that logic is included
3888  * when we call XLogInsert() since the value changes dynamically.
3889  */
3890  if (XLogHintBitIsNeeded() &&
3891  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3892  {
3893  /*
3894  * If we must not write WAL, due to a relfilenode-specific
3895  * condition or being in recovery, don't dirty the page. We can
3896  * set the hint, just not dirty the page as a result so the hint
3897  * is lost when we evict the page or shutdown.
3898  *
3899  * See src/backend/storage/page/README for longer discussion.
3900  */
3901  if (RecoveryInProgress() ||
3902  RelFileNodeSkippingWAL(bufHdr->tag.rnode))
3903  return;
3904 
3905  /*
3906  * If the block is already dirty because we either made a change
3907  * or set a hint already, then we don't need to write a full page
3908  * image. Note that aggressive cleaning of blocks dirtied by hint
3909  * bit setting would increase the call rate. Bulk setting of hint
3910  * bits would reduce the call rate...
3911  *
3912  * We must issue the WAL record before we mark the buffer dirty.
3913  * Otherwise we might write the page before we write the WAL. That
3914  * causes a race condition, since a checkpoint might occur between
3915  * writing the WAL record and marking the buffer dirty. We solve
3916  * that with a kluge, but one that is already in use during
3917  * transaction commit to prevent race conditions. Basically, we
3918  * simply prevent the checkpoint WAL record from being written
3919  * until we have marked the buffer dirty. We don't start the
3920  * checkpoint flush until we have marked dirty, so our checkpoint
3921  * must flush the change to disk successfully or the checkpoint
3922  * never gets written, so crash recovery will fix.
3923  *
3924  * It's possible we may enter here without an xid, so it is
3925  * essential that CreateCheckpoint waits for virtual transactions
3926  * rather than full transactionids.
3927  */
3928  MyProc->delayChkpt = delayChkpt = true;
3929  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3930  }
3931 
3932  buf_state = LockBufHdr(bufHdr);
3933 
3934  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3935 
3936  if (!(buf_state & BM_DIRTY))
3937  {
3938  dirtied = true; /* Means "will be dirtied by this action" */
3939 
3940  /*
3941  * Set the page LSN if we wrote a backup block. We aren't supposed
3942  * to set this when only holding a share lock but as long as we
3943  * serialise it somehow we're OK. We choose to set LSN while
3944  * holding the buffer header lock, which causes any reader of an
3945  * LSN who holds only a share lock to also obtain a buffer header
3946  * lock before using PageGetLSN(), which is enforced in
3947  * BufferGetLSNAtomic().
3948  *
3949  * If checksums are enabled, you might think we should reset the
3950  * checksum here. That will happen when the page is written
3951  * sometime later in this checkpoint cycle.
3952  */
3953  if (!XLogRecPtrIsInvalid(lsn))
3954  PageSetLSN(page, lsn);
3955  }
3956 
3957  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3958  UnlockBufHdr(bufHdr, buf_state);
3959 
3960  if (delayChkpt)
3961  MyProc->delayChkpt = false;
3962 
3963  if (dirtied)
3964  {
3965  VacuumPageDirty++;
3966  pgBufferUsage.shared_blks_dirtied++;
3967  if (VacuumCostActive)
3968  VacuumCostBalance += VacuumCostPageDirty;
3969  }
3970  }
3971 }
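
A caller-side sketch in the style of SetHintBits() (listed under "Referenced by" above; tuple, a HeapTupleHeader, and buffer are assumed to be in scope): the update is a recomputable hint, so the buffer is only hinted dirty and losing the write is harmless.

/* assumes at least a share content lock is held on "buffer" */
tuple->t_infomask |= HEAP_XMIN_COMMITTED;   /* recoverable hint bit */
MarkBufferDirtyHint(buffer, true);          /* true: standard page layout */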

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 281 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

282 {
283  PrivateRefCountEntry *res;
284 
285  /* only allowed to be called when a reservation has been made */
286  Assert(ReservedRefCountEntry != NULL);
287 
288  /* use up the reserved entry */
289  res = ReservedRefCountEntry;
290  ReservedRefCountEntry = NULL;
291 
292  /* and fill it */
293  res->buffer = buffer;
294  res->refcount = 0;
295 
296  return res;
297 }

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1677 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), and ReadRecentBuffer().

1678 {
1679  Buffer b = BufferDescriptorGetBuffer(buf);
1680  bool result;
1681  PrivateRefCountEntry *ref;
1682 
1683  ref = GetPrivateRefCountEntry(b, true);
1684 
1685  if (ref == NULL)
1686  {
1687  uint32 buf_state;
1688  uint32 old_buf_state;
1689 
1690  ReservePrivateRefCountEntry();
1691  ref = NewPrivateRefCountEntry(b);
1692 
1693  old_buf_state = pg_atomic_read_u32(&buf->state);
1694  for (;;)
1695  {
1696  if (old_buf_state & BM_LOCKED)
1697  old_buf_state = WaitBufHdrUnlocked(buf);
1698 
1699  buf_state = old_buf_state;
1700 
1701  /* increase refcount */
1702  buf_state += BUF_REFCOUNT_ONE;
1703 
1704  if (strategy == NULL)
1705  {
1706  /* Default case: increase usagecount unless already max. */
1707  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1708  buf_state += BUF_USAGECOUNT_ONE;
1709  }
1710  else
1711  {
1712  /*
1713  * Ring buffers shouldn't evict others from pool. Thus we
1714  * don't make usagecount more than 1.
1715  */
1716  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1717  buf_state += BUF_USAGECOUNT_ONE;
1718  }
1719 
1720  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1721  buf_state))
1722  {
1723  result = (buf_state & BM_VALID) != 0;
1724 
1725  /*
1726  * Assume that we acquired a buffer pin for the purposes of
1727  * Valgrind buffer client checks (even in !result case) to
1728  * keep things simple. Buffers that are unsafe to access are
1729  * not generally guaranteed to be marked undefined or
1730  * non-accessible in any case.
1731  */
1732  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1733  break;
1734  }
1735  }
1736  }
1737  else
1738  {
1739  /*
1740  * If we previously pinned the buffer, it must surely be valid.
1741  *
1742  * Note: We deliberately avoid a Valgrind client request here.
1743  * Individual access methods can optionally superimpose buffer page
1744  * client requests on top of our client requests to enforce that
1745  * buffers are only accessed while locked (and pinned). It's possible
1746  * that the buffer page is legitimately non-accessible here. We
1747  * cannot meddle with that.
1748  */
1749  result = true;
1750  }
1751 
1752  ref->refcount++;
1753  Assert(ref->refcount > 0);
1754  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1755  return result;
1756 }

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1780 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, UnlockBufHdr, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), ReadRecentBuffer(), and SyncOneBuffer().

1781 {
1782  Buffer b;
1783  PrivateRefCountEntry *ref;
1784  uint32 buf_state;
1785 
1786  /*
1787  * As explained, we don't expect any preexisting pins. That allows us to
1788  * manipulate the PrivateRefCount after releasing the spinlock.
1789  */
1790  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1791 
1792  /*
1793  * Buffer can't have a preexisting pin, so mark its page as defined to
1794  * Valgrind (this is similar to the PinBuffer() case where the backend
1795  * doesn't already have a buffer pin)
1796  */
1797  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1798 
1799  /*
1800  * Since we hold the buffer spinlock, we can update the buffer state and
1801  * release the lock in one operation.
1802  */
1803  buf_state = pg_atomic_read_u32(&buf->state);
1804  Assert(buf_state & BM_LOCKED);
1805  buf_state += BUF_REFCOUNT_ONE;
1806  UnlockBufHdr(buf, buf_state);
1807 
1808  b = BufferDescriptorGetBuffer(buf);
1809 
1810  ref = NewPrivateRefCountEntry(b);
1811  ref->refcount++;
1812 
1813  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1814 }

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 587 of file bufmgr.c.

References Assert, BlockNumberIsValid, ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by acquire_sample_rows(), BitmapPrefetch(), count_nondeletable_pages(), HeapTupleHeaderAdvanceLatestRemovedXid(), and pg_prewarm().

588 {
589  Assert(RelationIsValid(reln));
590  Assert(BlockNumberIsValid(blockNum));
591 
592  if (RelationUsesLocalBuffers(reln))
593  {
594  /* see comments in ReadBufferExtended */
595  if (RELATION_IS_OTHER_TEMP(reln))
596  ereport(ERROR,
597  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
598  errmsg("cannot access temporary tables of other sessions")));
599 
600  /* pass it off to localbuf.c */
601  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
602  }
603  else
604  {
605  /* pass it to the shared buffer version */
606  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
607  }
608 }
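
A minimal sequential-prefetch loop in the spirit of pg_prewarm's prefetch mode (a sketch; assumes rel is open and locked):

BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
    (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);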

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 500 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), INIT_BUFFERTAG, PrefetchBufferResult::initiated_io, InvalidBuffer, LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by PrefetchBuffer().

503 {
504  PrefetchBufferResult result = {InvalidBuffer, false};
505  BufferTag newTag; /* identity of requested block */
506  uint32 newHash; /* hash value for newTag */
507  LWLock *newPartitionLock; /* buffer partition lock for it */
508  int buf_id;
509 
510  Assert(BlockNumberIsValid(blockNum));
511 
512  /* create a tag so we can lookup the buffer */
513  INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node,
514  forkNum, blockNum);
515 
516  /* determine its hash code and partition lock ID */
517  newHash = BufTableHashCode(&newTag);
518  newPartitionLock = BufMappingPartitionLock(newHash);
519 
520  /* see if the block is in the buffer pool already */
521  LWLockAcquire(newPartitionLock, LW_SHARED);
522  buf_id = BufTableLookup(&newTag, newHash);
523  LWLockRelease(newPartitionLock);
524 
525  /* If not in buffers, initiate prefetch */
526  if (buf_id < 0)
527  {
528 #ifdef USE_PREFETCH
529  /*
530  * Try to initiate an asynchronous read. This returns false in
531  * recovery if the relation file doesn't exist.
532  */
533  if (smgrprefetch(smgr_reln, forkNum, blockNum))
534  result.initiated_io = true;
535 #endif /* USE_PREFETCH */
536  }
537  else
538  {
539  /*
540  * Report the buffer it was in at that time. The caller may be able
541  * to avoid a buffer table lookup, but it's not pinned and it must be
542  * rechecked!
543  */
544  result.recent_buffer = buf_id + 1;
545  }
546 
547  /*
548  * If the block *is* in buffers, we do nothing. This is not really ideal:
549  * the block might be just about to be evicted, which would be stupid
550  * since we know we are going to need it soon. But the only easy answer
551  * is to bump the usage_count, which does not seem like a great solution:
552  * when the caller does ultimately touch the block, usage_count would get
553  * bumped again, resulting in too much favoritism for blocks that are
554  * involved in a prefetch sequence. A real fix would involve some
555  * additional per-buffer state, and it's not clear that there's enough of
556  * a problem to justify that.
557  */
558 
559  return result;
560 }
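
A sketch of consuming the result (smgr, forkNum, and blkno are assumed; per the warning above, recent_buffer is unpinned and must be rechecked): ReadRecentBuffer() performs exactly that pin-and-recheck, returning false if the buffer no longer holds the block.

PrefetchBufferResult pr = PrefetchSharedBuffer(smgr, forkNum, blkno);

if (BufferIsValid(pr.recent_buffer) &&
    ReadRecentBuffer(smgr->smgr_rnode.node, forkNum, blkno,
                     pr.recent_buffer))
{
    /* block pinned without a fresh mapping-table lookup */
}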

◆ PrintBufferLeakWarning()

void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2685 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2686 {
2687  BufferDesc *buf;
2688  int32 loccount;
2689  char *path;
2690  BackendId backend;
2691  uint32 buf_state;
2692 
2693  Assert(BufferIsValid(buffer));
2694  if (BufferIsLocal(buffer))
2695  {
2696  buf = GetLocalBufferDescriptor(-buffer - 1);
2697  loccount = LocalRefCount[-buffer - 1];
2698  backend = MyBackendId;
2699  }
2700  else
2701  {
2702  buf = GetBufferDescriptor(buffer - 1);
2703  loccount = GetPrivateRefCount(buffer);
2704  backend = InvalidBackendId;
2705  }
2706 
2707  /* theoretically we should lock the bufhdr here */
2708  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2709  buf_state = pg_atomic_read_u32(&buf->state);
2710  elog(WARNING,
2711  "buffer refcount leak: [%03d] "
2712  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2713  buffer, path,
2714  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2715  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2716  pfree(path);
2717 }

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 694 of file bufmgr.c.

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), fill_seq_with_data(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

695 {
696  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
697 }
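A typical call sequence, shown here as a hypothetical caller sketch (rel and
blkno are assumed to be an open Relation and an existing block; these names
are not from bufmgr.c):

    Buffer      buf;
    Page        page;

    buf = ReadBuffer(rel, blkno);           /* pin the block */
    LockBuffer(buf, BUFFER_LOCK_SHARE);     /* content lock before reading */
    page = BufferGetPage(buf);

    /* ... examine the page contents ... */

    UnlockReleaseBuffer(buf);               /* release lock and pin together */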

◆ ReadBuffer_common()

static Buffer ReadBuffer_common ( SMgrRelation  reln,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool *  hit 
)
static

Definition at line 801 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, BufferUsage::local_blks_written, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, BufferUsage::shared_blks_written, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().
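Of the cases handled below, the P_NEW path (isExtend) is easiest to see from
the caller's side. A hedged sketch of extending a relation through
ReadBufferExtended(); real callers such as RelationGetBufferForTuple() also
take the relation extension lock and arrange WAL logging, which this
hypothetical fragment omits:

    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, P_NEW,
                                         RBM_NORMAL, NULL);

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
    MarkBufferDirty(buf);                   /* needs pin + exclusive lock */
    UnlockReleaseBuffer(buf);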

804 {
805  BufferDesc *bufHdr;
806  Block bufBlock;
807  bool found;
808  bool isExtend;
809  bool isLocalBuf = SmgrIsTemp(smgr);
810 
811  *hit = false;
812 
813  /* Make sure we will have room to remember the buffer pin */
814  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
815 
816  isExtend = (blockNum == P_NEW);
817 
818  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
819  smgr->smgr_rnode.node.spcNode,
820  smgr->smgr_rnode.node.dbNode,
821  smgr->smgr_rnode.node.relNode,
822  smgr->smgr_rnode.backend,
823  isExtend);
824 
825  /* Substitute proper block number if caller asked for P_NEW */
826  if (isExtend)
827  blockNum = smgrnblocks(smgr, forkNum);
828 
829  if (isLocalBuf)
830  {
831  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
832  if (found)
833  pgBufferUsage.local_blks_hit++;
834  else if (isExtend)
835  pgBufferUsage.local_blks_written++;
836  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
837  mode == RBM_ZERO_ON_ERROR)
838  pgBufferUsage.local_blks_read++;
839  }
840  else
841  {
842  /*
843  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
844  * not currently in memory.
845  */
846  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
847  strategy, &found);
848  if (found)
849  pgBufferUsage.shared_blks_hit++;
850  else if (isExtend)
851  pgBufferUsage.shared_blks_written++;
852  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
853  mode == RBM_ZERO_ON_ERROR)
854  pgBufferUsage.shared_blks_read++;
855  }
856 
857  /* At this point we do NOT hold any locks. */
858 
859  /* if it was already in the buffer pool, we're done */
860  if (found)
861  {
862  if (!isExtend)
863  {
864  /* Just need to update stats before we exit */
865  *hit = true;
866  VacuumPageHit++;
867 
868  if (VacuumCostActive)
869  VacuumCostBalance += VacuumCostPageHit;
870 
871  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
872  smgr->smgr_rnode.node.spcNode,
873  smgr->smgr_rnode.node.dbNode,
874  smgr->smgr_rnode.node.relNode,
875  smgr->smgr_rnode.backend,
876  isExtend,
877  found);
878 
879  /*
880  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
881  * locked on return.
882  */
883  if (!isLocalBuf)
884  {
885  if (mode == RBM_ZERO_AND_LOCK)
886  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
887  LW_EXCLUSIVE);