PostgreSQL Source Code (git master)
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void FindAndDropRelFileNodeBuffers (RelFileNode rnode, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *a, const BufferTag *b)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelFileNodesAllBuffers (SMgrRelation *smgr_reln, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int maintenance_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 79 of file bufmgr.c.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 69 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 68 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 61 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:387
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int32 * LocalRefCount
Definition: localbuf.c:45

Definition at line 448 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), ConditionalLockBuffer(), FlushOneBuffer(), IncrBufferRefCount(), LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), and ReleaseAndReadBuffer().
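
Internal callers assert this macro before reading buffer fields that are only stable while a pin is held. A minimal sketch of that pattern (the helper name is hypothetical, not part of bufmgr.c):

 /* hypothetical helper: read a tag-derived field only under a pin */
 static BlockNumber
 peek_blockno(Buffer buffer)
 {
  Assert(BufferIsPinned(buffer)); /* the pin makes the tag stable */
  return BufferGetBlockNumber(buffer);
 }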

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 60 of file bufmgr.c.

Referenced by FlushBuffer(), PinBuffer(), PinBuffer_Locked(), ReadBuffer_common(), and UnpinBuffer().

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 64 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 71 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers(), and FlushRelationsAllBuffers().

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 4762 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 4762 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 4764 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 4764 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 4761 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 4761 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 4763 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 4763 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 4760 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 4760 of file bufmgr.c.
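
Both instantiations follow the lib/sort_template.h pattern: define the ST_* configuration macros, then include the template header once per configuration to emit a specialized static sort routine named by ST_SORT. An abridged sketch of the first instantiation, plus the call site from BufferSync():

 #define ST_SORT sort_checkpoint_bufferids
 #define ST_ELEMENT_TYPE CkptSortItem
 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
 #define ST_SCOPE static
 #define ST_DEFINE
 #include "lib/sort_template.h"

 /* later, in BufferSync(): sort the to-be-checkpointed buffers */
 sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);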

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

void AbortBufferIO ( void  )

Definition at line 4477 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, buf, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

4478 {
4479  BufferDesc *buf = InProgressBuf;
4480 
4481  if (buf)
4482  {
4483  uint32 buf_state;
4484 
4485  buf_state = LockBufHdr(buf);
4486  Assert(buf_state & BM_IO_IN_PROGRESS);
4487  if (IsForInput)
4488  {
4489  Assert(!(buf_state & BM_DIRTY));
4490 
4491  /* We'd better not think buffer is valid yet */
4492  Assert(!(buf_state & BM_VALID));
4493  UnlockBufHdr(buf, buf_state);
4494  }
4495  else
4496  {
4497  Assert(buf_state & BM_DIRTY);
4498  UnlockBufHdr(buf, buf_state);
4499  /* Issue notice if this is not the first failure... */
4500  if (buf_state & BM_IO_ERROR)
4501  {
4502  /* Buffer is pinned, so we can read tag without spinlock */
4503  char *path;
4504 
4505  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
4506  ereport(WARNING,
4507  (errcode(ERRCODE_IO_ERROR),
4508  errmsg("could not write block %u of %s",
4509  buf->tag.blockNum, path),
4510  errdetail("Multiple failures --- write error might be permanent.")));
4511  pfree(path);
4512  }
4513  }
4514  TerminateBufferIO(buf, false, BM_IO_ERROR);
4515  }
4516 }
#define relpathperm(rnode, forknum)
Definition: relpath.h:83
ForkNumber forkNum
Definition: buf_internals.h:94
int errcode(int sqlerrcode)
Definition: elog.c:698
#define BM_DIRTY
Definition: buf_internals.h:59
static BufferDesc * InProgressBuf
Definition: bufmgr.c:161
void pfree(void *pointer)
Definition: mcxt.c:1169
static char * buf
Definition: pg_test_fsync.c:68
int errdetail(const char *fmt,...)
Definition: elog.c:1042
unsigned int uint32
Definition: c.h:441
static bool IsForInput
Definition: bufmgr.c:162
#define WARNING
Definition: elog.h:40
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:4445
#define BM_VALID
Definition: buf_internals.h:60
#define ereport(elevel,...)
Definition: elog.h:157
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4587
#define Assert(condition)
Definition: c.h:804
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define UnlockBufHdr(desc, s)
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:62

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2579 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2580 {
2581  CheckForBufferLeaks();
2582 
2583  AtEOXact_LocalBuffers(isCommit);
2584 
2585  Assert(PrivateRefCountOverflowed == 0);
2586 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:199
#define Assert(condition)
Definition: c.h:804
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2640
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:577

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 2621 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolAccess().

2622 {
2623  AbortBufferIO();
2624  UnlockBuffers();
2625 
2626  CheckForBufferLeaks();
2627 
2628  /* localbuf.c needs a chance too */
2629  AtProcExit_LocalBuffers();
2630 }
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:588
void UnlockBuffers(void)
Definition: bufmgr.c:3979
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2640
void AbortBufferIO(void)
Definition: bufmgr.c:4477
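
This callback is installed once per backend during startup; a one-line sketch of the registration performed by InitBufferPoolAccess() (hedged, inferred from the reference above):

 /* run buffer-pool cleanup in the shared-memory phase of backend shutdown */
 on_shmem_exit(AtProcExit_Buffers, 0);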

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 2209 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, PendingBgWriterStats, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2210 {
2211  /* info obtained from freelist.c */
2212  int strategy_buf_id;
2213  uint32 strategy_passes;
2214  uint32 recent_alloc;
2215 
2216  /*
2217  * Information saved between calls so we can determine the strategy
2218  * point's advance rate and avoid scanning already-cleaned buffers.
2219  */
2220  static bool saved_info_valid = false;
2221  static int prev_strategy_buf_id;
2222  static uint32 prev_strategy_passes;
2223  static int next_to_clean;
2224  static uint32 next_passes;
2225 
2226  /* Moving averages of allocation rate and clean-buffer density */
2227  static float smoothed_alloc = 0;
2228  static float smoothed_density = 10.0;
2229 
2230  /* Potentially these could be tunables, but for now, not */
2231  float smoothing_samples = 16;
2232  float scan_whole_pool_milliseconds = 120000.0;
2233 
2234  /* Used to compute how far we scan ahead */
2235  long strategy_delta;
2236  int bufs_to_lap;
2237  int bufs_ahead;
2238  float scans_per_alloc;
2239  int reusable_buffers_est;
2240  int upcoming_alloc_est;
2241  int min_scan_buffers;
2242 
2243  /* Variables for the scanning loop proper */
2244  int num_to_scan;
2245  int num_written;
2246  int reusable_buffers;
2247 
2248  /* Variables for final smoothed_density update */
2249  long new_strategy_delta;
2250  uint32 new_recent_alloc;
2251 
2252  /*
2253  * Find out where the freelist clock sweep currently is, and how many
2254  * buffer allocations have happened since our last call.
2255  */
2256  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2257 
2258  /* Report buffer alloc counts to pgstat */
2259  PendingBgWriterStats.m_buf_alloc += recent_alloc;
2260 
2261  /*
2262  * If we're not running the LRU scan, just stop after doing the stats
2263  * stuff. We mark the saved state invalid so that we can recover sanely
2264  * if LRU scan is turned back on later.
2265  */
2266  if (bgwriter_lru_maxpages <= 0)
2267  {
2268  saved_info_valid = false;
2269  return true;
2270  }
2271 
2272  /*
2273  * Compute strategy_delta = how many buffers have been scanned by the
2274  * clock sweep since last time. If first time through, assume none. Then
2275  * see if we are still ahead of the clock sweep, and if so, how many
2276  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2277  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2278  * behavior when the passes counts wrap around.
2279  */
2280  if (saved_info_valid)
2281  {
2282  int32 passes_delta = strategy_passes - prev_strategy_passes;
2283 
2284  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2285  strategy_delta += (long) passes_delta * NBuffers;
2286 
2287  Assert(strategy_delta >= 0);
2288 
2289  if ((int32) (next_passes - strategy_passes) > 0)
2290  {
2291  /* we're one pass ahead of the strategy point */
2292  bufs_to_lap = strategy_buf_id - next_to_clean;
2293 #ifdef BGW_DEBUG
2294  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2295  next_passes, next_to_clean,
2296  strategy_passes, strategy_buf_id,
2297  strategy_delta, bufs_to_lap);
2298 #endif
2299  }
2300  else if (next_passes == strategy_passes &&
2301  next_to_clean >= strategy_buf_id)
2302  {
2303  /* on same pass, but ahead or at least not behind */
2304  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2305 #ifdef BGW_DEBUG
2306  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2307  next_passes, next_to_clean,
2308  strategy_passes, strategy_buf_id,
2309  strategy_delta, bufs_to_lap);
2310 #endif
2311  }
2312  else
2313  {
2314  /*
2315  * We're behind, so skip forward to the strategy point and start
2316  * cleaning from there.
2317  */
2318 #ifdef BGW_DEBUG
2319  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2320  next_passes, next_to_clean,
2321  strategy_passes, strategy_buf_id,
2322  strategy_delta);
2323 #endif
2324  next_to_clean = strategy_buf_id;
2325  next_passes = strategy_passes;
2326  bufs_to_lap = NBuffers;
2327  }
2328  }
2329  else
2330  {
2331  /*
2332  * Initializing at startup or after LRU scanning had been off. Always
2333  * start at the strategy point.
2334  */
2335 #ifdef BGW_DEBUG
2336  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2337  strategy_passes, strategy_buf_id);
2338 #endif
2339  strategy_delta = 0;
2340  next_to_clean = strategy_buf_id;
2341  next_passes = strategy_passes;
2342  bufs_to_lap = NBuffers;
2343  }
2344 
2345  /* Update saved info for next time */
2346  prev_strategy_buf_id = strategy_buf_id;
2347  prev_strategy_passes = strategy_passes;
2348  saved_info_valid = true;
2349 
2350  /*
2351  * Compute how many buffers had to be scanned for each new allocation, ie,
2352  * 1/density of reusable buffers, and track a moving average of that.
2353  *
2354  * If the strategy point didn't move, we don't update the density estimate
2355  */
2356  if (strategy_delta > 0 && recent_alloc > 0)
2357  {
2358  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2359  smoothed_density += (scans_per_alloc - smoothed_density) /
2360  smoothing_samples;
2361  }
2362 
2363  /*
2364  * Estimate how many reusable buffers there are between the current
2365  * strategy point and where we've scanned ahead to, based on the smoothed
2366  * density estimate.
2367  */
2368  bufs_ahead = NBuffers - bufs_to_lap;
2369  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2370 
2371  /*
2372  * Track a moving average of recent buffer allocations. Here, rather than
2373  * a true average we want a fast-attack, slow-decline behavior: we
2374  * immediately follow any increase.
2375  */
2376  if (smoothed_alloc <= (float) recent_alloc)
2377  smoothed_alloc = recent_alloc;
2378  else
2379  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2380  smoothing_samples;
2381 
2382  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2383  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2384 
2385  /*
2386  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2387  * eventually underflow to zero, and the underflows produce annoying
2388  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2389  * zero, there's no point in tracking smaller and smaller values of
2390  * smoothed_alloc, so just reset it to exactly zero to avoid this
2391  * syndrome. It will pop back up as soon as recent_alloc increases.
2392  */
2393  if (upcoming_alloc_est == 0)
2394  smoothed_alloc = 0;
2395 
2396  /*
2397  * Even in cases where there's been little or no buffer allocation
2398  * activity, we want to make a small amount of progress through the buffer
2399  * cache so that as many reusable buffers as possible are clean after an
2400  * idle period.
2401  *
2402  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2403  * the BGW will be called during the scan_whole_pool time; slice the
2404  * buffer pool into that many sections.
2405  */
2406  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2407 
2408  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2409  {
2410 #ifdef BGW_DEBUG
2411  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2412  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2413 #endif
2414  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2415  }
2416 
2417  /*
2418  * Now write out dirty reusable buffers, working forward from the
2419  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2420  * enough buffers to match our estimate of the next cycle's allocation
2421  * requirements, or hit the bgwriter_lru_maxpages limit.
2422  */
2423 
2424  /* Make sure we can handle the pin inside SyncOneBuffer */
2425  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2426 
2427  num_to_scan = bufs_to_lap;
2428  num_written = 0;
2429  reusable_buffers = reusable_buffers_est;
2430 
2431  /* Execute the LRU scan */
2432  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2433  {
2434  int sync_state = SyncOneBuffer(next_to_clean, true,
2435  wb_context);
2436 
2437  if (++next_to_clean >= NBuffers)
2438  {
2439  next_to_clean = 0;
2440  next_passes++;
2441  }
2442  num_to_scan--;
2443 
2444  if (sync_state & BUF_WRITTEN)
2445  {
2446  reusable_buffers++;
2447  if (++num_written >= bgwriter_lru_maxpages)
2448  {
2449  PendingBgWriterStats.m_maxwritten_clean++;
2450  break;
2451  }
2452  }
2453  else if (sync_state & BUF_REUSABLE)
2454  reusable_buffers++;
2455  }
2456 
2457  PendingBgWriterStats.m_buf_written_clean += num_written;
2458 
2459 #ifdef BGW_DEBUG
2460  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2461  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2462  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2463  bufs_to_lap - num_to_scan,
2464  num_written,
2465  reusable_buffers - reusable_buffers_est);
2466 #endif
2467 
2468  /*
2469  * Consider the above scan as being like a new allocation scan.
2470  * Characterize its density and update the smoothed one based on it. This
2471  * effectively halves the moving average period in cases where both the
2472  * strategy and the background writer are doing some useful scanning,
2473  * which is helpful because a long memory isn't as desirable on the
2474  * density estimates.
2475  */
2476  new_strategy_delta = bufs_to_lap - num_to_scan;
2477  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2478  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2479  {
2480  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2481  smoothed_density += (scans_per_alloc - smoothed_density) /
2482  smoothing_samples;
2483 
2484 #ifdef BGW_DEBUG
2485  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2486  new_recent_alloc, new_strategy_delta,
2487  scans_per_alloc, smoothed_density);
2488 #endif
2489  }
2490 
2491  /* Return true if OK to hibernate */
2492  return (bufs_to_lap == 0 && recent_alloc == 0);
2493 }
PgStat_Counter m_buf_alloc
Definition: pgstat.h:462
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:395
#define DEBUG1
Definition: elog.h:25
int BgWriterDelay
Definition: bgwriter.c:61
PgStat_MsgBgWriter PendingBgWriterStats
Definition: pgstat.c:131
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
PgStat_Counter m_maxwritten_clean
Definition: pgstat.h:461
PgStat_Counter m_buf_written_clean
Definition: pgstat.h:460
double bgwriter_lru_multiplier
Definition: bufmgr.c:134
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:2512
signed int int32
Definition: c.h:429
#define BUF_REUSABLE
Definition: bufmgr.c:69
int bgwriter_lru_maxpages
Definition: bufmgr.c:133
#define DEBUG2
Definition: elog.h:24
unsigned int uint32
Definition: c.h:441
#define BUF_WRITTEN
Definition: bufmgr.c:68
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:946
#define Assert(condition)
Definition: c.h:804
#define elog(elevel,...)
Definition: elog.h:232
int NBuffers
Definition: globals.c:135
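
The allocation forecast above is a fast-attack, slow-decline moving average: increases are adopted immediately, while decreases decay by 1/smoothing_samples per call. A standalone sketch of the update rule, using the same names as the statics above:

 /* fast attack: jump straight to any higher allocation rate */
 if (smoothed_alloc <= (float) recent_alloc)
  smoothed_alloc = recent_alloc;
 else
  /* slow decline: close 1/16th of the gap per bgwriter cycle */
  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) / smoothing_samples;

For example, a burst with recent_alloc = 160 lifts the estimate to 160 at once; if allocations then stop, it decays to 150, then about 140.6, shedding a sixteenth of the remaining gap each cycle.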

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr 
)
static

Definition at line 1101 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, INIT_FORKNUM, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

1105 {
1106  BufferTag newTag; /* identity of requested block */
1107  uint32 newHash; /* hash value for newTag */
1108  LWLock *newPartitionLock; /* buffer partition lock for it */
1109  BufferTag oldTag; /* previous identity of selected buffer */
1110  uint32 oldHash; /* hash value for oldTag */
1111  LWLock *oldPartitionLock; /* buffer partition lock for it */
1112  uint32 oldFlags;
1113  int buf_id;
1114  BufferDesc *buf;
1115  bool valid;
1116  uint32 buf_state;
1117 
1118  /* create a tag so we can lookup the buffer */
1119  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1120 
1121  /* determine its hash code and partition lock ID */
1122  newHash = BufTableHashCode(&newTag);
1123  newPartitionLock = BufMappingPartitionLock(newHash);
1124 
1125  /* see if the block is in the buffer pool already */
1126  LWLockAcquire(newPartitionLock, LW_SHARED);
1127  buf_id = BufTableLookup(&newTag, newHash);
1128  if (buf_id >= 0)
1129  {
1130  /*
1131  * Found it. Now, pin the buffer so no one can steal it from the
1132  * buffer pool, and check to see if the correct data has been loaded
1133  * into the buffer.
1134  */
1135  buf = GetBufferDescriptor(buf_id);
1136 
1137  valid = PinBuffer(buf, strategy);
1138 
1139  /* Can release the mapping lock as soon as we've pinned it */
1140  LWLockRelease(newPartitionLock);
1141 
1142  *foundPtr = true;
1143 
1144  if (!valid)
1145  {
1146  /*
1147  * We can only get here if (a) someone else is still reading in
1148  * the page, or (b) a previous read attempt failed. We have to
1149  * wait for any active read attempt to finish, and then set up our
1150  * own read attempt if the page is still not BM_VALID.
1151  * StartBufferIO does it all.
1152  */
1153  if (StartBufferIO(buf, true))
1154  {
1155  /*
1156  * If we get here, previous attempts to read the buffer must
1157  * have failed ... but we shall bravely try again.
1158  */
1159  *foundPtr = false;
1160  }
1161  }
1162 
1163  return buf;
1164  }
1165 
1166  /*
1167  * Didn't find it in the buffer pool. We'll have to initialize a new
1168  * buffer. Remember to unlock the mapping lock while doing the work.
1169  */
1170  LWLockRelease(newPartitionLock);
1171 
1172  /* Loop here in case we have to try another victim buffer */
1173  for (;;)
1174  {
1175  /*
1176  * Ensure, while the spinlock's not yet held, that there's a free
1177  * refcount entry.
1178  */
1179  ReservePrivateRefCountEntry();
1180 
1181  /*
1182  * Select a victim buffer. The buffer is returned with its header
1183  * spinlock still held!
1184  */
1185  buf = StrategyGetBuffer(strategy, &buf_state);
1186 
1187  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1188 
1189  /* Must copy buffer flags while we still hold the spinlock */
1190  oldFlags = buf_state & BUF_FLAG_MASK;
1191 
1192  /* Pin the buffer and then release the buffer spinlock */
1193  PinBuffer_Locked(buf);
1194 
1195  /*
1196  * If the buffer was dirty, try to write it out. There is a race
1197  * condition here, in that someone might dirty it after we released it
1198  * above, or even while we are writing it out (since our share-lock
1199  * won't prevent hint-bit updates). We will recheck the dirty bit
1200  * after re-locking the buffer header.
1201  */
1202  if (oldFlags & BM_DIRTY)
1203  {
1204  /*
1205  * We need a share-lock on the buffer contents to write it out
1206  * (else we might write invalid data, eg because someone else is
1207  * compacting the page contents while we write). We must use a
1208  * conditional lock acquisition here to avoid deadlock. Even
1209  * though the buffer was not pinned (and therefore surely not
1210  * locked) when StrategyGetBuffer returned it, someone else could
1211  * have pinned and exclusive-locked it by the time we get here. If
1212  * we try to get the lock unconditionally, we'd block waiting for
1213  * them; if they later block waiting for us, deadlock ensues.
1214  * (This has been observed to happen when two backends are both
1215  * trying to split btree index pages, and the second one just
1216  * happens to be trying to split the page the first one got from
1217  * StrategyGetBuffer.)
1218  */
1219  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1220  LW_SHARED))
1221  {
1222  /*
1223  * If using a nondefault strategy, and writing the buffer
1224  * would require a WAL flush, let the strategy decide whether
1225  * to go ahead and write/reuse the buffer or to choose another
1226  * victim. We need lock to inspect the page LSN, so this
1227  * can't be done inside StrategyGetBuffer.
1228  */
1229  if (strategy != NULL)
1230  {
1231  XLogRecPtr lsn;
1232 
1233  /* Read the LSN while holding buffer header lock */
1234  buf_state = LockBufHdr(buf);
1235  lsn = BufferGetLSN(buf);
1236  UnlockBufHdr(buf, buf_state);
1237 
1238  if (XLogNeedsFlush(lsn) &&
1239  StrategyRejectBuffer(strategy, buf))
1240  {
1241  /* Drop lock/pin and loop around for another buffer */
1242  LWLockRelease(BufferDescriptorGetContentLock(buf));
1243  UnpinBuffer(buf, true);
1244  continue;
1245  }
1246  }
1247 
1248  /* OK, do the I/O */
1249  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1250  smgr->smgr_rnode.node.spcNode,
1251  smgr->smgr_rnode.node.dbNode,
1252  smgr->smgr_rnode.node.relNode);
1253 
1254  FlushBuffer(buf, NULL);
1255  LWLockRelease(BufferDescriptorGetContentLock(buf));
1256 
1257  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1258  &buf->tag);
1259 
1260  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1261  smgr->smgr_rnode.node.spcNode,
1262  smgr->smgr_rnode.node.dbNode,
1263  smgr->smgr_rnode.node.relNode);
1264  }
1265  else
1266  {
1267  /*
1268  * Someone else has locked the buffer, so give it up and loop
1269  * back to get another one.
1270  */
1271  UnpinBuffer(buf, true);
1272  continue;
1273  }
1274  }
1275 
1276  /*
1277  * To change the association of a valid buffer, we'll need to have
1278  * exclusive lock on both the old and new mapping partitions.
1279  */
1280  if (oldFlags & BM_TAG_VALID)
1281  {
1282  /*
1283  * Need to compute the old tag's hashcode and partition lock ID.
1284  * XXX is it worth storing the hashcode in BufferDesc so we need
1285  * not recompute it here? Probably not.
1286  */
1287  oldTag = buf->tag;
1288  oldHash = BufTableHashCode(&oldTag);
1289  oldPartitionLock = BufMappingPartitionLock(oldHash);
1290 
1291  /*
1292  * Must lock the lower-numbered partition first to avoid
1293  * deadlocks.
1294  */
1295  if (oldPartitionLock < newPartitionLock)
1296  {
1297  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1298  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1299  }
1300  else if (oldPartitionLock > newPartitionLock)
1301  {
1302  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1303  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1304  }
1305  else
1306  {
1307  /* only one partition, only one lock */
1308  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1309  }
1310  }
1311  else
1312  {
1313  /* if it wasn't valid, we need only the new partition */
1314  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1315  /* remember we have no old-partition lock or tag */
1316  oldPartitionLock = NULL;
1317  /* keep the compiler quiet about uninitialized variables */
1318  oldHash = 0;
1319  }
1320 
1321  /*
1322  * Try to make a hashtable entry for the buffer under its new tag.
1323  * This could fail because while we were writing someone else
1324  * allocated another buffer for the same block we want to read in.
1325  * Note that we have not yet removed the hashtable entry for the old
1326  * tag.
1327  */
1328  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1329 
1330  if (buf_id >= 0)
1331  {
1332  /*
1333  * Got a collision. Someone has already done what we were about to
1334  * do. We'll just handle this as if it were found in the buffer
1335  * pool in the first place. First, give up the buffer we were
1336  * planning to use.
1337  */
1338  UnpinBuffer(buf, true);
1339 
1340  /* Can give up that buffer's mapping partition lock now */
1341  if (oldPartitionLock != NULL &&
1342  oldPartitionLock != newPartitionLock)
1343  LWLockRelease(oldPartitionLock);
1344 
1345  /* remaining code should match code at top of routine */
1346 
1347  buf = GetBufferDescriptor(buf_id);
1348 
1349  valid = PinBuffer(buf, strategy);
1350 
1351  /* Can release the mapping lock as soon as we've pinned it */
1352  LWLockRelease(newPartitionLock);
1353 
1354  *foundPtr = true;
1355 
1356  if (!valid)
1357  {
1358  /*
1359  * We can only get here if (a) someone else is still reading
1360  * in the page, or (b) a previous read attempt failed. We
1361  * have to wait for any active read attempt to finish, and
1362  * then set up our own read attempt if the page is still not
1363  * BM_VALID. StartBufferIO does it all.
1364  */
1365  if (StartBufferIO(buf, true))
1366  {
1367  /*
1368  * If we get here, previous attempts to read the buffer
1369  * must have failed ... but we shall bravely try again.
1370  */
1371  *foundPtr = false;
1372  }
1373  }
1374 
1375  return buf;
1376  }
1377 
1378  /*
1379  * Need to lock the buffer header too in order to change its tag.
1380  */
1381  buf_state = LockBufHdr(buf);
1382 
1383  /*
1384  * Somebody could have pinned or re-dirtied the buffer while we were
1385  * doing the I/O and making the new hashtable entry. If so, we can't
1386  * recycle this buffer; we must undo everything we've done and start
1387  * over with a new victim buffer.
1388  */
1389  oldFlags = buf_state & BUF_FLAG_MASK;
1390  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1391  break;
1392 
1393  UnlockBufHdr(buf, buf_state);
1394  BufTableDelete(&newTag, newHash);
1395  if (oldPartitionLock != NULL &&
1396  oldPartitionLock != newPartitionLock)
1397  LWLockRelease(oldPartitionLock);
1398  LWLockRelease(newPartitionLock);
1399  UnpinBuffer(buf, true);
1400  }
1401 
1402  /*
1403  * Okay, it's finally safe to rename the buffer.
1404  *
1405  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1406  * paranoia. We also reset the usage_count since any recency of use of
1407  * the old content is no longer relevant. (The usage_count starts out at
1408  * 1 so that the buffer can survive one clock-sweep pass.)
1409  *
1410  * Make sure BM_PERMANENT is set for buffers that must be written at every
1411  * checkpoint. Unlogged buffers only need to be written at shutdown
1412  * checkpoints, except for their "init" forks, which need to be treated
1413  * just like permanent relations.
1414  */
1415  buf->tag = newTag;
1416  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1417  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1418  BUF_USAGECOUNT_MASK);
1419  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1420  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1421  else
1422  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1423 
1424  UnlockBufHdr(buf, buf_state);
1425 
1426  if (oldPartitionLock != NULL)
1427  {
1428  BufTableDelete(&oldTag, oldHash);
1429  if (oldPartitionLock != newPartitionLock)
1430  LWLockRelease(oldPartitionLock);
1431  }
1432 
1433  LWLockRelease(newPartitionLock);
1434 
1435  /*
1436  * Buffer contents are currently invalid. Try to obtain the right to
1437  * start I/O. If StartBufferIO returns false, then someone else managed
1438  * to read it before we did, so there's nothing left for BufferAlloc() to
1439  * do.
1440  */
1441  if (StartBufferIO(buf, true))
1442  *foundPtr = false;
1443  else
1444  *foundPtr = true;
1445 
1446  return buf;
1447 }
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:1686
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
Definition: freelist.c:201
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:149
Definition: lwlock.h:31
#define BM_PERMANENT
Definition: buf_internals.h:67
#define BufMappingPartitionLock(hashcode)
#define BM_TAG_VALID
Definition: buf_internals.h:61
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3208
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:66
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:79
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:91
#define BM_DIRTY
Definition: buf_internals.h:59
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2808
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:4394
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:119
void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
Definition: bufmgr.c:4734
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
WritebackContext BackendWritebackContext
Definition: buf_init.c:23
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1370
static char * buf
Definition: pg_test_fsync.c:68
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:44
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
unsigned int uint32
Definition: c.h:441
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1834
#define BM_VALID
Definition: buf_internals.h:60
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
Definition: freelist.c:686
RelFileNode node
Definition: relfilenode.h:74
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4587
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define INIT_BUFFERTAG(a, xx_rnode, xx_forkNum, xx_blockNum)
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1789
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:43
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
#define UnlockBufHdr(desc, s)
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:215
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:61
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
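
BufferAlloc() is reached only through ReadBuffer_common(); from user code the path starts at one of the ReadBuffer variants. A typical caller sketch (standard bufmgr API usage, not specific to this function):

 Buffer buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL);

 LockBuffer(buf, BUFFER_LOCK_SHARE);
 /* ... examine the page contents ... */
 UnlockReleaseBuffer(buf);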

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2748 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2749 {
2750  BufferDesc *bufHdr;
2751 
2752  Assert(BufferIsPinned(buffer));
2753 
2754  if (BufferIsLocal(buffer))
2755  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2756  else
2757  bufHdr = GetBufferDescriptor(buffer - 1);
2758 
2759  /* pinned, so OK to read tag without spinlock */
2760  return bufHdr->tag.blockNum;
2761 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
BufferTag tag
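
Since the pin keeps the tag stable, callers routinely re-derive the block number rather than threading it through their call chains; a minimal usage sketch:

 /* valid while the buffer is pinned; no spinlock is taken */
 BlockNumber blkno = BufferGetBlockNumber(buf);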

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3008 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

3009 {
3010  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3011  char *page = BufferGetPage(buffer);
3012  XLogRecPtr lsn;
3013  uint32 buf_state;
3014 
3015  /*
3016  * If we don't need locking for correctness, fastpath out.
3017  */
3018  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3019  return PageGetLSN(page);
3020 
3021  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3022  Assert(BufferIsValid(buffer));
3023  Assert(BufferIsPinned(buffer));
3024 
3025  buf_state = LockBufHdr(bufHdr);
3026  lsn = PageGetLSN(page);
3027  UnlockBufHdr(bufHdr, buf_state);
3028 
3029  return lsn;
3030 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4587
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define PageGetLSN(page)
Definition: bufpage.h:366
#define UnlockBufHdr(desc, s)
#define XLogHintBitIsNeeded()
Definition: xlog.h:177
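
A minimal usage sketch (caller holds a pin; per the fastpath above, the header spinlock is taken only when XLogHintBitIsNeeded() and the buffer is shared):

 XLogRecPtr lsn = BufferGetLSNAtomic(buffer);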

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileNode rnode,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 2769 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2771 {
2772  BufferDesc *bufHdr;
2773 
2774  /* Do the same checks as BufferGetBlockNumber. */
2775  Assert(BufferIsPinned(buffer));
2776 
2777  if (BufferIsLocal(buffer))
2778  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2779  else
2780  bufHdr = GetBufferDescriptor(buffer - 1);
2781 
2782  /* pinned, so OK to read tag without spinlock */
2783  *rnode = bufHdr->tag.rnode;
2784  *forknum = bufHdr->tag.forkNum;
2785  *blknum = bufHdr->tag.blockNum;
2786 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
ForkNumber forkNum
Definition: buf_internals.h:94
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
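
A minimal usage sketch: recovering a pinned buffer's full identity through the three out-parameters:

 RelFileNode rnode;
 ForkNumber forknum;
 BlockNumber blkno;

 /* all three outputs come from the pin-stable buffer tag */
 BufferGetTag(buffer, &rnode, &forknum, &blkno);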

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2978 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2979 {
2980  BufferDesc *bufHdr;
2981 
2982  /* Local buffers are used only for temp relations. */
2983  if (BufferIsLocal(buffer))
2984  return false;
2985 
2986  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2987  Assert(BufferIsValid(buffer));
2988  Assert(BufferIsPinned(buffer));
2989 
2990  /*
2991  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2992  * need not bother with the buffer header spinlock. Even if someone else
2993  * changes the buffer header state while we're doing this, the state is
2994  * changed atomically, so we'll read the old value or the new value, but
2995  * not random garbage.
2996  */
2997  bufHdr = GetBufferDescriptor(buffer - 1);
2998  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
2999 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define BM_PERMANENT
Definition: buf_internals.h:67
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
pg_atomic_uint32 state
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 1933 of file bufmgr.c.

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgCheckpointer::m_buf_written_checkpoints, NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum, ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1934 {
1935  uint32 buf_state;
1936  int buf_id;
1937  int num_to_scan;
1938  int num_spaces;
1939  int num_processed;
1940  int num_written;
1941  CkptTsStatus *per_ts_stat = NULL;
1942  Oid last_tsid;
1943  binaryheap *ts_heap;
1944  int i;
1945  int mask = BM_DIRTY;
1946  WritebackContext wb_context;
1947 
1948  /* Make sure we can handle the pin inside SyncOneBuffer */
1949  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1950 
1951  /*
1952  * Unless this is a shutdown checkpoint or we have been explicitly told,
1953  * we write only permanent, dirty buffers. But at shutdown or end of
1954  * recovery, we write all dirty buffers.
1955  */
1956  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
1957  CHECKPOINT_FLUSH_ALL))))
1958  mask |= BM_PERMANENT;
1959 
1960  /*
1961  * Loop over all buffers, and mark the ones that need to be written with
1962  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1963  * can estimate how much work needs to be done.
1964  *
1965  * This allows us to write only those pages that were dirty when the
1966  * checkpoint began, and not those that get dirtied while it proceeds.
1967  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1968  * later in this function, or by normal backends or the bgwriter cleaning
1969  * scan, the flag is cleared. Any buffer dirtied after this point won't
1970  * have the flag set.
1971  *
1972  * Note that if we fail to write some buffer, we may leave buffers with
1973  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1974  * certainly need to be written for the next checkpoint attempt, too.
1975  */
1976  num_to_scan = 0;
1977  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1978  {
1979  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1980 
1981  /*
1982  * Header spinlock is enough to examine BM_DIRTY, see comment in
1983  * SyncOneBuffer.
1984  */
1985  buf_state = LockBufHdr(bufHdr);
1986 
1987  if ((buf_state & mask) == mask)
1988  {
1989  CkptSortItem *item;
1990 
1991  buf_state |= BM_CHECKPOINT_NEEDED;
1992 
1993  item = &CkptBufferIds[num_to_scan++];
1994  item->buf_id = buf_id;
1995  item->tsId = bufHdr->tag.rnode.spcNode;
1996  item->relNode = bufHdr->tag.rnode.relNode;
1997  item->forkNum = bufHdr->tag.forkNum;
1998  item->blockNum = bufHdr->tag.blockNum;
1999  }
2000 
2001  UnlockBufHdr(bufHdr, buf_state);
2002 
2003  /* Check for barrier events in case NBuffers is large. */
2004  if (ProcSignalBarrierPending)
2005  ProcessProcSignalBarrier();
2006  }
2007 
2008  if (num_to_scan == 0)
2009  return; /* nothing to do */
2010 
2011  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2012 
2013  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2014 
2015  /*
2016  * Sort buffers that need to be written to reduce the likelihood of random
2017  * IO. The sorting is also important for the implementation of balancing
2018  * writes between tablespaces. Without balancing writes we'd potentially
2019  * end up writing to the tablespaces one-by-one; possibly overloading the
2020  * underlying system.
2021  */
2022  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2023 
2024  num_spaces = 0;
2025 
2026  /*
2027  * Allocate progress status for each tablespace with buffers that need to
2028  * be flushed. This requires the to-be-flushed array to be sorted.
2029  */
2030  last_tsid = InvalidOid;
2031  for (i = 0; i < num_to_scan; i++)
2032  {
2033  CkptTsStatus *s;
2034  Oid cur_tsid;
2035 
2036  cur_tsid = CkptBufferIds[i].tsId;
2037 
2038  /*
2039  * Grow array of per-tablespace status structs, every time a new
2040  * tablespace is found.
2041  */
2042  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2043  {
2044  Size sz;
2045 
2046  num_spaces++;
2047 
2048  /*
2049  * Not worth adding grow-by-power-of-2 logic here - even with a
2050  * few hundred tablespaces this should be fine.
2051  */
2052  sz = sizeof(CkptTsStatus) * num_spaces;
2053 
2054  if (per_ts_stat == NULL)
2055  per_ts_stat = (CkptTsStatus *) palloc(sz);
2056  else
2057  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2058 
2059  s = &per_ts_stat[num_spaces - 1];
2060  memset(s, 0, sizeof(*s));
2061  s->tsId = cur_tsid;
2062 
2063  /*
2064  * The first buffer in this tablespace. As CkptBufferIds is sorted
2065  * by tablespace all (s->num_to_scan) buffers in this tablespace
2066  * will follow afterwards.
2067  */
2068  s->index = i;
2069 
2070  /*
2071  * progress_slice will be determined once we know how many buffers
2072  * are in each tablespace, i.e. after this loop.
2073  */
2074 
2075  last_tsid = cur_tsid;
2076  }
2077  else
2078  {
2079  s = &per_ts_stat[num_spaces - 1];
2080  }
2081 
2082  s->num_to_scan++;
2083 
2084  /* Check for barrier events. */
2085  if (ProcSignalBarrierPending)
2086  ProcessProcSignalBarrier();
2087  }
2088 
2089  Assert(num_spaces > 0);
2090 
2091  /*
2092  * Build a min-heap over the write-progress in the individual tablespaces,
2093  * and compute how large a portion of the total progress a single
2094  * processed buffer is.
2095  */
2096  ts_heap = binaryheap_allocate(num_spaces,
2097  ts_ckpt_progress_comparator,
2098  NULL);
2099 
2100  for (i = 0; i < num_spaces; i++)
2101  {
2102  CkptTsStatus *ts_stat = &per_ts_stat[i];
2103 
2104  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2105 
2106  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2107  }
2108 
2109  binaryheap_build(ts_heap);
2110 
2111  /*
2112  * Iterate through to-be-checkpointed buffers and write the ones (still)
2113  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2114  * tablespaces; otherwise the sorting would lead to only one tablespace
2115  * receiving writes at a time, making inefficient use of the hardware.
2116  */
2117  num_processed = 0;
2118  num_written = 0;
2119  while (!binaryheap_empty(ts_heap))
2120  {
2121  BufferDesc *bufHdr = NULL;
2122  CkptTsStatus *ts_stat = (CkptTsStatus *)
2123  DatumGetPointer(binaryheap_first(ts_heap));
2124 
2125  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2126  Assert(buf_id != -1);
2127 
2128  bufHdr = GetBufferDescriptor(buf_id);
2129 
2130  num_processed++;
2131 
2132  /*
2133  * We don't need to acquire the lock here, because we're only looking
2134  * at a single bit. It's possible that someone else writes the buffer
2135  * and clears the flag right after we check, but that doesn't matter
2136  * since SyncOneBuffer will then do nothing. However, there is a
2137  * further race condition: it's conceivable that between the time we
2138  * examine the bit here and the time SyncOneBuffer acquires the lock,
2139  * someone else not only wrote the buffer but replaced it with another
2140  * page and dirtied it. In that improbable case, SyncOneBuffer will
2141  * write the buffer though we didn't need to. It doesn't seem worth
2142  * guarding against this, though.
2143  */
2144  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2145  {
2146  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2147  {
2148  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2149  PendingCheckpointerStats.m_buf_written_checkpoints++;
2150  num_written++;
2151  }
2152  }
2153 
2154  /*
2155  * Measure progress independent of actually having to flush the buffer
2156  * - otherwise writing becomes unbalanced.
2157  */
2158  ts_stat->progress += ts_stat->progress_slice;
2159  ts_stat->num_scanned++;
2160  ts_stat->index++;
2161 
2162  /* Have all the buffers from the tablespace been processed? */
2163  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2164  {
2165  binaryheap_remove_first(ts_heap);
2166  }
2167  else
2168  {
2169  /* update heap with the new progress */
2170  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2171  }
2172 
2173  /*
2174  * Sleep to throttle our I/O rate.
2175  *
2176  * (This will check for barrier events even if it doesn't sleep.)
2177  */
2178  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2179  }
2180 
2181  /* issue all pending flushes */
2182  IssuePendingWritebacks(&wb_context);
2183 
2184  pfree(per_ts_stat);
2185  per_ts_stat = NULL;
2186  binaryheap_free(ts_heap);
2187 
2188  /*
2189  * Update checkpoint statistics. As noted above, this doesn't include
2190  * buffers written by other backends or bgwriter scan.
2191  */
2192  CheckpointStats.ckpt_bufs_written += num_written;
2193 
2194  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2195 }
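To see the balancing rule in isolation, here is a small self-contained sketch (illustrative only, not PostgreSQL code; bufmgr.c uses a binary heap where this uses a linear minimum search). Each tablespace advances by total/num_to_scan per written buffer, and the least-progressed tablespace is always serviced next, so writes interleave roughly in proportion to each tablespace's share of the work:

#include <stdio.h>

typedef struct
{
    const char *name;
    int         to_scan;
    int         scanned;
    double      slice;      /* progress credited per written buffer */
    double      progress;
} Ts;

int
main(void)
{
    Ts      ts[] = {{"ts_a", 6, 0, 0, 0}, {"ts_b", 2, 0, 0, 0}, {"ts_c", 4, 0, 0, 0}};
    int     nts = 3, total = 12, done = 0;

    for (int i = 0; i < nts; i++)
        ts[i].slice = (double) total / ts[i].to_scan;

    while (done < total)
    {
        int     best = -1;

        for (int i = 0; i < nts; i++)   /* pick least-progressed tablespace */
            if (ts[i].scanned < ts[i].to_scan &&
                (best < 0 || ts[i].progress < ts[best].progress))
                best = i;
        printf("write buffer from %s\n", ts[best].name);
        ts[best].progress += ts[best].slice;
        ts[best].scanned++;
        done++;
    }
    return 0;
}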

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag *  ba,
const BufferTag *  bb 
)
inline static

Definition at line 4639 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

4640 {
4641  int ret;
4642 
4643  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4644 
4645  if (ret != 0)
4646  return ret;
4647 
4648  if (ba->forkNum < bb->forkNum)
4649  return -1;
4650  if (ba->forkNum > bb->forkNum)
4651  return 1;
4652 
4653  if (ba->blockNum < bb->blockNum)
4654  return -1;
4655  if (ba->blockNum > bb->blockNum)
4656  return 1;
4657 
4658  return 0;
4659 }

◆ BufmgrCommit()

void BufmgrCommit ( void  )

Definition at line 2734 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2735 {
2736  /* Nothing to do in bufmgr anymore... */
2737 }

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 2640 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2641 {
2642 #ifdef USE_ASSERT_CHECKING
2643  int RefCountErrors = 0;
2644  PrivateRefCountEntry *res;
2645  int i;
2646 
2647  /* check the array */
2648  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2649  {
2650  res = &PrivateRefCountArray[i];
2651 
2652  if (res->buffer != InvalidBuffer)
2653  {
2654  PrintBufferLeakWarning(res->buffer);
2655  RefCountErrors++;
2656  }
2657  }
2658 
2659  /* if necessary search the hash */
2660  if (PrivateRefCountOverflowed)
2661  {
2662  HASH_SEQ_STATUS hstat;
2663 
2664  hash_seq_init(&hstat, PrivateRefCountHash);
2665  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2666  {
2667  PrintBufferLeakWarning(res->buffer);
2668  RefCountErrors++;
2669  }
2670 
2671  }
2672 
2673  Assert(RefCountErrors == 0);
2674 #endif
2675 }

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 2724 of file bufmgr.c.

References BufferSync().

Referenced by CheckPointGuts().

2725 {
2726  BufferSync(flags);
2727 }

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem *  a,
const CkptSortItem *  b 
)
inline static

Definition at line 4668 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

4669 {
4670  /* compare tablespace */
4671  if (a->tsId < b->tsId)
4672  return -1;
4673  else if (a->tsId > b->tsId)
4674  return 1;
4675  /* compare relation */
4676  if (a->relNode < b->relNode)
4677  return -1;
4678  else if (a->relNode > b->relNode)
4679  return 1;
4680  /* compare fork */
4681  else if (a->forkNum < b->forkNum)
4682  return -1;
4683  else if (a->forkNum > b->forkNum)
4684  return 1;
4685  /* compare block number */
4686  else if (a->blockNum < b->blockNum)
4687  return -1;
4688  else if (a->blockNum > b->blockNum)
4689  return 1;
4690  /* equal page IDs are unlikely, but not impossible */
4691  return 0;
4692 }
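This comparator establishes a total order on (tablespace, relation, fork, block), which is what lets BufferSync() group checkpoint writes per tablespace and issue them in file order. In bufmgr.c the sort is instantiated through sort_template.h as sort_checkpoint_bufferids(); as a sketch, the same comparator could drive a plain qsort() through an adapter (the adapter and wrapper names here are hypothetical, and the code assumes bufmgr.c's internal types):

/* hypothetical adapter from the typed comparator to qsort's signature */
static int
ckpt_qsort_adapter(const void *a, const void *b)
{
    return ckpt_buforder_comparator((const CkptSortItem *) a,
                                    (const CkptSortItem *) b);
}

static void
sort_ckpt_ids(CkptSortItem *ids, int n)
{
    qsort(ids, n, sizeof(CkptSortItem), ckpt_qsort_adapter);
}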

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 4033 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

4034 {
4035  BufferDesc *buf;
4036 
4037  Assert(BufferIsPinned(buffer));
4038  if (BufferIsLocal(buffer))
4039  return true; /* act as though we got it */
4040 
4041  buf = GetBufferDescriptor(buffer - 1);
4042 
4043  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
4044  LW_EXCLUSIVE);
4045 }
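Typical call pattern, sketched: the caller already holds a pin (e.g. via ReadBuffer) and wants the exclusive content lock only if it can be had without blocking. A minimal sketch assuming a backend/extension context; the helper name is hypothetical:

#include "postgres.h"
#include "storage/bufmgr.h"

/* hypothetical helper: touch blkno of rel only if no one else holds
 * the content lock */
static bool
try_update_page(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);
    bool        done = false;

    if (ConditionalLockBuffer(buf))     /* never blocks */
    {
        /* exclusive content lock held: safe to inspect/modify the page */
        done = true;
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }
    ReleaseBuffer(buf);                 /* drop the pin either way */
    return done;
}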

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 4241 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().

4242 {
4243  BufferDesc *bufHdr;
4244  uint32 buf_state,
4245  refcount;
4246 
4247  Assert(BufferIsValid(buffer));
4248 
4249  if (BufferIsLocal(buffer))
4250  {
4251  refcount = LocalRefCount[-buffer - 1];
4252  /* There should be exactly one pin */
4253  Assert(refcount > 0);
4254  if (refcount != 1)
4255  return false;
4256  /* Nobody else to wait for */
4257  return true;
4258  }
4259 
4260  /* There should be exactly one local pin */
4261  refcount = GetPrivateRefCount(buffer);
4262  Assert(refcount);
4263  if (refcount != 1)
4264  return false;
4265 
4266  /* Try to acquire lock */
4267  if (!ConditionalLockBuffer(buffer))
4268  return false;
4269 
4270  bufHdr = GetBufferDescriptor(buffer - 1);
4271  buf_state = LockBufHdr(bufHdr);
4272  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
4273 
4274  Assert(refcount > 0);
4275  if (refcount == 1)
4276  {
4277  /* Successfully acquired exclusive lock with pincount 1 */
4278  UnlockBufHdr(bufHdr, buf_state);
4279  return true;
4280  }
4281 
4282  /* Failed, so release the lock */
4283  UnlockBufHdr(bufHdr, buf_state);
4284  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4285  return false;
4286 }
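The cleanup-lock variant follows the same opportunistic pattern; a minimal sketch of how callers such as lazy_scan_heap() and heap_page_prune_opt() use it (the wrapper name is hypothetical):

#include "postgres.h"
#include "storage/bufmgr.h"

/* hypothetical wrapper: prune a page only if we can get a cleanup lock
 * (exclusive content lock plus pin count 1) without waiting */
static void
maybe_prune_page(Buffer buf)
{
    if (!ConditionalLockBufferForCleanup(buf))
        return;                 /* page is in use elsewhere; just skip it */

    /* cleanup lock held: safe to defragment or remove dead tuples here */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}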

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3408 of file bufmgr.c.

References buftag::blockNum, buf, BufferDescriptorGetBuffer, RelFileNode::dbNode, elog, buftag::forkNum, BufferDesc::freeNext, GetBufferDescriptor, GetPrivateRefCount(), i, InvalidateBuffer(), InvalidBackendId, LockBufHdr(), LOG, NBuffers, relpathbackend, relpathperm, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3409 {
3410  int i;
3411 
3412  /*
3413  * We needn't consider local buffers, since by assumption the target
3414  * database isn't our own.
3415  */
3416 
3417  for (i = 0; i < NBuffers; i++)
3418  {
3419  BufferDesc *bufHdr = GetBufferDescriptor(i);
3420  uint32 buf_state;
3421 
3422  /*
3423  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3424  * and saves some cycles.
3425  */
3426  if (bufHdr->tag.rnode.dbNode != dbid)
3427  continue;
3428 
3429  buf_state = LockBufHdr(bufHdr);
3430  if (bufHdr->tag.rnode.dbNode == dbid)
3431  InvalidateBuffer(bufHdr); /* releases spinlock */
3432  else
3433  UnlockBufHdr(bufHdr, buf_state);
3434  }
3435 }

◆ DropRelFileNodeBuffers()

void DropRelFileNodeBuffers ( SMgrRelation  smgr_reln,
ForkNumber *  forkNum,
int  nforks,
BlockNumber *  firstDelBlock 
)

Definition at line 3054 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeLocalBuffers(), FindAndDropRelFileNodeBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, SMgrRelationData::smgr_rnode, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrtruncate().

3056 {
3057  int i;
3058  int j;
3059  RelFileNodeBackend rnode;
3060  BlockNumber nForkBlock[MAX_FORKNUM];
3061  uint64 nBlocksToInvalidate = 0;
3062 
3063  rnode = smgr_reln->smgr_rnode;
3064 
3065  /* If it's a local relation, it's localbuf.c's problem. */
3066  if (RelFileNodeBackendIsTemp(rnode))
3067  {
3068  if (rnode.backend == MyBackendId)
3069  {
3070  for (j = 0; j < nforks; j++)
3071  DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
3072  firstDelBlock[j]);
3073  }
3074  return;
3075  }
3076 
3077  /*
3078  * To remove all the pages of the specified relation forks from the buffer
3079  * pool, we need to scan the entire buffer pool but we can optimize it by
3080  * finding the buffers from BufMapping table provided we know the exact
3081  * size of each fork of the relation. The exact size is required to ensure
3082  * that we don't leave any buffer for the relation being dropped as
3083  * otherwise the background writer or checkpointer can lead to a PANIC
3084  * error while flushing buffers corresponding to files that don't exist.
3085  *
3086  * To know the exact size, we rely on the size cached for each fork by us
3087  * during recovery which limits the optimization to recovery and on
3088  * standbys but we can easily extend it once we have shared cache for
3089  * relation size.
3090  *
3091  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3092  * and future writes keep the cached value up-to-date. See
3093  * smgrextend. It is possible that the value of the first lseek is smaller
3094  * than the actual number of existing blocks in the file due to buggy
3095  * Linux kernels that might not have accounted for the recent write. But
3096  * that should be fine because there must not be any buffers after that
3097  * file size.
3098  */
3099  for (i = 0; i < nforks; i++)
3100  {
3101  /* Get the number of blocks for a relation's fork */
3102  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3103 
3104  if (nForkBlock[i] == InvalidBlockNumber)
3105  {
3106  nBlocksToInvalidate = InvalidBlockNumber;
3107  break;
3108  }
3109 
3110  /* calculate the number of blocks to be invalidated */
3111  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3112  }
3113 
3114  /*
3115  * We apply the optimization iff the total number of blocks to invalidate
3116  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3117  */
3118  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3119  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3120  {
3121  for (j = 0; j < nforks; j++)
3122  FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
3123  nForkBlock[j], firstDelBlock[j]);
3124  return;
3125  }
3126 
3127  for (i = 0; i < NBuffers; i++)
3128  {
3129  BufferDesc *bufHdr = GetBufferDescriptor(i);
3130  uint32 buf_state;
3131 
3132  /*
3133  * We can make this a tad faster by prechecking the buffer tag before
3134  * we attempt to lock the buffer; this saves a lot of lock
3135  * acquisitions in typical cases. It should be safe because the
3136  * caller must have AccessExclusiveLock on the relation, or some other
3137  * reason to be certain that no one is loading new pages of the rel
3138  * into the buffer pool. (Otherwise we might well miss such pages
3139  * entirely.) Therefore, while the tag might be changing while we
3140  * look at it, it can't be changing *to* a value we care about, only
3141  * *away* from such a value. So false negatives are impossible, and
3142  * false positives are safe because we'll recheck after getting the
3143  * buffer lock.
3144  *
3145  * We could check forkNum and blockNum as well as the rnode, but the
3146  * incremental win from doing so seems small.
3147  */
3148  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
3149  continue;
3150 
3151  buf_state = LockBufHdr(bufHdr);
3152 
3153  for (j = 0; j < nforks; j++)
3154  {
3155  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
3156  bufHdr->tag.forkNum == forkNum[j] &&
3157  bufHdr->tag.blockNum >= firstDelBlock[j])
3158  {
3159  InvalidateBuffer(bufHdr); /* releases spinlock */
3160  break;
3161  }
3162  }
3163  if (j >= nforks)
3164  UnlockBufHdr(bufHdr, buf_state);
3165  }
3166 }
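To put the threshold in concrete terms, BUF_DROP_FULL_SCAN_THRESHOLD is NBuffers / 32, so with shared_buffers = 128MB (NBuffers = 16384 at the default 8kB block size) the targeted BufMapping lookup is used only when fewer than 512 blocks need invalidating. A standalone arithmetic sketch, with the 128MB setting as an assumed example:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    int         NBuffers = 16384;   /* shared_buffers = 128MB / 8kB blocks */
    uint64_t    threshold = (uint64_t) (NBuffers / 32);

    printf("full scan avoided when blocks to drop < %llu (= %llu kB)\n",
           (unsigned long long) threshold,
           (unsigned long long) (threshold * 8));
    return 0;
}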

◆ DropRelFileNodesAllBuffers()

void DropRelFileNodesAllBuffers ( SMgrRelation *  smgr_reln,
int  nnodes 
)

Definition at line 3178 of file bufmgr.c.

References BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeAllLocalBuffers(), FindAndDropRelFileNodeBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, SMgrRelationData::node, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, buftag::rnode, rnode_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkall().

3179 {
3180  int i;
3181  int j;
3182  int n = 0;
3183  SMgrRelation *rels;
3184  BlockNumber (*block)[MAX_FORKNUM + 1];
3185  uint64 nBlocksToInvalidate = 0;
3186  RelFileNode *nodes;
3187  bool cached = true;
3188  bool use_bsearch;
3189 
3190  if (nnodes == 0)
3191  return;
3192 
3193  rels = palloc(sizeof(SMgrRelation) * nnodes); /* non-local relations */
3194 
3195  /* If it's a local relation, it's localbuf.c's problem. */
3196  for (i = 0; i < nnodes; i++)
3197  {
3198  if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
3199  {
3200  if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
3201  DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
3202  }
3203  else
3204  rels[n++] = smgr_reln[i];
3205  }
3206 
3207  /*
3208  * If there are no non-local relations, then we're done. Release the
3209  * memory and return.
3210  */
3211  if (n == 0)
3212  {
3213  pfree(rels);
3214  return;
3215  }
3216 
3217  /*
3218  * This is used to remember the number of blocks for all the relations'
3219  * forks.
3220  */
3221  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3222  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3223 
3224  /*
3225  * We can avoid scanning the entire buffer pool if we know the exact size
3226  * of each of the given relation forks. See DropRelFileNodeBuffers.
3227  */
3228  for (i = 0; i < n && cached; i++)
3229  {
3230  for (j = 0; j <= MAX_FORKNUM; j++)
3231  {
3232  /* Get the number of blocks for a relation's fork. */
3233  block[i][j] = smgrnblocks_cached(rels[i], j);
3234 
3235  /* We only need to consider the relation forks that exist. */
3236  if (block[i][j] == InvalidBlockNumber)
3237  {
3238  if (!smgrexists(rels[i], j))
3239  continue;
3240  cached = false;
3241  break;
3242  }
3243 
3244  /* calculate the total number of blocks to be invalidated */
3245  nBlocksToInvalidate += block[i][j];
3246  }
3247  }
3248 
3249  /*
3250  * We apply the optimization iff the total number of blocks to invalidate
3251  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3252  */
3253  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3254  {
3255  for (i = 0; i < n; i++)
3256  {
3257  for (j = 0; j <= MAX_FORKNUM; j++)
3258  {
3259  /* ignore relation forks that don't exist */
3260  if (!BlockNumberIsValid(block[i][j]))
3261  continue;
3262 
3263  /* drop all the buffers for a particular relation fork */
3264  FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node,
3265  j, block[i][j], 0);
3266  }
3267  }
3268 
3269  pfree(block);
3270  pfree(rels);
3271  return;
3272  }
3273 
3274  pfree(block);
3275  nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
3276  for (i = 0; i < n; i++)
3277  nodes[i] = rels[i]->smgr_rnode.node;
3278 
3279  /*
3280  * For a low number of relations to drop, just use a simple walk-through to
3281  * save the bsearch overhead. The threshold to use is rather a guess than
3282  * an exactly determined value, as it depends on many factors (CPU and RAM
3283  * speeds, amount of shared buffers etc.).
3284  */
3285  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3286 
3287  /* sort the list of rnodes if necessary */
3288  if (use_bsearch)
3289  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
3290 
3291  for (i = 0; i < NBuffers; i++)
3292  {
3293  RelFileNode *rnode = NULL;
3294  BufferDesc *bufHdr = GetBufferDescriptor(i);
3295  uint32 buf_state;
3296 
3297  /*
3298  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3299  * and saves some cycles.
3300  */
3301 
3302  if (!use_bsearch)
3303  {
3304  int j;
3305 
3306  for (j = 0; j < n; j++)
3307  {
3308  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
3309  {
3310  rnode = &nodes[j];
3311  break;
3312  }
3313  }
3314  }
3315  else
3316  {
3317  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
3318  nodes, n, sizeof(RelFileNode),
3319  rnode_comparator);
3320  }
3321 
3322  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3323  if (rnode == NULL)
3324  continue;
3325 
3326  buf_state = LockBufHdr(bufHdr);
3327  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3328  InvalidateBuffer(bufHdr); /* releases spinlock */
3329  else
3330  UnlockBufHdr(bufHdr, buf_state);
3331  }
3332 
3333  pfree(nodes);
3334  pfree(rels);
3335 }
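The linear-versus-bsearch choice generalizes: below RELS_BSEARCH_THRESHOLD a straight walk is cheaper than sorting plus binary search. A self-contained sketch of the same pattern, with integer keys standing in for RelFileNodes:

#include <stdio.h>
#include <stdlib.h>

#define THRESHOLD 20            /* stands in for RELS_BSEARCH_THRESHOLD */

static int
cmp_int(const void *a, const void *b)
{
    int     ia = *(const int *) a,
            ib = *(const int *) b;

    return (ia > ib) - (ia < ib);
}

/* keys must be sorted whenever n exceeds THRESHOLD */
static int
contains(const int *keys, int n, int probe)
{
    if (n <= THRESHOLD)
    {
        for (int i = 0; i < n; i++)     /* small n: a straight walk wins */
            if (keys[i] == probe)
                return 1;
        return 0;
    }
    return bsearch(&probe, keys, n, sizeof(int), cmp_int) != NULL;
}

int
main(void)
{
    int     keys[] = {3, 1, 4, 1, 5, 9, 2, 6};

    qsort(keys, 8, sizeof(int), cmp_int);   /* sort once, as the caller does */
    printf("%d %d\n", contains(keys, 8, 5), contains(keys, 8, 7));
    return 0;
}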

◆ FindAndDropRelFileNodeBuffers()

static void FindAndDropRelFileNodeBuffers ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3347 of file bufmgr.c.

References buftag::blockNum, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), buftag::forkNum, GetBufferDescriptor, INIT_BUFFERTAG, InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

3350 {
3351  BlockNumber curBlock;
3352 
3353  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3354  {
3355  uint32 bufHash; /* hash value for tag */
3356  BufferTag bufTag; /* identity of requested block */
3357  LWLock *bufPartitionLock; /* buffer partition lock for it */
3358  int buf_id;
3359  BufferDesc *bufHdr;
3360  uint32 buf_state;
3361 
3362  /* create a tag so we can lookup the buffer */
3363  INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock);
3364 
3365  /* determine its hash code and partition lock ID */
3366  bufHash = BufTableHashCode(&bufTag);
3367  bufPartitionLock = BufMappingPartitionLock(bufHash);
3368 
3369  /* Check that it is in the buffer pool. If not, do nothing. */
3370  LWLockAcquire(bufPartitionLock, LW_SHARED);
3371  buf_id = BufTableLookup(&bufTag, bufHash);
3372  LWLockRelease(bufPartitionLock);
3373 
3374  if (buf_id < 0)
3375  continue;
3376 
3377  bufHdr = GetBufferDescriptor(buf_id);
3378 
3379  /*
3380  * We need to lock the buffer header and recheck if the buffer is
3381  * still associated with the same block because the buffer could be
3382  * evicted by some other backend loading blocks for a different
3383  * relation after we release lock on the BufMapping table.
3384  */
3385  buf_state = LockBufHdr(bufHdr);
3386 
3387  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
3388  bufHdr->tag.forkNum == forkNum &&
3389  bufHdr->tag.blockNum >= firstDelBlock)
3390  InvalidateBuffer(bufHdr); /* releases spinlock */
3391  else
3392  UnlockBufHdr(bufHdr, buf_state);
3393  }
3394 }

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc *  buf,
SMgrRelation  reln 
)
static

Definition at line 2808 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

2809 {
2810  XLogRecPtr recptr;
2811  ErrorContextCallback errcallback;
2812  instr_time io_start,
2813  io_time;
2814  Block bufBlock;
2815  char *bufToWrite;
2816  uint32 buf_state;
2817 
2818  /*
2819  * Try to start an I/O operation. If StartBufferIO returns false, then
2820  * someone else flushed the buffer before we could, so we need not do
2821  * anything.
2822  */
2823  if (!StartBufferIO(buf, false))
2824  return;
2825 
2826  /* Setup error traceback support for ereport() */
2827  errcallback.callback = shared_buffer_write_error_callback;
2828  errcallback.arg = (void *) buf;
2829  errcallback.previous = error_context_stack;
2830  error_context_stack = &errcallback;
2831 
2832  /* Find smgr relation for buffer */
2833  if (reln == NULL)
2834  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2835 
2836  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2837  buf->tag.blockNum,
2838  reln->smgr_rnode.node.spcNode,
2839  reln->smgr_rnode.node.dbNode,
2840  reln->smgr_rnode.node.relNode);
2841 
2842  buf_state = LockBufHdr(buf);
2843 
2844  /*
2845  * Run PageGetLSN while holding header lock, since we don't have the
2846  * buffer locked exclusively in all cases.
2847  */
2848  recptr = BufferGetLSN(buf);
2849 
2850  /* To check if block content changes while flushing. - vadim 01/17/97 */
2851  buf_state &= ~BM_JUST_DIRTIED;
2852  UnlockBufHdr(buf, buf_state);
2853 
2854  /*
2855  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2856  * rule that log updates must hit disk before any of the data-file changes
2857  * they describe do.
2858  *
2859  * However, this rule does not apply to unlogged relations, which will be
2860  * lost after a crash anyway. Most unlogged relation pages do not bear
2861  * LSNs since we never emit WAL records for them, and therefore flushing
2862  * up through the buffer LSN would be useless, but harmless. However,
2863  * GiST indexes use LSNs internally to track page-splits, and therefore
2864  * unlogged GiST pages bear "fake" LSNs generated by
2865  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2866  * LSN counter could advance past the WAL insertion point; and if it did
2867  * happen, attempting to flush WAL through that location would fail, with
2868  * disastrous system-wide consequences. To make sure that can't happen,
2869  * skip the flush if the buffer isn't permanent.
2870  */
2871  if (buf_state & BM_PERMANENT)
2872  XLogFlush(recptr);
2873 
2874  /*
2875  * Now it's safe to write buffer to disk. Note that no one else should
2876  * have been able to write it while we were busy with log flushing because
2877  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
2878  */
2879  bufBlock = BufHdrGetBlock(buf);
2880 
2881  /*
2882  * Update page checksum if desired. Since we have only shared lock on the
2883  * buffer, other processes might be updating hint bits in it, so we must
2884  * copy the page to private storage if we do checksumming.
2885  */
2886  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2887 
2888  if (track_io_timing)
2889  INSTR_TIME_SET_CURRENT(io_start);
2890 
2891  /*
2892  * bufToWrite is either the shared buffer or a copy, as appropriate.
2893  */
2894  smgrwrite(reln,
2895  buf->tag.forkNum,
2896  buf->tag.blockNum,
2897  bufToWrite,
2898  false);
2899 
2900  if (track_io_timing)
2901  {
2902  INSTR_TIME_SET_CURRENT(io_time);
2903  INSTR_TIME_SUBTRACT(io_time, io_start);
2904  pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
2905  INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
2906  }
2907 
2908  pgBufferUsage.shared_blks_written++;
2909 
2910  /*
2911  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2912  * end the BM_IO_IN_PROGRESS state.
2913  */
2914  TerminateBufferIO(buf, true, 0);
2915 
2916  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2917  buf->tag.blockNum,
2918  reln->smgr_rnode.node.spcNode,
2919  reln->smgr_rnode.node.dbNode,
2920  reln->smgr_rnode.node.relNode);
2921 
2922  /* Pop the error context stack */
2923  error_context_stack = errcallback.previous;
2924 }
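The track_io_timing bracket around smgrwrite() uses the portable instr_time macros; a minimal sketch of the same pattern (the helper name is hypothetical, and a backend context is assumed):

#include "postgres.h"
#include "portability/instr_time.h"

/* hypothetical helper: time one I/O callback, returning microseconds */
static uint64
timed_io_microsecs(void (*do_io) (void))
{
    instr_time  io_start,
                io_time;

    INSTR_TIME_SET_CURRENT(io_start);
    do_io();                                /* e.g. an smgrwrite() call */
    INSTR_TIME_SET_CURRENT(io_time);
    INSTR_TIME_SUBTRACT(io_time, io_start); /* io_time = end - start */
    return INSTR_TIME_GET_MICROSEC(io_time);
}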

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3705 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3706 {
3707  int i;
3708  BufferDesc *bufHdr;
3709 
3710  /* Make sure we can handle the pin inside the loop */
3711  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3712 
3713  for (i = 0; i < NBuffers; i++)
3714  {
3715  uint32 buf_state;
3716 
3717  bufHdr = GetBufferDescriptor(i);
3718 
3719  /*
3720  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3721  * and saves some cycles.
3722  */
3723  if (bufHdr->tag.rnode.dbNode != dbid)
3724  continue;
3725 
3726  ReservePrivateRefCountEntry();
3727 
3728  buf_state = LockBufHdr(bufHdr);
3729  if (bufHdr->tag.rnode.dbNode == dbid &&
3730  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3731  {
3732  PinBuffer_Locked(bufHdr);
3733  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3734  FlushBuffer(bufHdr, NULL);
3735  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3736  UnpinBuffer(bufHdr, true);
3737  }
3738  else
3739  UnlockBufHdr(bufHdr, buf_state);
3740  }
3741 }

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 3748 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().

3749 {
3750  BufferDesc *bufHdr;
3751 
3752  /* currently not needed, but no fundamental reason not to support */
3753  Assert(!BufferIsLocal(buffer));
3754 
3755  Assert(BufferIsPinned(buffer));
3756 
3757  bufHdr = GetBufferDescriptor(buffer - 1);
3758 
3759  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3760 
3761  FlushBuffer(bufHdr, NULL);
3762 }
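The caller contract (pin plus content lock, enforced by the Asserts above) looks like this in use; a minimal sketch modeled on the redo-side callers, with a hypothetical helper name:

#include "postgres.h"
#include "storage/bufmgr.h"

/* hypothetical: force an already-pinned buffer to disk right away */
static void
flush_page_now(Buffer buf)
{
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    /* ... modify the page and MarkBufferDirty(buf) as needed ... */
    FlushOneBuffer(buf);        /* writes the page out immediately */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}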

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 3512 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationGetSmgr(), RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by heapam_relation_copy_data(), and index_copy_data().

3513 {
3514  int i;
3515  BufferDesc *bufHdr;
3516 
3517  if (RelationUsesLocalBuffers(rel))
3518  {
3519  for (i = 0; i < NLocBuffer; i++)
3520  {
3521  uint32 buf_state;
3522 
3523  bufHdr = GetLocalBufferDescriptor(i);
3524  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3525  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3526  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3527  {
3528  ErrorContextCallback errcallback;
3529  Page localpage;
3530 
3531  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3532 
3533  /* Setup error traceback support for ereport() */
3534  errcallback.callback = local_buffer_write_error_callback;
3535  errcallback.arg = (void *) bufHdr;
3536  errcallback.previous = error_context_stack;
3537  error_context_stack = &errcallback;
3538 
3539  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3540 
3541  smgrwrite(RelationGetSmgr(rel),
3542  bufHdr->tag.forkNum,
3543  bufHdr->tag.blockNum,
3544  localpage,
3545  false);
3546 
3547  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3548  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3549 
3550  /* Pop the error context stack */
3551  error_context_stack = errcallback.previous;
3552  }
3553  }
3554 
3555  return;
3556  }
3557 
3558  /* Make sure we can handle the pin inside the loop */
3559  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3560 
3561  for (i = 0; i < NBuffers; i++)
3562  {
3563  uint32 buf_state;
3564 
3565  bufHdr = GetBufferDescriptor(i);
3566 
3567  /*
3568  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3569  * and saves some cycles.
3570  */
3571  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3572  continue;
3573 
3574  ReservePrivateRefCountEntry();
3575 
3576  buf_state = LockBufHdr(bufHdr);
3577  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3578  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3579  {
3580  PinBuffer_Locked(bufHdr);
3581  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3582  FlushBuffer(bufHdr, RelationGetSmgr(rel));
3583  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3584  UnpinBuffer(bufHdr, true);
3585  }
3586  else
3587  UnlockBufHdr(bufHdr, buf_state);
3588  }
3589 }

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation *  smgrs,
int  nrels 
)

Definition at line 3601 of file bufmgr.c.

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, RelFileNodeBackend::node, palloc(), pfree(), pg_qsort(), PinBuffer_Locked(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, SMgrSortArray::rnode, rnode_comparator(), SMgrRelationData::smgr_rnode, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by smgrdosyncall().

3602 {
3603  int i;
3604  SMgrSortArray *srels;
3605  bool use_bsearch;
3606 
3607  if (nrels == 0)
3608  return;
3609 
3610  /* fill-in array for qsort */
3611  srels = palloc(sizeof(SMgrSortArray) * nrels);
3612 
3613  for (i = 0; i < nrels; i++)
3614  {
3615  Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
3616 
3617  srels[i].rnode = smgrs[i]->smgr_rnode.node;
3618  srels[i].srel = smgrs[i];
3619  }
3620 
3621  /*
3622  * Save the bsearch overhead for low number of relations to sync. See
3623  * DropRelFileNodesAllBuffers for details.
3624  */
3625  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
3626 
3627  /* sort the list of SMgrRelations if necessary */
3628  if (use_bsearch)
3629  pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
3630 
3631  /* Make sure we can handle the pin inside the loop */
3632  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3633 
3634  for (i = 0; i < NBuffers; i++)
3635  {
3636  SMgrSortArray *srelent = NULL;
3637  BufferDesc *bufHdr = GetBufferDescriptor(i);
3638  uint32 buf_state;
3639 
3640  /*
3641  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3642  * and saves some cycles.
3643  */
3644 
3645  if (!use_bsearch)
3646  {
3647  int j;
3648 
3649  for (j = 0; j < nrels; j++)
3650  {
3651  if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
3652  {
3653  srelent = &srels[j];
3654  break;
3655  }
3656  }
3657 
3658  }
3659  else
3660  {
3661  srelent = bsearch((const void *) &(bufHdr->tag.rnode),
3662  srels, nrels, sizeof(SMgrSortArray),
3663  rnode_comparator);
3664  }
3665 
3666  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3667  if (srelent == NULL)
3668  continue;
3669 
3670  ReservePrivateRefCountEntry();
3671 
3672  buf_state = LockBufHdr(bufHdr);
3673  if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
3674  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3675  {
3676  PinBuffer_Locked(bufHdr);
3677  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3678  FlushBuffer(bufHdr, srelent->srel);
3679  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3680  UnpinBuffer(bufHdr, true);
3681  }
3682  else
3683  UnlockBufHdr(bufHdr, buf_state);
3684  }
3685 
3686  pfree(srels);
3687 }

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry *  ref)
static

Definition at line 410 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

411 {
412  Assert(ref->refcount == 0);
413 
414  if (ref >= &PrivateRefCountArray[0] &&
415  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
416  {
417  ref->buffer = InvalidBuffer;
418 
419  /*
420  * Mark the just used entry as reserved - in many scenarios that
421  * allows us to avoid ever having to search the array/hash for free
422  * entries.
423  */
424  ReservedRefCountEntry = ref;
425  }
426  else
427  {
428  bool found;
429  Buffer buffer = ref->buffer;
430 
431  hash_search(PrivateRefCountHash,
432  (void *) &buffer,
433  HASH_REMOVE,
434  &found);
435  Assert(found);
436  Assert(PrivateRefCountOverflowed > 0);
437  PrivateRefCountOverflowed--;
438  }
439 }

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inline static

Definition at line 387 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), PrintBufferLeakWarning(), and ReadRecentBuffer().

388 {
389  PrivateRefCountEntry *ref;
390 
391  Assert(BufferIsValid(buffer));
392  Assert(!BufferIsLocal(buffer));
393 
394  /*
395  * Not moving the entry - that's ok for the current users, but we might
396  * want to change this one day.
397  */
398  ref = GetPrivateRefCountEntry(buffer, false);
399 
400  if (ref == NULL)
401  return 0;
402  return ref->refcount;
403 }

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 307 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

308 {
309  PrivateRefCountEntry *res;
310  int i;
311 
312  Assert(BufferIsValid(buffer));
313  Assert(!BufferIsLocal(buffer));
314 
315  /*
316  * First search for references in the array, that'll be sufficient in the
317  * majority of cases.
318  */
319  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
320  {
321  res = &PrivateRefCountArray[i];
322 
323  if (res->buffer == buffer)
324  return res;
325  }
326 
327  /*
328  * By here we know that the buffer, if already pinned, isn't residing in
329  * the array.
330  *
331  * Only look up the buffer in the hashtable if we've previously overflowed
332  * into it.
333  */
334  if (PrivateRefCountOverflowed == 0)
335  return NULL;
336 
337  res = hash_search(PrivateRefCountHash,
338  (void *) &buffer,
339  HASH_FIND,
340  NULL);
341 
342  if (res == NULL)
343  return NULL;
344  else if (!do_move)
345  {
346  /* caller doesn't want us to move the hash entry into the array */
347  return res;
348  }
349  else
350  {
351  /* move buffer from hashtable into the free array slot */
352  bool found;
353  PrivateRefCountEntry *free;
354 
355  /* Ensure there's a free array slot */
356  ReservePrivateRefCountEntry();
357 
358  /* Use up the reserved slot */
359  Assert(ReservedRefCountEntry != NULL);
360  free = ReservedRefCountEntry;
361  ReservedRefCountEntry = NULL;
362  Assert(free->buffer == InvalidBuffer);
363 
364  /* and fill it */
365  free->buffer = buffer;
366  free->refcount = res->refcount;
367 
368  /* delete from hashtable */
369  hash_search(PrivateRefCountHash,
370  (void *) &buffer,
371  HASH_REMOVE,
372  &found);
373  Assert(found);
374  Assert(PrivateRefCountOverflowed > 0);
375  PrivateRefCountOverflowed--;
376 
377  return free;
378  }
379 }
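The array-then-hash lookup above is the heart of the private refcount machinery: eight array slots cover nearly all workloads, and the hash table is not even probed unless PrivateRefCountOverflowed says something lives there. A miniature, standalone model of that shape (a flat array stands in for the dynahash table):

#include <stdio.h>

#define ARRAY_ENTRIES 8
#define OVERFLOW_MAX  64

typedef struct
{
    int     buffer;             /* 0 means "slot empty" in this model */
    int     refcount;
} Entry;

static Entry array_slots[ARRAY_ENTRIES];
static Entry overflow[OVERFLOW_MAX];
static int   overflowed = 0;    /* cf. PrivateRefCountOverflowed */

static Entry *
get_entry(int buffer)
{
    for (int i = 0; i < ARRAY_ENTRIES; i++)
        if (array_slots[i].buffer == buffer)
            return &array_slots[i];         /* fast path */
    if (overflowed == 0)
        return NULL;                        /* skip overflow lookup entirely */
    for (int i = 0; i < overflowed; i++)
        if (overflow[i].buffer == buffer)
            return &overflow[i];
    return NULL;
}

int
main(void)
{
    Entry  *e;

    array_slots[0] = (Entry) {42, 3};
    e = get_entry(42);
    printf("buffer 42 refcount = %d\n", e ? e->refcount : 0);
    return 0;
}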

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 4215 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

4216 {
4217  int bufid = GetStartupBufferPinWaitBufId();
4218 
4219  /*
4220  * If we get woken slowly then it's possible that the Startup process was
4221  * already woken by other backends before we got here. Also possible that
4222  * we get here by multiple interrupts or interrupts at inappropriate
4223  * times, so make sure we do nothing if the bufid is not set.
4224  */
4225  if (bufid < 0)
4226  return false;
4227 
4228  if (GetPrivateRefCount(bufid + 1) > 0)
4229  return true;
4230 
4231  return false;
4232 }

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3806 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

3807 {
3808  Assert(BufferIsPinned(buffer));
3809  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3810  if (BufferIsLocal(buffer))
3811  LocalRefCount[-buffer - 1]++;
3812  else
3813  {
3814  PrivateRefCountEntry *ref;
3815 
3816  ref = GetPrivateRefCountEntry(buffer, true);
3817  Assert(ref != NULL);
3818  ref->refcount++;
3819  }
3820  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
3821  }
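A sketch of the intended use: take an extra pin on a buffer that is already pinned so a second holder can keep referencing the page; every pin must eventually be dropped with its own ReleaseBuffer(). The helper name is hypothetical:

#include "postgres.h"
#include "storage/bufmgr.h"

/* hypothetical: hand a second pinned reference to another data structure */
static Buffer
share_pinned_buffer(Buffer buffer)
{
    IncrBufferRefCount(buffer); /* cheap: bumps the local pin count only */
    return buffer;              /* the new holder must ReleaseBuffer() it */
}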
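
A usage sketch (hypothetical helper, backend context assumed): the extra pin is registered with the current resource owner, so each reference can later be dropped independently with ReleaseBuffer():

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hand a second reference to an already-pinned buffer to another code path. */
static Buffer
share_pinned_buffer(Buffer buf)
{
    IncrBufferRefCount(buf);    /* second pin, remembered by the resource owner */
    return buf;                 /* the caller must ReleaseBuffer() this copy too */
}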

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 2596 of file bufmgr.c.

References Assert, AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), and PrivateRefCountArray.

Referenced by BaseInit().

2597 {
2598  HASHCTL hash_ctl;
2599 
2600  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2601 
2602  hash_ctl.keysize = sizeof(int32);
2603  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2604 
2605  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2606  HASH_ELEM | HASH_BLOBS);
2607 
2608  /*
2609  * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
2610  * the corresponding phase of backend shutdown.
2611  */
2612  Assert(MyProc != NULL);
2613  on_shmem_exit(AtProcExit_Buffers, 0);
2614 }
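
The hash_create() call above follows the standard dynahash recipe for a backend-local table. A minimal sketch with hypothetical names (MyEntry, create_my_table); with HASH_BLOBS the key is hashed as raw bytes, and it must be the first field of the entry struct:

#include "postgres.h"
#include "utils/hsearch.h"

typedef struct MyEntry
{
    int32       key;            /* hash key; must come first */
    int32       payload;
} MyEntry;

static HTAB *
create_my_table(void)
{
    HASHCTL     ctl;

    ctl.keysize = sizeof(int32);
    ctl.entrysize = sizeof(MyEntry);
    return hash_create("MyTable", 100, &ctl, HASH_ELEM | HASH_BLOBS);
}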

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1467 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), and FindAndDropRelFileNodeBuffers().

1468 {
1469  BufferTag oldTag;
1470  uint32 oldHash; /* hash value for oldTag */
1471  LWLock *oldPartitionLock; /* buffer partition lock for it */
1472  uint32 oldFlags;
1473  uint32 buf_state;
1474 
1475  /* Save the original buffer tag before dropping the spinlock */
1476  oldTag = buf->tag;
1477 
1478  buf_state = pg_atomic_read_u32(&buf->state);
1479  Assert(buf_state & BM_LOCKED);
1480  UnlockBufHdr(buf, buf_state);
1481 
1482  /*
1483  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1484  * worth storing the hashcode in BufferDesc so we need not recompute it
1485  * here? Probably not.
1486  */
1487  oldHash = BufTableHashCode(&oldTag);
1488  oldPartitionLock = BufMappingPartitionLock(oldHash);
1489 
1490 retry:
1491 
1492  /*
1493  * Acquire exclusive mapping lock in preparation for changing the buffer's
1494  * association.
1495  */
1496  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1497 
1498  /* Re-lock the buffer header */
1499  buf_state = LockBufHdr(buf);
1500 
1501  /* If it's changed while we were waiting for lock, do nothing */
1502  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1503  {
1504  UnlockBufHdr(buf, buf_state);
1505  LWLockRelease(oldPartitionLock);
1506  return;
1507  }
1508 
1509  /*
1510  * We assume the only reason for it to be pinned is that someone else is
1511  * flushing the page out. Wait for them to finish. (This could be an
1512  * infinite loop if the refcount is messed up... it would be nice to time
1513  * out after awhile, but there seems no way to be sure how many loops may
1514  * be needed. Note that if the other guy has pinned the buffer but not
1515  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1516  * be busy-looping here.)
1517  */
1518  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1519  {
1520  UnlockBufHdr(buf, buf_state);
1521  LWLockRelease(oldPartitionLock);
1522  /* safety check: should definitely not be our *own* pin */
1523  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1524  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1525  WaitIO(buf);
1526  goto retry;
1527  }
1528 
1529  /*
1530  * Clear out the buffer's tag and flags. We must do this to ensure that
1531  * linear scans of the buffer array don't think the buffer is valid.
1532  */
1533  oldFlags = buf_state & BUF_FLAG_MASK;
1534  CLEAR_BUFFERTAG(buf->tag);
1535  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1536  UnlockBufHdr(buf, buf_state);
1537 
1538  /*
1539  * Remove the buffer from the lookup hashtable, if it was in there.
1540  */
1541  if (oldFlags & BM_TAG_VALID)
1542  BufTableDelete(&oldTag, oldHash);
1543 
1544  /*
1545  * Done with mapping lock.
1546  */
1547  LWLockRelease(oldPartitionLock);
1548 
1549  /*
1550  * Insert the buffer at the head of the list of free buffers.
1551  */
1552  StrategyFreeBuffer(buf);
1553 }

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 4297 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), hash_xlog_split_allocate_page(), and hashbucketcleanup().

4298 {
4299  BufferDesc *bufHdr;
4300  uint32 buf_state;
4301 
4302  Assert(BufferIsValid(buffer));
4303 
4304  if (BufferIsLocal(buffer))
4305  {
4306  /* There should be exactly one pin */
4307  if (LocalRefCount[-buffer - 1] != 1)
4308  return false;
4309  /* Nobody else to wait for */
4310  return true;
4311  }
4312 
4313  /* There should be exactly one local pin */
4314  if (GetPrivateRefCount(buffer) != 1)
4315  return false;
4316 
4317  bufHdr = GetBufferDescriptor(buffer - 1);
4318 
4319  /* caller must hold exclusive lock on buffer */
4320  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
4321  LW_EXCLUSIVE));
4322 
4323  buf_state = LockBufHdr(bufHdr);
4324 
4325  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4326  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4327  {
4328  /* pincount is OK. */
4329  UnlockBufHdr(bufHdr, buf_state);
4330  return true;
4331  }
4332 
4333  UnlockBufHdr(bufHdr, buf_state);
4334  return false;
4335 }
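
A usage sketch (backend context, pinned buffer assumed) following the hash-index callers listed above, which probe for cleanup safety before compacting a page:

#include "postgres.h"
#include "storage/bufmgr.h"

static void
maybe_cleanup_page(Buffer buf)
{
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    if (IsBufferCleanupOK(buf))
    {
        /* we hold the only pin: safe to shuffle tuples on this page */
    }
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}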

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4775 of file bufmgr.c.

References buftag::blockNum, cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4776 {
4777  int i;
4778 
4779  if (context->nr_pending == 0)
4780  return;
4781 
4782  /*
4783  * Executing the writes in-order can make them a lot faster, and allows to
4784  * merge writeback requests to consecutive blocks into larger writebacks.
4785  */
4786  sort_pending_writebacks(context->pending_writebacks, context->nr_pending);
4787 
4788  /*
4789  * Coalesce neighbouring writes, but nothing else. For that we iterate
4790  * through the, now sorted, array of pending flushes, and look forward to
4791  * find all neighbouring (or identical) writes.
4792  */
4793  for (i = 0; i < context->nr_pending; i++)
4794  {
4795  PendingWriteback *cur;
4796  PendingWriteback *next;
4797  SMgrRelation reln;
4798  int ahead;
4799  BufferTag tag;
4800  Size nblocks = 1;
4801 
4802  cur = &context->pending_writebacks[i];
4803  tag = cur->tag;
4804 
4805  /*
4806  * Peek ahead, into following writeback requests, to see if they can
4807  * be combined with the current one.
4808  */
4809  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4810  {
4811  next = &context->pending_writebacks[i + ahead + 1];
4812 
4813  /* different file, stop */
4814  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4815  cur->tag.forkNum != next->tag.forkNum)
4816  break;
4817 
4818  /* ok, block queued twice, skip */
4819  if (cur->tag.blockNum == next->tag.blockNum)
4820  continue;
4821 
4822  /* only merge consecutive writes */
4823  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4824  break;
4825 
4826  nblocks++;
4827  cur = next;
4828  }
4829 
4830  i += ahead;
4831 
4832  /* and finally tell the kernel to write the data to storage */
4833  reln = smgropen(tag.rnode, InvalidBackendId);
4834  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4835  }
4836 
4837  context->nr_pending = 0;
4838 }
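
The peek-ahead coalescing loop above can be demonstrated standalone. This self-contained C program (hypothetical Req type; a pre-sorted array stands in for pending_writebacks) merges duplicate and consecutive blocks into ranges the same way:

#include <stdio.h>

typedef struct { int file; unsigned block; } Req;   /* (file, block) pair */

int
main(void)
{
    /* already sorted, as after sort_pending_writebacks() */
    Req reqs[] = {{1, 10}, {1, 11}, {1, 11}, {1, 12}, {1, 20}, {2, 3}};
    int nreqs = sizeof(reqs) / sizeof(reqs[0]);

    for (int i = 0; i < nreqs; i++)
    {
        Req         start = reqs[i];
        Req         cur = start;
        unsigned    nblocks = 1;
        int         ahead;

        /* Peek ahead, absorbing duplicates and directly consecutive blocks. */
        for (ahead = 0; i + ahead + 1 < nreqs; ahead++)
        {
            Req next = reqs[i + ahead + 1];

            if (next.file != cur.file)
                break;                  /* different file, stop */
            if (next.block == cur.block)
                continue;               /* block queued twice, skip */
            if (next.block != cur.block + 1)
                break;                  /* only merge consecutive writes */
            nblocks++;
            cur = next;
        }
        i += ahead;

        printf("file %d: write blocks %u..%u (%u blocks)\n",
               start.file, start.block, start.block + nblocks - 1, nblocks);
    }
    return 0;
}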

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4541 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4542 {
4543  BufferDesc *bufHdr = (BufferDesc *) arg;
4544 
4545  if (bufHdr != NULL)
4546  {
4547  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4548  bufHdr->tag.forkNum);
4549 
4550  errcontext("writing block %u of relation %s",
4551  bufHdr->tag.blockNum, path);
4552  pfree(path);
4553  }
4554 }

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 4007 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

4008 {
4009  BufferDesc *buf;
4010 
4011  Assert(BufferIsPinned(buffer));
4012  if (BufferIsLocal(buffer))
4013  return; /* local buffers need no lock */
4014 
4015  buf = GetBufferDescriptor(buffer - 1);
4016 
4017  if (mode == BUFFER_LOCK_UNLOCK)
4018  LWLockRelease(BufferDescriptorGetContentLock(buf));
4019  else if (mode == BUFFER_LOCK_SHARE)
4020  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
4021  else if (mode == BUFFER_LOCK_EXCLUSIVE)
4022  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
4023  else
4024  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
4025 }
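
A usage sketch (hypothetical helper, backend context assumed) of the common share-lock cycle; the buffer must already be pinned, and, as the code above shows, local buffers need no lock:

#include "postgres.h"
#include "storage/bufmgr.h"

static void
examine_page(Buffer buf)
{
    Page        page;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    page = BufferGetPage(buf);
    if (PageIsNew(page))
        elog(LOG, "page is uninitialized");
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}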

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 4064 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, DeadlockTimeout, elog, ERROR, get_ps_display(), GetBufferDescriptor, GetCurrentTimestamp(), GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcPid, now(), palloc(), pfree(), PG_WAIT_BUFFER_PIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr, update_process_title, and BufferDesc::wait_backend_pid.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

4065 {
4066  BufferDesc *bufHdr;
4067  char *new_status = NULL;
4068  TimestampTz waitStart = 0;
4069  bool logged_recovery_conflict = false;
4070 
4071  Assert(BufferIsPinned(buffer));
4072  Assert(PinCountWaitBuf == NULL);
4073 
4074  if (BufferIsLocal(buffer))
4075  {
4076  /* There should be exactly one pin */
4077  if (LocalRefCount[-buffer - 1] != 1)
4078  elog(ERROR, "incorrect local pin count: %d",
4079  LocalRefCount[-buffer - 1]);
4080  /* Nobody else to wait for */
4081  return;
4082  }
4083 
4084  /* There should be exactly one local pin */
4085  if (GetPrivateRefCount(buffer) != 1)
4086  elog(ERROR, "incorrect local pin count: %d",
4087  GetPrivateRefCount(buffer));
4088 
4089  bufHdr = GetBufferDescriptor(buffer - 1);
4090 
4091  for (;;)
4092  {
4093  uint32 buf_state;
4094 
4095  /* Try to acquire lock */
4096  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4097  buf_state = LockBufHdr(bufHdr);
4098 
4099  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4100  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4101  {
4102  /* Successfully acquired exclusive lock with pincount 1 */
4103  UnlockBufHdr(bufHdr, buf_state);
4104 
4105  /*
4106  * Emit the log message if recovery conflict on buffer pin was
4107  * resolved but the startup process waited longer than
4108  * deadlock_timeout for it.
4109  */
4110  if (logged_recovery_conflict)
4111  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4112  waitStart, GetCurrentTimestamp(),
4113  NULL, false);
4114 
4115  /* Report change to non-waiting status */
4116  if (new_status)
4117  {
4118  set_ps_display(new_status);
4119  pfree(new_status);
4120  }
4121  return;
4122  }
4123  /* Failed, so mark myself as waiting for pincount 1 */
4124  if (buf_state & BM_PIN_COUNT_WAITER)
4125  {
4126  UnlockBufHdr(bufHdr, buf_state);
4127  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4128  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4129  }
4130  bufHdr->wait_backend_pid = MyProcPid;
4131  PinCountWaitBuf = bufHdr;
4132  buf_state |= BM_PIN_COUNT_WAITER;
4133  UnlockBufHdr(bufHdr, buf_state);
4134  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4135 
4136  /* Wait to be signaled by UnpinBuffer() */
4137  if (InHotStandby)
4138  {
4139  /* Report change to waiting status */
4140  if (update_process_title && new_status == NULL)
4141  {
4142  const char *old_status;
4143  int len;
4144 
4145  old_status = get_ps_display(&len);
4146  new_status = (char *) palloc(len + 8 + 1);
4147  memcpy(new_status, old_status, len);
4148  strcpy(new_status + len, " waiting");
4149  set_ps_display(new_status);
4150  new_status[len] = '\0'; /* truncate off " waiting" */
4151  }
4152 
4153  /*
4154  * Emit the log message if the startup process is waiting longer
4155  * than deadlock_timeout for recovery conflict on buffer pin.
4156  *
4157  * Skip this if first time through because the startup process has
4158  * not started waiting yet in this case. So, the wait start
4159  * timestamp is set after this logic.
4160  */
4161  if (waitStart != 0 && !logged_recovery_conflict)
4162  {
4163  TimestampTz now = GetCurrentTimestamp();
4164 
4165  if (TimestampDifferenceExceeds(waitStart, now,
4166  DeadlockTimeout))
4167  {
4168  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4169  waitStart, now, NULL, true);
4170  logged_recovery_conflict = true;
4171  }
4172  }
4173 
4174  /*
4175  * Set the wait start timestamp if logging is enabled and first
4176  * time through.
4177  */
4178  if (log_recovery_conflict_waits && waitStart == 0)
4179  waitStart = GetCurrentTimestamp();
4180 
4181  /* Publish the bufid that Startup process waits on */
4182  SetStartupBufferPinWaitBufId(buffer - 1);
4183  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4184  ResolveRecoveryConflictWithBufferPin();
4185  /* Reset the published bufid */
4186  SetStartupBufferPinWaitBufId(-1);
4187  }
4188  else
4189  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
4190 
4191  /*
4192  * Remove flag marking us as waiter. Normally this will not be set
4193  * anymore, but ProcWaitForSignal() can return for other signals as
4194  * well. We take care to only reset the flag if we're the waiter, as
4195  * theoretically another backend could have started waiting. That's
4196  * impossible with the current usages due to table level locking, but
4197  * better be safe.
4198  */
4199  buf_state = LockBufHdr(bufHdr);
4200  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4201  bufHdr->wait_backend_pid == MyProcPid)
4202  buf_state &= ~BM_PIN_COUNT_WAITER;
4203  UnlockBufHdr(bufHdr, buf_state);
4204 
4205  PinCountWaitBuf = NULL;
4206  /* Loop back and try again */
4207  }
4208 }
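
A usage sketch (backend context, pinned buffer assumed) of the cleanup-lock pattern: vacuum-style callers typically try ConditionalLockBufferForCleanup() first and fall back to this blocking form only when the page must be processed:

#include "postgres.h"
#include "storage/bufmgr.h"

static void
cleanup_page(Buffer buf)
{
    if (!ConditionalLockBufferForCleanup(buf))
        LockBufferForCleanup(buf);      /* wait until we are the sole pinner */

    /* exclusive content lock + pincount 1: safe to remove tuples here */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}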

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4587 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FindAndDropRelFileNodeBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4588 {
4589  SpinDelayStatus delayStatus;
4590  uint32 old_buf_state;
4591 
4592  init_local_spin_delay(&delayStatus);
4593 
4594  while (true)
4595  {
4596  /* set BM_LOCKED flag */
4597  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4598  /* if it wasn't set before we're OK */
4599  if (!(old_buf_state & BM_LOCKED))
4600  break;
4601  perform_spin_delay(&delayStatus);
4602  }
4603  finish_spin_delay(&delayStatus);
4604  return old_buf_state | BM_LOCKED;
4605 }
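
The locking protocol (atomically OR in the lock bit, retrying until the previous value shows it was clear) can be shown with standalone C11 atomics; names are hypothetical and the perform_spin_delay() backoff is reduced to a bare spin:

#include <stdatomic.h>
#include <stdio.h>

#define LOCKED_BIT (1u << 31)           /* stand-in for BM_LOCKED */

static _Atomic unsigned state;

static unsigned
lock_hdr(void)
{
    unsigned old;

    /* set the flag; if it was already set, someone else holds the lock */
    while (((old = atomic_fetch_or(&state, LOCKED_BIT)) & LOCKED_BIT) != 0)
        ;                               /* spin; real code backs off */
    return old | LOCKED_BIT;
}

static void
unlock_hdr(unsigned new_state)
{
    atomic_store(&state, new_state & ~LOCKED_BIT);
}

int
main(void)
{
    unsigned s = lock_hdr();
    printf("locked, state=0x%x\n", s);
    unlock_hdr(s);
    return 0;
}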

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 1565 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), lazy_scan_heap(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().

1566 {
1567  BufferDesc *bufHdr;
1568  uint32 buf_state;
1569  uint32 old_buf_state;
1570 
1571  if (!BufferIsValid(buffer))
1572  elog(ERROR, "bad buffer ID: %d", buffer);
1573 
1574  if (BufferIsLocal(buffer))
1575  {
1576  MarkLocalBufferDirty(buffer);
1577  return;
1578  }
1579 
1580  bufHdr = GetBufferDescriptor(buffer - 1);
1581 
1582  Assert(BufferIsPinned(buffer));
1583  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1584  LW_EXCLUSIVE));
1585 
1586  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1587  for (;;)
1588  {
1589  if (old_buf_state & BM_LOCKED)
1590  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1591 
1592  buf_state = old_buf_state;
1593 
1594  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1595  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1596 
1597  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1598  buf_state))
1599  break;
1600  }
1601 
1602  /*
1603  * If the buffer was not dirty already, do vacuum accounting.
1604  */
1605  if (!(old_buf_state & BM_DIRTY))
1606  {
1607  VacuumPageDirty++;
1608  pgBufferUsage.shared_blks_dirtied++;
1609  if (VacuumCostActive)
1610  VacuumCostBalance += VacuumCostPageDirty;
1611  }
1612 }
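
A usage sketch (hypothetical helper, backend context assumed): for a WAL-logged change, the page modification and MarkBufferDirty() sit inside one critical section, followed by XLogInsert() and PageSetLSN(), both elided here:

#include "postgres.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"

static void
modify_page(Buffer buf)
{
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    START_CRIT_SECTION();
    /* ... change the page contents here ... */
    MarkBufferDirty(buf);
    /* ... XLogInsert() the change, then PageSetLSN() the page ... */
    END_CRIT_SECTION();

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}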

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3838 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGPROC::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileNodeSkippingWAL(), buftag::rnode, BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3839 {
3840  BufferDesc *bufHdr;
3841  Page page = BufferGetPage(buffer);
3842 
3843  if (!BufferIsValid(buffer))
3844  elog(ERROR, "bad buffer ID: %d", buffer);
3845 
3846  if (BufferIsLocal(buffer))
3847  {
3848  MarkLocalBufferDirty(buffer);
3849  return;
3850  }
3851 
3852  bufHdr = GetBufferDescriptor(buffer - 1);
3853 
3854  Assert(GetPrivateRefCount(buffer) > 0);
3855  /* here, either share or exclusive lock is OK */
3856  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3857 
3858  /*
3859  * This routine might get called many times on the same page, if we are
3860  * making the first scan after commit of an xact that added/deleted many
3861  * tuples. So, be as quick as we can if the buffer is already dirty. We
3862  * do this by not acquiring spinlock if it looks like the status bits are
3863  * already set. Since we make this test unlocked, there's a chance we
3864  * might fail to notice that the flags have just been cleared, and failed
3865  * to reset them, due to memory-ordering issues. But since this function
3866  * is only intended to be used in cases where failing to write out the
3867  * data would be harmless anyway, it doesn't really matter.
3868  */
3869  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3870  (BM_DIRTY | BM_JUST_DIRTIED))
3871  {
3872  XLogRecPtr lsn = InvalidXLogRecPtr;
3873  bool dirtied = false;
3874  bool delayChkpt = false;
3875  uint32 buf_state;
3876 
3877  /*
3878  * If we need to protect hint bit updates from torn writes, WAL-log a
3879  * full page image of the page. This full page image is only necessary
3880  * if the hint bit update is the first change to the page since the
3881  * last checkpoint.
3882  *
3883  * We don't check full_page_writes here because that logic is included
3884  * when we call XLogInsert() since the value changes dynamically.
3885  */
3886  if (XLogHintBitIsNeeded() &&
3887  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3888  {
3889  /*
3890  * If we must not write WAL, due to a relfilenode-specific
3891  * condition or being in recovery, don't dirty the page. We can
3892  * set the hint, just not dirty the page as a result so the hint
3893  * is lost when we evict the page or shutdown.
3894  *
3895  * See src/backend/storage/page/README for longer discussion.
3896  */
3897  if (RecoveryInProgress() ||
3898  RelFileNodeSkippingWAL(bufHdr->tag.rnode))
3899  return;
3900 
3901  /*
3902  * If the block is already dirty because we either made a change
3903  * or set a hint already, then we don't need to write a full page
3904  * image. Note that aggressive cleaning of blocks dirtied by hint
3905  * bit setting would increase the call rate. Bulk setting of hint
3906  * bits would reduce the call rate...
3907  *
3908  * We must issue the WAL record before we mark the buffer dirty.
3909  * Otherwise we might write the page before we write the WAL. That
3910  * causes a race condition, since a checkpoint might occur between
3911  * writing the WAL record and marking the buffer dirty. We solve
3912  * that with a kluge, but one that is already in use during
3913  * transaction commit to prevent race conditions. Basically, we
3914  * simply prevent the checkpoint WAL record from being written
3915  * until we have marked the buffer dirty. We don't start the
3916  * checkpoint flush until we have marked dirty, so our checkpoint
3917  * must flush the change to disk successfully or the checkpoint
3918  * never gets written, so crash recovery will fix.
3919  *
3920  * It's possible we may enter here without an xid, so it is
3921  * essential that CreateCheckpoint waits for virtual transactions
3922  * rather than full transactionids.
3923  */
3924  MyProc->delayChkpt = delayChkpt = true;
3925  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3926  }
3927 
3928  buf_state = LockBufHdr(bufHdr);
3929 
3930  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3931 
3932  if (!(buf_state & BM_DIRTY))
3933  {
3934  dirtied = true; /* Means "will be dirtied by this action" */
3935 
3936  /*
3937  * Set the page LSN if we wrote a backup block. We aren't supposed
3938  * to set this when only holding a share lock but as long as we
3939  * serialise it somehow we're OK. We choose to set LSN while
3940  * holding the buffer header lock, which causes any reader of an
3941  * LSN who holds only a share lock to also obtain a buffer header
3942  * lock before using PageGetLSN(), which is enforced in
3943  * BufferGetLSNAtomic().
3944  *
3945  * If checksums are enabled, you might think we should reset the
3946  * checksum here. That will happen when the page is written
3947  * sometime later in this checkpoint cycle.
3948  */
3949  if (!XLogRecPtrIsInvalid(lsn))
3950  PageSetLSN(page, lsn);
3951  }
3952 
3953  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3954  UnlockBufHdr(bufHdr, buf_state);
3955 
3956  if (delayChkpt)
3957  MyProc->delayChkpt = false;
3958 
3959  if (dirtied)
3960  {
3961  VacuumPageDirty++;
3962  pgBufferUsage.shared_blks_dirtied++;
3963  if (VacuumCostActive)
3964  VacuumCostBalance += VacuumCostPageDirty;
3965  }
3966  }
3967 }
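
A usage sketch modeled loosely on SetHintBits() (backend context, heap tuple on a standard-layout page assumed). A hint bit may be set under a mere share lock; this function, not MarkBufferDirty(), then records the change, possibly emitting a full-page image first as described above:

#include "postgres.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static void
set_tuple_hint(Buffer buffer, HeapTupleHeader tuple, uint16 infomask)
{
    tuple->t_infomask |= infomask;
    MarkBufferDirtyHint(buffer, true);  /* true: standard page layout */
}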

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 281 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

282 {
283  PrivateRefCountEntry *res;
284 
285  /* only allowed to be called when a reservation has been made */
286  Assert(ReservedRefCountEntry != NULL);
287 
288  /* use up the reserved entry */
289  res = ReservedRefCountEntry;
290  ReservedRefCountEntry = NULL;
291 
292  /* and fill it */
293  res->buffer = buffer;
294  res->refcount = 0;
295 
296  return res;
297 }

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1686 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), and ReadRecentBuffer().

1687 {
1688  Buffer b = BufferDescriptorGetBuffer(buf);
1689  bool result;
1690  PrivateRefCountEntry *ref;
1691 
1692  ref = GetPrivateRefCountEntry(b, true);
1693 
1694  if (ref == NULL)
1695  {
1696  uint32 buf_state;
1697  uint32 old_buf_state;
1698 
1699  ReservePrivateRefCountEntry();
1700  ref = NewPrivateRefCountEntry(b);
1701 
1702  old_buf_state = pg_atomic_read_u32(&buf->state);
1703  for (;;)
1704  {
1705  if (old_buf_state & BM_LOCKED)
1706  old_buf_state = WaitBufHdrUnlocked(buf);
1707 
1708  buf_state = old_buf_state;
1709 
1710  /* increase refcount */
1711  buf_state += BUF_REFCOUNT_ONE;
1712 
1713  if (strategy == NULL)
1714  {
1715  /* Default case: increase usagecount unless already max. */
1716  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1717  buf_state += BUF_USAGECOUNT_ONE;
1718  }
1719  else
1720  {
1721  /*
1722  * Ring buffers shouldn't evict others from pool. Thus we
1723  * don't make usagecount more than 1.
1724  */
1725  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1726  buf_state += BUF_USAGECOUNT_ONE;
1727  }
1728 
1729  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1730  buf_state))
1731  {
1732  result = (buf_state & BM_VALID) != 0;
1733 
1734  /*
1735  * Assume that we acquired a buffer pin for the purposes of
1736  * Valgrind buffer client checks (even in !result case) to
1737  * keep things simple. Buffers that are unsafe to access are
1738  * not generally guaranteed to be marked undefined or
1739  * non-accessible in any case.
1740  */
1741  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1742  break;
1743  }
1744  }
1745  }
1746  else
1747  {
1748  /*
1749  * If we previously pinned the buffer, it must surely be valid.
1750  *
1751  * Note: We deliberately avoid a Valgrind client request here.
1752  * Individual access methods can optionally superimpose buffer page
1753  * client requests on top of our client requests to enforce that
1754  * buffers are only accessed while locked (and pinned). It's possible
1755  * that the buffer page is legitimately non-accessible here. We
1756  * cannot meddle with that.
1757  */
1758  result = true;
1759  }
1760 
1761  ref->refcount++;
1762  Assert(ref->refcount > 0);
1763  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1764  return result;
1765 }

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1789 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, UnlockBufHdr, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), ReadRecentBuffer(), and SyncOneBuffer().

1790 {
1791  Buffer b;
1792  PrivateRefCountEntry *ref;
1793  uint32 buf_state;
1794 
1795  /*
1796  * As explained, We don't expect any preexisting pins. That allows us to
1797  * manipulate the PrivateRefCount after releasing the spinlock
1798  */
1799  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1800 
1801  /*
1802  * Buffer can't have a preexisting pin, so mark its page as defined to
1803  * Valgrind (this is similar to the PinBuffer() case where the backend
1804  * doesn't already have a buffer pin)
1805  */
1806  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1807 
1808  /*
1809  * Since we hold the buffer spinlock, we can update the buffer state and
1810  * release the lock in one operation.
1811  */
1812  buf_state = pg_atomic_read_u32(&buf->state);
1813  Assert(buf_state & BM_LOCKED);
1814  buf_state += BUF_REFCOUNT_ONE;
1815  UnlockBufHdr(buf, buf_state);
1816 
1817  b = BufferDescriptorGetBuffer(buf);
1818 
1819  ref = NewPrivateRefCountEntry(b);
1820  ref->refcount++;
1821 
1822  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1823 }

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 587 of file bufmgr.c.

References Assert, BlockNumberIsValid, ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by acquire_sample_rows(), BitmapPrefetch(), count_nondeletable_pages(), HeapTupleHeaderAdvanceLatestRemovedXid(), and pg_prewarm().

588 {
589  Assert(RelationIsValid(reln));
590  Assert(BlockNumberIsValid(blockNum));
591 
592  if (RelationUsesLocalBuffers(reln))
593  {
594  /* see comments in ReadBufferExtended */
595  if (RELATION_IS_OTHER_TEMP(reln))
596  ereport(ERROR,
597  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
598  errmsg("cannot access temporary tables of other sessions")));
599 
600  /* pass it off to localbuf.c */
601  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
602  }
603  else
604  {
605  /* pass it to the shared buffer version */
606  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
607  }
608 }
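
A usage sketch modeled on pg_prewarm's prefetch mode (assumes rel is open and locked, and nblocks was obtained, e.g., from RelationGetNumberOfBlocks()):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
prefetch_main_fork(Relation rel, BlockNumber nblocks)
{
    for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
        (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);    /* async hint only */
}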

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 500 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), INIT_BUFFERTAG, PrefetchBufferResult::initiated_io, InvalidBuffer, LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by PrefetchBuffer().

503 {
504  PrefetchBufferResult result = {InvalidBuffer, false};
505  BufferTag newTag; /* identity of requested block */
506  uint32 newHash; /* hash value for newTag */
507  LWLock *newPartitionLock; /* buffer partition lock for it */
508  int buf_id;
509 
510  Assert(BlockNumberIsValid(blockNum));
511 
512  /* create a tag so we can lookup the buffer */
513  INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node,
514  forkNum, blockNum);
515 
516  /* determine its hash code and partition lock ID */
517  newHash = BufTableHashCode(&newTag);
518  newPartitionLock = BufMappingPartitionLock(newHash);
519 
520  /* see if the block is in the buffer pool already */
521  LWLockAcquire(newPartitionLock, LW_SHARED);
522  buf_id = BufTableLookup(&newTag, newHash);
523  LWLockRelease(newPartitionLock);
524 
525  /* If not in buffers, initiate prefetch */
526  if (buf_id < 0)
527  {
528 #ifdef USE_PREFETCH
529  /*
530  * Try to initiate an asynchronous read. This returns false in
531  * recovery if the relation file doesn't exist.
532  */
533  if (smgrprefetch(smgr_reln, forkNum, blockNum))
534  result.initiated_io = true;
535 #endif /* USE_PREFETCH */
536  }
537  else
538  {
539  /*
540  * Report the buffer it was in at that time. The caller may be able
541  * to avoid a buffer table lookup, but it's not pinned and it must be
542  * rechecked!
543  */
544  result.recent_buffer = buf_id + 1;
545  }
546 
547  /*
548  * If the block *is* in buffers, we do nothing. This is not really ideal:
549  * the block might be just about to be evicted, which would be stupid
550  * since we know we are going to need it soon. But the only easy answer
551  * is to bump the usage_count, which does not seem like a great solution:
552  * when the caller does ultimately touch the block, usage_count would get
553  * bumped again, resulting in too much favoritism for blocks that are
554  * involved in a prefetch sequence. A real fix would involve some
555  * additional per-buffer state, and it's not clear that there's enough of
556  * a problem to justify that.
557  */
558 
559  return result;
560 }

◆ PrintBufferLeakWarning()

void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2681 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2682 {
2683  BufferDesc *buf;
2684  int32 loccount;
2685  char *path;
2686  BackendId backend;
2687  uint32 buf_state;
2688 
2689  Assert(BufferIsValid(buffer));
2690  if (BufferIsLocal(buffer))
2691  {
2692  buf = GetLocalBufferDescriptor(-buffer - 1);
2693  loccount = LocalRefCount[-buffer - 1];
2694  backend = MyBackendId;
2695  }
2696  else
2697  {
2698  buf = GetBufferDescriptor(buffer - 1);
2699  loccount = GetPrivateRefCount(buffer);
2700  backend = InvalidBackendId;
2701  }
2702 
2703  /* theoretically we should lock the bufhdr here */
2704  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2705  buf_state = pg_atomic_read_u32(&buf->state);
2706  elog(WARNING,
2707  "buffer refcount leak: [%03d] "
2708  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2709  buffer, path,
2710  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2711  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2712  pfree(path);
2713 }

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 694 of file bufmgr.c.

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), fill_seq_with_data(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

695 {
696  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
697 }

◆ ReadBuffer_common()

static Buffer ReadBuffer_common ( SMgrRelation  reln,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 801 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, BufferUsage::local_blks_written, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, BufferUsage::shared_blks_written, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().
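
Before the listing, a hedged sketch of the relation-extension path this function implements (the helper name extend_by_one_block is illustrative): passing P_NEW routes the caller through the isExtend branch below, which appends a zero-filled block via smgrextend() and returns it pinned; the caller then locks the new page itself.

#include "postgres.h"
#include "storage/bufmgr.h"

/* Illustrative caller: extend rel by one block, as many access methods do. */
static Buffer
extend_by_one_block(Relation rel)
{
	Buffer		buf = ReadBuffer(rel, P_NEW);	/* takes the isExtend path */

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);		/* caller locks the new page */
	return buf;
}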

804 {
805  BufferDesc *bufHdr;
806  Block bufBlock;
807  bool found;
808  bool isExtend;
809  bool isLocalBuf = SmgrIsTemp(smgr);
810 
811  *hit = false;
812 
813  /* Make sure we will have room to remember the buffer pin */
814  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
815 
816  isExtend = (blockNum == P_NEW);
817 
818  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
819  smgr->smgr_rnode.node.spcNode,
820  smgr->smgr_rnode.node.dbNode,
821  smgr->smgr_rnode.node.relNode,
822  smgr->smgr_rnode.backend,
823  isExtend);
824 
825  /* Substitute proper block number if caller asked for P_NEW */
826  if (isExtend)
827  {
828  blockNum = smgrnblocks(smgr, forkNum);
829  /* Fail if relation is already at maximum possible length */
830  if (blockNum == P_NEW)
831  ereport(ERROR,
832  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
833  errmsg("cannot extend relation %s beyond %u blocks",
834  relpath(smgr->smgr_rnode, forkNum),
835  P_NEW)));
836  }
837 
838  if (isLocalBuf)
839  {
840  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
841  if (found)
842  pgBufferUsage.local_blks_hit++;
843  else if (isExtend)
844  pgBufferUsage.local_blks_written++;
845  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
846  mode == RBM_ZERO_ON_ERROR)
847  pgBufferUsage.local_blks_read++;
848  }
849  else
850  {
851  /*
852  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
853  * not currently in memory.
854  */
855  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
856  strategy, &found);
857  if (found)
858  pgBufferUsage.shared_blks_hit++;
859  else if (isExtend)
860  pgBufferUsage.shared_blks_written++;
861  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
862  mode == RBM_ZERO_ON_ERROR)
863  pgBufferUsage.shared_blks_read++;
864  }
865 
866  /* At this point we do NOT hold any locks. */
867 
868  /* if it was already in the buffer pool, we're done */
869  if (found)
870  {
871  if (!isExtend)
872  {
873  /* Just need to update stats before we exit */
874  *hit = true;
875  VacuumPageHit++;
876 
877  if (VacuumCostActive)
878  VacuumCostBalance += VacuumCostPageHit;
879 
880  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
881  smgr->smgr_rnode.node.spcNode,
882  smgr->smgr_rnode.node.dbNode,
883