PostgreSQL Source Code  git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Include dependency graph for bufmgr.c:


Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void FindAndDropRelFileNodeBuffers (RelFileNode rnode, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *a, const BufferTag *b)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void InitBufferPoolBackend (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelFileNodesAllBuffers (SMgrRelation *smgr_reln, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int maintenance_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 79 of file bufmgr.c.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().
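
This threshold trades a full sweep of all shared buffer headers against individual buffer-mapping probes when relation blocks are invalidated. A minimal sketch of how the two referenced callers apply it, assuming the caller already knows how many blocks must be dropped (variable names follow DropRelFileNodeBuffers; illustrative, not the verbatim code):

 /* If the count is known and small relative to the pool, probe each
  * block's buffer directly instead of scanning all NBuffers headers. */
 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
 {
     for (int j = 0; j < nforks; j++)
         FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
                                       nForkBlock[j], firstDelBlock[j]);
     return;
 }
 /* ...otherwise fall through to the full scan over NBuffers... */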

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 69 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 68 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 61 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:387
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int32 * LocalRefCount
Definition: localbuf.c:45

Definition at line 448 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), ConditionalLockBuffer(), FlushOneBuffer(), IncrBufferRefCount(), LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), and ReleaseAndReadBuffer().
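
A minimal usage sketch of the invariant this macro checks (the macro itself is private to bufmgr.c): it tests LocalRefCount for local buffers and the backend-private refcount for shared ones, so code that holds a pin (taken here via ReadBuffer; rel and blkno are hypothetical) may read the buffer's identity without the header spinlock:

 Buffer      buf = ReadBuffer(rel, blkno);   /* acquires a pin */
 BlockNumber check;

 Assert(BufferIsPinned(buf));                /* refcount > 0 in this backend */
 check = BufferGetBlockNumber(buf);          /* tag is stable while pinned */
 ReleaseBuffer(buf);                         /* drop the pin */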

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 60 of file bufmgr.c.

Referenced by FlushBuffer(), PinBuffer(), PinBuffer_Locked(), ReadBuffer_common(), and UnpinBuffer().

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 64 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 71 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers(), and FlushRelationsAllBuffers().

◆ ST_COMPARE [1/2]

#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)

Definition at line 4778 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)

Definition at line 4778 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 4780 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 4780 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 4777 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 4777 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 4779 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 4779 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 4776 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 4776 of file bufmgr.c.
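
The two ST_* blocks above instantiate the generic sort template twice in this file. As a sketch of the mechanism, the checkpoint variant is generated like this (the template #undef's the ST_* macros at the end, which is what allows the second instantiation for PendingWriteback):

 #define ST_SORT sort_checkpoint_bufferids
 #define ST_ELEMENT_TYPE CkptSortItem
 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
 #define ST_SCOPE static
 #define ST_DEFINE
 #include "lib/sort_template.h"

 /* The include expands to roughly
  *     static void sort_checkpoint_bufferids(CkptSortItem *first, size_t n);
  * which BufferSync() calls as
  *     sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
  */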

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

void AbortBufferIO ( void  )

Definition at line 4493 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, buf, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

4494 {
4495  BufferDesc *buf = InProgressBuf;
4496 
4497  if (buf)
4498  {
4499  uint32 buf_state;
4500 
4501  buf_state = LockBufHdr(buf);
4502  Assert(buf_state & BM_IO_IN_PROGRESS);
4503  if (IsForInput)
4504  {
4505  Assert(!(buf_state & BM_DIRTY));
4506 
4507  /* We'd better not think buffer is valid yet */
4508  Assert(!(buf_state & BM_VALID));
4509  UnlockBufHdr(buf, buf_state);
4510  }
4511  else
4512  {
4513  Assert(buf_state & BM_DIRTY);
4514  UnlockBufHdr(buf, buf_state);
4515  /* Issue notice if this is not the first failure... */
4516  if (buf_state & BM_IO_ERROR)
4517  {
4518  /* Buffer is pinned, so we can read tag without spinlock */
4519  char *path;
4520 
4521  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
4522  ereport(WARNING,
4523  (errcode(ERRCODE_IO_ERROR),
4524  errmsg("could not write block %u of %s",
4525  buf->tag.blockNum, path),
4526  errdetail("Multiple failures --- write error might be permanent.")));
4527  pfree(path);
4528  }
4529  }
4530  TerminateBufferIO(buf, false, BM_IO_ERROR);
4531  }
4532 }
#define relpathperm(rnode, forknum)
Definition: relpath.h:83
ForkNumber forkNum
Definition: buf_internals.h:94
int errcode(int sqlerrcode)
Definition: elog.c:698
#define BM_DIRTY
Definition: buf_internals.h:59
static BufferDesc * InProgressBuf
Definition: bufmgr.c:161
void pfree(void *pointer)
Definition: mcxt.c:1169
static char * buf
Definition: pg_test_fsync.c:68
int errdetail(const char *fmt,...)
Definition: elog.c:1042
unsigned int uint32
Definition: c.h:441
static bool IsForInput
Definition: bufmgr.c:162
#define WARNING
Definition: elog.h:40
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:4461
#define BM_VALID
Definition: buf_internals.h:60
#define ereport(elevel,...)
Definition: elog.h:157
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4603
#define Assert(condition)
Definition: c.h:804
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define UnlockBufHdr(desc, s)
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:62

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2576 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2577 {
2578  CheckForBufferLeaks();
2579 
2580  AtEOXact_LocalBuffers(isCommit);
2581 
2582  Assert(PrivateRefCountOverflowed == 0);
2583 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:199
#define Assert(condition)
Definition: c.h:804
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2650
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:577

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 2631 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolBackend().

2632 {
2633  AbortBufferIO();
2634  UnlockBuffers();
2635 
2636  CheckForBufferLeaks();
2637 
2638  /* localbuf.c needs a chance too */
2639  AtProcExit_LocalBuffers();
2640 }
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:588
void UnlockBuffers(void)
Definition: bufmgr.c:3995
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2650
void AbortBufferIO(void)
Definition: bufmgr.c:4493

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 2206 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2207 {
2208  /* info obtained from freelist.c */
2209  int strategy_buf_id;
2210  uint32 strategy_passes;
2211  uint32 recent_alloc;
2212 
2213  /*
2214  * Information saved between calls so we can determine the strategy
2215  * point's advance rate and avoid scanning already-cleaned buffers.
2216  */
2217  static bool saved_info_valid = false;
2218  static int prev_strategy_buf_id;
2219  static uint32 prev_strategy_passes;
2220  static int next_to_clean;
2221  static uint32 next_passes;
2222 
2223  /* Moving averages of allocation rate and clean-buffer density */
2224  static float smoothed_alloc = 0;
2225  static float smoothed_density = 10.0;
2226 
2227  /* Potentially these could be tunables, but for now, not */
2228  float smoothing_samples = 16;
2229  float scan_whole_pool_milliseconds = 120000.0;
2230 
2231  /* Used to compute how far we scan ahead */
2232  long strategy_delta;
2233  int bufs_to_lap;
2234  int bufs_ahead;
2235  float scans_per_alloc;
2236  int reusable_buffers_est;
2237  int upcoming_alloc_est;
2238  int min_scan_buffers;
2239 
2240  /* Variables for the scanning loop proper */
2241  int num_to_scan;
2242  int num_written;
2243  int reusable_buffers;
2244 
2245  /* Variables for final smoothed_density update */
2246  long new_strategy_delta;
2247  uint32 new_recent_alloc;
2248 
2249  /*
2250  * Find out where the freelist clock sweep currently is, and how many
2251  * buffer allocations have happened since our last call.
2252  */
2253  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2254 
2255  /* Report buffer alloc counts to pgstat */
2256  BgWriterStats.m_buf_alloc += recent_alloc;
2257 
2258  /*
2259  * If we're not running the LRU scan, just stop after doing the stats
2260  * stuff. We mark the saved state invalid so that we can recover sanely
2261  * if LRU scan is turned back on later.
2262  */
2263  if (bgwriter_lru_maxpages <= 0)
2264  {
2265  saved_info_valid = false;
2266  return true;
2267  }
2268 
2269  /*
2270  * Compute strategy_delta = how many buffers have been scanned by the
2271  * clock sweep since last time. If first time through, assume none. Then
2272  * see if we are still ahead of the clock sweep, and if so, how many
2273  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2274  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2275  * behavior when the passes counts wrap around.
2276  */
2277  if (saved_info_valid)
2278  {
2279  int32 passes_delta = strategy_passes - prev_strategy_passes;
2280 
2281  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2282  strategy_delta += (long) passes_delta * NBuffers;
2283 
2284  Assert(strategy_delta >= 0);
2285 
2286  if ((int32) (next_passes - strategy_passes) > 0)
2287  {
2288  /* we're one pass ahead of the strategy point */
2289  bufs_to_lap = strategy_buf_id - next_to_clean;
2290 #ifdef BGW_DEBUG
2291  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2292  next_passes, next_to_clean,
2293  strategy_passes, strategy_buf_id,
2294  strategy_delta, bufs_to_lap);
2295 #endif
2296  }
2297  else if (next_passes == strategy_passes &&
2298  next_to_clean >= strategy_buf_id)
2299  {
2300  /* on same pass, but ahead or at least not behind */
2301  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2302 #ifdef BGW_DEBUG
2303  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2304  next_passes, next_to_clean,
2305  strategy_passes, strategy_buf_id,
2306  strategy_delta, bufs_to_lap);
2307 #endif
2308  }
2309  else
2310  {
2311  /*
2312  * We're behind, so skip forward to the strategy point and start
2313  * cleaning from there.
2314  */
2315 #ifdef BGW_DEBUG
2316  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2317  next_passes, next_to_clean,
2318  strategy_passes, strategy_buf_id,
2319  strategy_delta);
2320 #endif
2321  next_to_clean = strategy_buf_id;
2322  next_passes = strategy_passes;
2323  bufs_to_lap = NBuffers;
2324  }
2325  }
2326  else
2327  {
2328  /*
2329  * Initializing at startup or after LRU scanning had been off. Always
2330  * start at the strategy point.
2331  */
2332 #ifdef BGW_DEBUG
2333  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2334  strategy_passes, strategy_buf_id);
2335 #endif
2336  strategy_delta = 0;
2337  next_to_clean = strategy_buf_id;
2338  next_passes = strategy_passes;
2339  bufs_to_lap = NBuffers;
2340  }
2341 
2342  /* Update saved info for next time */
2343  prev_strategy_buf_id = strategy_buf_id;
2344  prev_strategy_passes = strategy_passes;
2345  saved_info_valid = true;
2346 
2347  /*
2348  * Compute how many buffers had to be scanned for each new allocation, ie,
2349  * 1/density of reusable buffers, and track a moving average of that.
2350  *
2351  * If the strategy point didn't move, we don't update the density estimate
2352  */
2353  if (strategy_delta > 0 && recent_alloc > 0)
2354  {
2355  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2356  smoothed_density += (scans_per_alloc - smoothed_density) /
2357  smoothing_samples;
2358  }
2359 
2360  /*
2361  * Estimate how many reusable buffers there are between the current
2362  * strategy point and where we've scanned ahead to, based on the smoothed
2363  * density estimate.
2364  */
2365  bufs_ahead = NBuffers - bufs_to_lap;
2366  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2367 
2368  /*
2369  * Track a moving average of recent buffer allocations. Here, rather than
2370  * a true average we want a fast-attack, slow-decline behavior: we
2371  * immediately follow any increase.
2372  */
2373  if (smoothed_alloc <= (float) recent_alloc)
2374  smoothed_alloc = recent_alloc;
2375  else
2376  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2377  smoothing_samples;
2378 
2379  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2380  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2381 
2382  /*
2383  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2384  * eventually underflow to zero, and the underflows produce annoying
2385  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2386  * zero, there's no point in tracking smaller and smaller values of
2387  * smoothed_alloc, so just reset it to exactly zero to avoid this
2388  * syndrome. It will pop back up as soon as recent_alloc increases.
2389  */
2390  if (upcoming_alloc_est == 0)
2391  smoothed_alloc = 0;
2392 
2393  /*
2394  * Even in cases where there's been little or no buffer allocation
2395  * activity, we want to make a small amount of progress through the buffer
2396  * cache so that as many reusable buffers as possible are clean after an
2397  * idle period.
2398  *
2399  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2400  * the BGW will be called during the scan_whole_pool time; slice the
2401  * buffer pool into that many sections.
2402  */
2403  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2404 
2405  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2406  {
2407 #ifdef BGW_DEBUG
2408  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2409  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2410 #endif
2411  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2412  }
2413 
2414  /*
2415  * Now write out dirty reusable buffers, working forward from the
2416  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2417  * enough buffers to match our estimate of the next cycle's allocation
2418  * requirements, or hit the bgwriter_lru_maxpages limit.
2419  */
2420 
2421  /* Make sure we can handle the pin inside SyncOneBuffer */
2422  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2423 
2424  num_to_scan = bufs_to_lap;
2425  num_written = 0;
2426  reusable_buffers = reusable_buffers_est;
2427 
2428  /* Execute the LRU scan */
2429  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2430  {
2431  int sync_state = SyncOneBuffer(next_to_clean, true,
2432  wb_context);
2433 
2434  if (++next_to_clean >= NBuffers)
2435  {
2436  next_to_clean = 0;
2437  next_passes++;
2438  }
2439  num_to_scan--;
2440 
2441  if (sync_state & BUF_WRITTEN)
2442  {
2443  reusable_buffers++;
2444  if (++num_written >= bgwriter_lru_maxpages)
2445  {
2446  BgWriterStats.m_maxwritten_clean++;
2447  break;
2448  }
2449  }
2450  else if (sync_state & BUF_REUSABLE)
2451  reusable_buffers++;
2452  }
2453 
2454  BgWriterStats.m_buf_written_clean += num_written;
2455 
2456 #ifdef BGW_DEBUG
2457  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2458  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2459  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2460  bufs_to_lap - num_to_scan,
2461  num_written,
2462  reusable_buffers - reusable_buffers_est);
2463 #endif
2464 
2465  /*
2466  * Consider the above scan as being like a new allocation scan.
2467  * Characterize its density and update the smoothed one based on it. This
2468  * effectively halves the moving average period in cases where both the
2469  * strategy and the background writer are doing some useful scanning,
2470  * which is helpful because a long memory isn't as desirable on the
2471  * density estimates.
2472  */
2473  new_strategy_delta = bufs_to_lap - num_to_scan;
2474  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2475  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2476  {
2477  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2478  smoothed_density += (scans_per_alloc - smoothed_density) /
2479  smoothing_samples;
2480 
2481 #ifdef BGW_DEBUG
2482  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2483  new_recent_alloc, new_strategy_delta,
2484  scans_per_alloc, smoothed_density);
2485 #endif
2486  }
2487 
2488  /* Return true if OK to hibernate */
2489  return (bufs_to_lap == 0 && recent_alloc == 0);
2490 }
PgStat_Counter m_buf_alloc
Definition: pgstat.h:494
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:395
#define DEBUG1
Definition: elog.h:25
int BgWriterDelay
Definition: bgwriter.c:64
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
PgStat_Counter m_maxwritten_clean
Definition: pgstat.h:491
PgStat_Counter m_buf_written_clean
Definition: pgstat.h:490
PgStat_MsgBgWriter BgWriterStats
Definition: pgstat.c:131
double bgwriter_lru_multiplier
Definition: bufmgr.c:134
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:2509
signed int int32
Definition: c.h:429
#define BUF_REUSABLE
Definition: bufmgr.c:69
int bgwriter_lru_maxpages
Definition: bufmgr.c:133
#define DEBUG2
Definition: elog.h:24
unsigned int uint32
Definition: c.h:441
#define BUF_WRITTEN
Definition: bufmgr.c:68
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:946
#define Assert(condition)
Definition: c.h:804
#define elog(elevel,...)
Definition: elog.h:232
int NBuffers
Definition: globals.c:135
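
The two smoothing updates in BgBufferSync() are exponential moving averages with period smoothing_samples, except that smoothed_alloc "fast-attacks": it jumps straight to any higher observation and only decays gradually. A standalone illustration of just that arithmetic (not PostgreSQL code):

 #include <stdio.h>

 int
 main(void)
 {
     const float smoothing_samples = 16;
     float       smoothed_alloc = 0;
     float       samples[] = {100, 0, 0, 0, 200, 0};

     for (int i = 0; i < 6; i++)
     {
         float       recent_alloc = samples[i];

         if (smoothed_alloc <= recent_alloc)
             smoothed_alloc = recent_alloc;  /* follow any increase at once */
         else
             smoothed_alloc += (recent_alloc - smoothed_alloc) /
                 smoothing_samples;          /* decay slowly when below */

         printf("recent=%.0f smoothed=%.2f\n", recent_alloc, smoothed_alloc);
     }
     return 0;
 }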

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr 
)
static

Definition at line 1098 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, INIT_FORKNUM, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

1102 {
1103  BufferTag newTag; /* identity of requested block */
1104  uint32 newHash; /* hash value for newTag */
1105  LWLock *newPartitionLock; /* buffer partition lock for it */
1106  BufferTag oldTag; /* previous identity of selected buffer */
1107  uint32 oldHash; /* hash value for oldTag */
1108  LWLock *oldPartitionLock; /* buffer partition lock for it */
1109  uint32 oldFlags;
1110  int buf_id;
1111  BufferDesc *buf;
1112  bool valid;
1113  uint32 buf_state;
1114 
1115  /* create a tag so we can lookup the buffer */
1116  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1117 
1118  /* determine its hash code and partition lock ID */
1119  newHash = BufTableHashCode(&newTag);
1120  newPartitionLock = BufMappingPartitionLock(newHash);
1121 
1122  /* see if the block is in the buffer pool already */
1123  LWLockAcquire(newPartitionLock, LW_SHARED);
1124  buf_id = BufTableLookup(&newTag, newHash);
1125  if (buf_id >= 0)
1126  {
1127  /*
1128  * Found it. Now, pin the buffer so no one can steal it from the
1129  * buffer pool, and check to see if the correct data has been loaded
1130  * into the buffer.
1131  */
1132  buf = GetBufferDescriptor(buf_id);
1133 
1134  valid = PinBuffer(buf, strategy);
1135 
1136  /* Can release the mapping lock as soon as we've pinned it */
1137  LWLockRelease(newPartitionLock);
1138 
1139  *foundPtr = true;
1140 
1141  if (!valid)
1142  {
1143  /*
1144  * We can only get here if (a) someone else is still reading in
1145  * the page, or (b) a previous read attempt failed. We have to
1146  * wait for any active read attempt to finish, and then set up our
1147  * own read attempt if the page is still not BM_VALID.
1148  * StartBufferIO does it all.
1149  */
1150  if (StartBufferIO(buf, true))
1151  {
1152  /*
1153  * If we get here, previous attempts to read the buffer must
1154  * have failed ... but we shall bravely try again.
1155  */
1156  *foundPtr = false;
1157  }
1158  }
1159 
1160  return buf;
1161  }
1162 
1163  /*
1164  * Didn't find it in the buffer pool. We'll have to initialize a new
1165  * buffer. Remember to unlock the mapping lock while doing the work.
1166  */
1167  LWLockRelease(newPartitionLock);
1168 
1169  /* Loop here in case we have to try another victim buffer */
1170  for (;;)
1171  {
1172  /*
1173  * Ensure, while the spinlock's not yet held, that there's a free
1174  * refcount entry.
1175  */
1176  ReservePrivateRefCountEntry();
1177 
1178  /*
1179  * Select a victim buffer. The buffer is returned with its header
1180  * spinlock still held!
1181  */
1182  buf = StrategyGetBuffer(strategy, &buf_state);
1183 
1184  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1185 
1186  /* Must copy buffer flags while we still hold the spinlock */
1187  oldFlags = buf_state & BUF_FLAG_MASK;
1188 
1189  /* Pin the buffer and then release the buffer spinlock */
1190  PinBuffer_Locked(buf);
1191 
1192  /*
1193  * If the buffer was dirty, try to write it out. There is a race
1194  * condition here, in that someone might dirty it after we released it
1195  * above, or even while we are writing it out (since our share-lock
1196  * won't prevent hint-bit updates). We will recheck the dirty bit
1197  * after re-locking the buffer header.
1198  */
1199  if (oldFlags & BM_DIRTY)
1200  {
1201  /*
1202  * We need a share-lock on the buffer contents to write it out
1203  * (else we might write invalid data, eg because someone else is
1204  * compacting the page contents while we write). We must use a
1205  * conditional lock acquisition here to avoid deadlock. Even
1206  * though the buffer was not pinned (and therefore surely not
1207  * locked) when StrategyGetBuffer returned it, someone else could
1208  * have pinned and exclusive-locked it by the time we get here. If
1209  * we try to get the lock unconditionally, we'd block waiting for
1210  * them; if they later block waiting for us, deadlock ensues.
1211  * (This has been observed to happen when two backends are both
1212  * trying to split btree index pages, and the second one just
1213  * happens to be trying to split the page the first one got from
1214  * StrategyGetBuffer.)
1215  */
1216  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1217  LW_SHARED))
1218  {
1219  /*
1220  * If using a nondefault strategy, and writing the buffer
1221  * would require a WAL flush, let the strategy decide whether
1222  * to go ahead and write/reuse the buffer or to choose another
1223  * victim. We need lock to inspect the page LSN, so this
1224  * can't be done inside StrategyGetBuffer.
1225  */
1226  if (strategy != NULL)
1227  {
1228  XLogRecPtr lsn;
1229 
1230  /* Read the LSN while holding buffer header lock */
1231  buf_state = LockBufHdr(buf);
1232  lsn = BufferGetLSN(buf);
1233  UnlockBufHdr(buf, buf_state);
1234 
1235  if (XLogNeedsFlush(lsn) &&
1236  StrategyRejectBuffer(strategy, buf))
1237  {
1238  /* Drop lock/pin and loop around for another buffer */
1239  LWLockRelease(BufferDescriptorGetContentLock(buf));
1240  UnpinBuffer(buf, true);
1241  continue;
1242  }
1243  }
1244 
1245  /* OK, do the I/O */
1246  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1247  smgr->smgr_rnode.node.spcNode,
1248  smgr->smgr_rnode.node.dbNode,
1249  smgr->smgr_rnode.node.relNode);
1250 
1251  FlushBuffer(buf, NULL);
1252  LWLockRelease(BufferDescriptorGetContentLock(buf));
1253 
1254  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1255  &buf->tag);
1256 
1257  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1258  smgr->smgr_rnode.node.spcNode,
1259  smgr->smgr_rnode.node.dbNode,
1260  smgr->smgr_rnode.node.relNode);
1261  }
1262  else
1263  {
1264  /*
1265  * Someone else has locked the buffer, so give it up and loop
1266  * back to get another one.
1267  */
1268  UnpinBuffer(buf, true);
1269  continue;
1270  }
1271  }
1272 
1273  /*
1274  * To change the association of a valid buffer, we'll need to have
1275  * exclusive lock on both the old and new mapping partitions.
1276  */
1277  if (oldFlags & BM_TAG_VALID)
1278  {
1279  /*
1280  * Need to compute the old tag's hashcode and partition lock ID.
1281  * XXX is it worth storing the hashcode in BufferDesc so we need
1282  * not recompute it here? Probably not.
1283  */
1284  oldTag = buf->tag;
1285  oldHash = BufTableHashCode(&oldTag);
1286  oldPartitionLock = BufMappingPartitionLock(oldHash);
1287 
1288  /*
1289  * Must lock the lower-numbered partition first to avoid
1290  * deadlocks.
1291  */
1292  if (oldPartitionLock < newPartitionLock)
1293  {
1294  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1295  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1296  }
1297  else if (oldPartitionLock > newPartitionLock)
1298  {
1299  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1300  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1301  }
1302  else
1303  {
1304  /* only one partition, only one lock */
1305  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1306  }
1307  }
1308  else
1309  {
1310  /* if it wasn't valid, we need only the new partition */
1311  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1312  /* remember we have no old-partition lock or tag */
1313  oldPartitionLock = NULL;
1314  /* keep the compiler quiet about uninitialized variables */
1315  oldHash = 0;
1316  }
1317 
1318  /*
1319  * Try to make a hashtable entry for the buffer under its new tag.
1320  * This could fail because while we were writing someone else
1321  * allocated another buffer for the same block we want to read in.
1322  * Note that we have not yet removed the hashtable entry for the old
1323  * tag.
1324  */
1325  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1326 
1327  if (buf_id >= 0)
1328  {
1329  /*
1330  * Got a collision. Someone has already done what we were about to
1331  * do. We'll just handle this as if it were found in the buffer
1332  * pool in the first place. First, give up the buffer we were
1333  * planning to use.
1334  */
1335  UnpinBuffer(buf, true);
1336 
1337  /* Can give up that buffer's mapping partition lock now */
1338  if (oldPartitionLock != NULL &&
1339  oldPartitionLock != newPartitionLock)
1340  LWLockRelease(oldPartitionLock);
1341 
1342  /* remaining code should match code at top of routine */
1343 
1344  buf = GetBufferDescriptor(buf_id);
1345 
1346  valid = PinBuffer(buf, strategy);
1347 
1348  /* Can release the mapping lock as soon as we've pinned it */
1349  LWLockRelease(newPartitionLock);
1350 
1351  *foundPtr = true;
1352 
1353  if (!valid)
1354  {
1355  /*
1356  * We can only get here if (a) someone else is still reading
1357  * in the page, or (b) a previous read attempt failed. We
1358  * have to wait for any active read attempt to finish, and
1359  * then set up our own read attempt if the page is still not
1360  * BM_VALID. StartBufferIO does it all.
1361  */
1362  if (StartBufferIO(buf, true))
1363  {
1364  /*
1365  * If we get here, previous attempts to read the buffer
1366  * must have failed ... but we shall bravely try again.
1367  */
1368  *foundPtr = false;
1369  }
1370  }
1371 
1372  return buf;
1373  }
1374 
1375  /*
1376  * Need to lock the buffer header too in order to change its tag.
1377  */
1378  buf_state = LockBufHdr(buf);
1379 
1380  /*
1381  * Somebody could have pinned or re-dirtied the buffer while we were
1382  * doing the I/O and making the new hashtable entry. If so, we can't
1383  * recycle this buffer; we must undo everything we've done and start
1384  * over with a new victim buffer.
1385  */
1386  oldFlags = buf_state & BUF_FLAG_MASK;
1387  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1388  break;
1389 
1390  UnlockBufHdr(buf, buf_state);
1391  BufTableDelete(&newTag, newHash);
1392  if (oldPartitionLock != NULL &&
1393  oldPartitionLock != newPartitionLock)
1394  LWLockRelease(oldPartitionLock);
1395  LWLockRelease(newPartitionLock);
1396  UnpinBuffer(buf, true);
1397  }
1398 
1399  /*
1400  * Okay, it's finally safe to rename the buffer.
1401  *
1402  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1403  * paranoia. We also reset the usage_count since any recency of use of
1404  * the old content is no longer relevant. (The usage_count starts out at
1405  * 1 so that the buffer can survive one clock-sweep pass.)
1406  *
1407  * Make sure BM_PERMANENT is set for buffers that must be written at every
1408  * checkpoint. Unlogged buffers only need to be written at shutdown
1409  * checkpoints, except for their "init" forks, which need to be treated
1410  * just like permanent relations.
1411  */
1412  buf->tag = newTag;
1413  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1414  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1415  BUF_USAGECOUNT_MASK);
1416  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1417  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1418  else
1419  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1420 
1421  UnlockBufHdr(buf, buf_state);
1422 
1423  if (oldPartitionLock != NULL)
1424  {
1425  BufTableDelete(&oldTag, oldHash);
1426  if (oldPartitionLock != newPartitionLock)
1427  LWLockRelease(oldPartitionLock);
1428  }
1429 
1430  LWLockRelease(newPartitionLock);
1431 
1432  /*
1433  * Buffer contents are currently invalid. Try to obtain the right to
1434  * start I/O. If StartBufferIO returns false, then someone else managed
1435  * to read it before we did, so there's nothing left for BufferAlloc() to
1436  * do.
1437  */
1438  if (StartBufferIO(buf, true))
1439  *foundPtr = false;
1440  else
1441  *foundPtr = true;
1442 
1443  return buf;
1444 }
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:1683
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
Definition: freelist.c:201
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:149
Definition: lwlock.h:31
#define BM_PERMANENT
Definition: buf_internals.h:67
#define BufMappingPartitionLock(hashcode)
#define BM_TAG_VALID
Definition: buf_internals.h:61
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3202
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:66
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:79
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:91
#define BM_DIRTY
Definition: buf_internals.h:59
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2818
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1805
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:4410
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:119
void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
Definition: bufmgr.c:4750
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
WritebackContext BackendWritebackContext
Definition: buf_init.c:23
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1375
static char * buf
Definition: pg_test_fsync.c:68
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:44
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
unsigned int uint32
Definition: c.h:441
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1831
#define BM_VALID
Definition: buf_internals.h:60
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
Definition: freelist.c:686
RelFileNode node
Definition: relfilenode.h:74
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4603
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define INIT_BUFFERTAG(a, xx_rnode, xx_forkNum, xx_blockNum)
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1786
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:43
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1203
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
#define UnlockBufHdr(desc, s)
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:215
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:61
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2758 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2759 {
2760  BufferDesc *bufHdr;
2761 
2762  Assert(BufferIsPinned(buffer));
2763 
2764  if (BufferIsLocal(buffer))
2765  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2766  else
2767  bufHdr = GetBufferDescriptor(buffer - 1);
2768 
2769  /* pinned, so OK to read tag without spinlock */
2770  return bufHdr->tag.blockNum;
2771 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
BufferTag tag

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3021 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

3022 {
3023  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3024  char *page = BufferGetPage(buffer);
3025  XLogRecPtr lsn;
3026  uint32 buf_state;
3027 
3028  /*
3029  * If we don't need locking for correctness, fastpath out.
3030  */
3031  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3032  return PageGetLSN(page);
3033 
3034  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3035  Assert(BufferIsValid(buffer));
3036  Assert(BufferIsPinned(buffer));
3037 
3038  buf_state = LockBufHdr(bufHdr);
3039  lsn = PageGetLSN(page);
3040  UnlockBufHdr(bufHdr, buf_state);
3041 
3042  return lsn;
3043 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:441
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4603
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define PageGetLSN(page)
Definition: bufpage.h:366
#define UnlockBufHdr(desc, s)
#define XLogHintBitIsNeeded()
Definition: xlog.h:212
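
A sketch of the pattern used by the btree callers above: a scan remembers the page LSN when it reads the page and later compares it against BufferGetLSNAtomic() to detect intervening modifications (field names follow _bt_killitems(); treat them as illustrative):

 /* at read time */
 so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);

 /* later, before acting on remembered item offsets */
 if (BufferGetLSNAtomic(so->currPos.buf) != so->currPos.lsn)
     return;     /* page changed since it was read; offsets are stale */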

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileNode *  rnode,
ForkNumber *  forknum,
BlockNumber *  blknum 
)

Definition at line 2779 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2781 {
2782  BufferDesc *bufHdr;
2783 
2784  /* Do the same checks as BufferGetBlockNumber. */
2785  Assert(BufferIsPinned(buffer));
2786 
2787  if (BufferIsLocal(buffer))
2788  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2789  else
2790  bufHdr = GetBufferDescriptor(buffer - 1);
2791 
2792  /* pinned, so OK to read tag without spinlock */
2793  *rnode = bufHdr->tag.rnode;
2794  *forknum = bufHdr->tag.forkNum;
2795  *blknum = bufHdr->tag.blockNum;
2796 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
ForkNumber forkNum
Definition: buf_internals.h:94
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
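
Typical use (compare log_newpage_buffer() above): recover the full page identity of a pinned buffer, for example to build a WAL reference:

 RelFileNode rnode;
 ForkNumber  forknum;
 BlockNumber blkno;

 BufferGetTag(buffer, &rnode, &forknum, &blkno);
 /* rnode/forknum/blkno now identify the page; readable without the
  * header spinlock because the caller's pin keeps the tag stable */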

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2991 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2992 {
2993  BufferDesc *bufHdr;
2994 
2995  /* Local buffers are used only for temp relations. */
2996  if (BufferIsLocal(buffer))
2997  return false;
2998 
2999  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3000  Assert(BufferIsValid(buffer));
3001  Assert(BufferIsPinned(buffer));
3002 
3003  /*
3004  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
3005  * need not bother with the buffer header spinlock. Even if someone else
3006  * changes the buffer header state while we're doing this, the state is
3007  * changed atomically, so we'll read the old value or the new value, but
3008  * not random garbage.
3009  */
3010  bufHdr = GetBufferDescriptor(buffer - 1);
3011  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3012 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define BM_PERMANENT
Definition: buf_internals.h:67
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:804
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
pg_atomic_uint32 state
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
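
Its one caller, SetHintBits(), combines this check with BufferGetLSNAtomic() to decide whether a hint bit may be set before the covering commit record is flushed. A condensed sketch of that logic (xid is assumed known-committed; simplified from the visibility code):

 XLogRecPtr  commitLSN = TransactionIdGetCommitLSN(xid);

 if (BufferIsPermanent(buffer) && XLogNeedsFlush(commitLSN) &&
     BufferGetLSNAtomic(buffer) < commitLSN)
     return;     /* not flushed and no LSN interlock: skip the hint for now */

 tuple->t_data->t_infomask |= infomask;
 MarkBufferDirtyHint(buffer, true);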

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 1930 of file bufmgr.c.

References Assert, BgWriterStats, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgBgWriter::m_buf_written_checkpoints, NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), pfree(), pg_atomic_read_u32(), PointerGetDatum, ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1931 {
1932  uint32 buf_state;
1933  int buf_id;
1934  int num_to_scan;
1935  int num_spaces;
1936  int num_processed;
1937  int num_written;
1938  CkptTsStatus *per_ts_stat = NULL;
1939  Oid last_tsid;
1940  binaryheap *ts_heap;
1941  int i;
1942  int mask = BM_DIRTY;
1943  WritebackContext wb_context;
1944 
1945  /* Make sure we can handle the pin inside SyncOneBuffer */
1946  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1947 
1948  /*
1949  * Unless this is a shutdown checkpoint or we have been explicitly told,
1950  * we write only permanent, dirty buffers. But at shutdown or end of
1951  * recovery, we write all dirty buffers.
1952  */
1953  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
1954  CHECKPOINT_FLUSH_ALL))))
1955  mask |= BM_PERMANENT;
1956 
1957  /*
1958  * Loop over all buffers, and mark the ones that need to be written with
1959  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1960  * can estimate how much work needs to be done.
1961  *
1962  * This allows us to write only those pages that were dirty when the
1963  * checkpoint began, and not those that get dirtied while it proceeds.
1964  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1965  * later in this function, or by normal backends or the bgwriter cleaning
1966  * scan, the flag is cleared. Any buffer dirtied after this point won't
1967  * have the flag set.
1968  *
1969  * Note that if we fail to write some buffer, we may leave buffers with
1970  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1971  * certainly need to be written for the next checkpoint attempt, too.
1972  */
1973  num_to_scan = 0;
1974  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1975  {
1976  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1977 
1978  /*
1979  * Header spinlock is enough to examine BM_DIRTY, see comment in
1980  * SyncOneBuffer.
1981  */
1982  buf_state = LockBufHdr(bufHdr);
1983 
1984  if ((buf_state & mask) == mask)
1985  {
1986  CkptSortItem *item;
1987 
1988  buf_state |= BM_CHECKPOINT_NEEDED;
1989 
1990  item = &CkptBufferIds[num_to_scan++];
1991  item->buf_id = buf_id;
1992  item->tsId = bufHdr->tag.rnode.spcNode;
1993  item->relNode = bufHdr->tag.rnode.relNode;
1994  item->forkNum = bufHdr->tag.forkNum;
1995  item->blockNum = bufHdr->tag.blockNum;
1996  }
1997 
1998  UnlockBufHdr(bufHdr, buf_state);
1999 
2000  /* Check for barrier events in case NBuffers is large. */
2001  if (ProcSignalBarrierPending)
2002  ProcessProcSignalBarrier();
2003  }
2004 
2005  if (num_to_scan == 0)
2006  return; /* nothing to do */
2007 
2008  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2009 
2010  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2011 
2012  /*
2013  * Sort buffers that need to be written to reduce the likelihood of random
2014  * IO. The sorting is also important for the implementation of balancing
2015  * writes between tablespaces. Without balancing writes we'd potentially
2016  * end up writing to the tablespaces one-by-one; possibly overloading the
2017  * underlying system.
2018  */
2019  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2020 
2021  num_spaces = 0;
2022 
2023  /*
2024  * Allocate progress status for each tablespace with buffers that need to
2025  * be flushed. This requires the to-be-flushed array to be sorted.
2026  */
2027  last_tsid = InvalidOid;
2028  for (i = 0; i < num_to_scan; i++)
2029  {
2030  CkptTsStatus *s;
2031  Oid cur_tsid;
2032 
2033  cur_tsid = CkptBufferIds[i].tsId;
2034 
2035  /*
2036  * Grow array of per-tablespace status structs, every time a new
2037  * tablespace is found.
2038  */
2039  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2040  {
2041  Size sz;
2042 
2043  num_spaces++;
2044 
2045  /*
2046  * Not worth adding grow-by-power-of-2 logic here - even with a
2047  * few hundred tablespaces this should be fine.
2048  */
2049  sz = sizeof(CkptTsStatus) * num_spaces;
2050 
2051  if (per_ts_stat == NULL)
2052  per_ts_stat = (CkptTsStatus *) palloc(sz);
2053  else
2054  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2055 
2056  s = &per_ts_stat[num_spaces - 1];
2057  memset(s, 0, sizeof(*s));
2058  s->tsId = cur_tsid;
2059 
2060  /*
2061  * The first buffer in this tablespace. As CkptBufferIds is sorted
2062  * by tablespace all (s->num_to_scan) buffers in this tablespace
2063  * will follow afterwards.
2064  */
2065  s->index = i;
2066 
2067  /*
2068  * progress_slice will be determined once we know how many buffers
2069  * are in each tablespace, i.e. after this loop.
2070  */
2071 
2072  last_tsid = cur_tsid;
2073  }
2074  else
2075  {
2076  s = &per_ts_stat[num_spaces - 1];
2077  }
2078 
2079  s->num_to_scan++;
2080 
2081  /* Check for barrier events. */
2082  if (ProcSignalBarrierPending)
2083  ProcessProcSignalBarrier();
2084  }
2085 
2086  Assert(num_spaces > 0);
2087 
2088  /*
2089  * Build a min-heap over the write-progress in the individual tablespaces,
2090  * and compute how large a portion of the total progress a single
2091  * processed buffer is.
2092  */
2093  ts_heap = binaryheap_allocate(num_spaces,
2094  ts_ckpt_progress_comparator,
2095  NULL);
2096 
2097  for (i = 0; i < num_spaces; i++)
2098  {
2099  CkptTsStatus *ts_stat = &per_ts_stat[i];
2100 
2101  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2102 
2103  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2104  }
2105 
2106  binaryheap_build(ts_heap);
2107 
2108  /*
2109  * Iterate through to-be-checkpointed buffers and write the ones (still)
2110  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2111  * tablespaces; otherwise the sorting would lead to only one tablespace
2112  * receiving writes at a time, making inefficient use of the hardware.
2113  */
2114  num_processed = 0;
2115  num_written = 0;
2116  while (!binaryheap_empty(ts_heap))
2117  {
2118  BufferDesc *bufHdr = NULL;
2119  CkptTsStatus *ts_stat = (CkptTsStatus *)
2120  DatumGetPointer(binaryheap_first(ts_heap));
2121 
2122  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2123  Assert(buf_id != -1);
2124 
2125  bufHdr = GetBufferDescriptor(buf_id);
2126 
2127  num_processed++;
2128 
2129  /*
2130  * We don't need to acquire the lock here, because we're only looking
2131  * at a single bit. It's possible that someone else writes the buffer
2132  * and clears the flag right after we check, but that doesn't matter
2133  * since SyncOneBuffer will then do nothing. However, there is a
2134  * further race condition: it's conceivable that between the time we
2135  * examine the bit here and the time SyncOneBuffer acquires the lock,
2136  * someone else not only wrote the buffer but replaced it with another
2137  * page and dirtied it. In that improbable case, SyncOneBuffer will
2138  * write the buffer though we didn't need to. It doesn't seem worth
2139  * guarding against this, though.
2140  */
2141  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2142  {
2143  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2144  {
2145  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2146  BgWriterStats.m_buf_written_checkpoints++;
2147  num_written++;
2148  }
2149  }
2150 
2151  /*
2152  * Measure progress independent of actually having to flush the buffer
2153  * - otherwise writes become unbalanced.
2154  */
2155  ts_stat->progress += ts_stat->progress_slice;
2156  ts_stat->num_scanned++;
2157  ts_stat->index++;
2158 
2159  /* Have all the buffers from the tablespace been processed? */
2160  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2161  {
2162  binaryheap_remove_first(ts_heap);
2163  }
2164  else
2165  {
2166  /* update heap with the new progress */
2167  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2168  }
2169 
2170  /*
2171  * Sleep to throttle our I/O rate.
2172  *
2173  * (This will check for barrier events even if it doesn't sleep.)
2174  */
2175  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2176  }
2177 
2178  /* issue all pending flushes */
2179  IssuePendingWritebacks(&wb_context);
2180 
2181  pfree(per_ts_stat);
2182  per_ts_stat = NULL;
2183  binaryheap_free(ts_heap);
2184 
2185  /*
2186  * Update checkpoint statistics. As noted above, this doesn't include
2187  * buffers written by other backends or bgwriter scan.
2188  */
2189  CheckpointStats.ckpt_bufs_written += num_written;
2190 
2191  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2192 }
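
The balancing scheme is easiest to see in isolation: each tablespace's progress advances in slices inversely proportional to its buffer count, and the next write always goes to the least-advanced tablespace. The following standalone sketch models that idea with hypothetical names; a linear minimum search stands in for the binary heap.

#include <stdio.h>

/* Hypothetical stand-in for CkptTsStatus: one entry per tablespace. */
typedef struct
{
    const char *name;
    int         num_to_scan;    /* buffers this tablespace must write */
    int         num_scanned;    /* buffers written so far */
    double      progress;       /* num_scanned * progress_slice */
    double      progress_slice; /* total_to_scan / num_to_scan */
} SpaceProgress;

int
main(void)
{
    SpaceProgress spaces[] = {
        {"ts_a", 6, 0, 0.0, 0.0},
        {"ts_b", 2, 0, 0.0, 0.0},
        {"ts_c", 4, 0, 0.0, 0.0},
    };
    int         nspaces = 3;
    int         total = 0;

    for (int i = 0; i < nspaces; i++)
        total += spaces[i].num_to_scan;
    for (int i = 0; i < nspaces; i++)
        spaces[i].progress_slice = (double) total / spaces[i].num_to_scan;

    /* Each iteration services the least-advanced tablespace; the real
     * code uses a binary heap instead of this linear minimum search. */
    for (int done = 0; done < total; done++)
    {
        SpaceProgress *min = NULL;

        for (int i = 0; i < nspaces; i++)
        {
            SpaceProgress *s = &spaces[i];

            if (s->num_scanned < s->num_to_scan &&
                (min == NULL || s->progress < min->progress))
                min = s;
        }
        min->progress += min->progress_slice;
        min->num_scanned++;
        printf("write one buffer from %s\n", min->name);
    }
    return 0;
}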

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag a,
const BufferTag b 
)
inline static

Definition at line 4655 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

4656 {
4657  int ret;
4658 
4659  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4660 
4661  if (ret != 0)
4662  return ret;
4663 
4664  if (ba->forkNum < bb->forkNum)
4665  return -1;
4666  if (ba->forkNum > bb->forkNum)
4667  return 1;
4668 
4669  if (ba->blockNum < bb->blockNum)
4670  return -1;
4671  if (ba->blockNum > bb->blockNum)
4672  return 1;
4673 
4674  return 0;
4675 }

◆ BufmgrCommit()

void BufmgrCommit ( void  )

Definition at line 2744 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2745 {
2746  /* Nothing to do in bufmgr anymore... */
2747 }

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 2650 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2651 {
2652 #ifdef USE_ASSERT_CHECKING
2653  int RefCountErrors = 0;
2654  PrivateRefCountEntry *res;
2655  int i;
2656 
2657  /* check the array */
2658  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2659  {
2660  res = &PrivateRefCountArray[i];
2661 
2662  if (res->buffer != InvalidBuffer)
2663  {
2664  PrintBufferLeakWarning(res->buffer);
2665  RefCountErrors++;
2666  }
2667  }
2668 
2669  /* if necessary search the hash */
2670  if (PrivateRefCountOverflowed)
2671  {
2672  HASH_SEQ_STATUS hstat;
2673 
2674  hash_seq_init(&hstat, PrivateRefCountHash);
2675  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2676  {
2677  PrintBufferLeakWarning(res->buffer);
2678  RefCountErrors++;
2679  }
2680 
2681  }
2682 
2683  Assert(RefCountErrors == 0);
2684 #endif
2685 }
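
The useful pattern here is a debug-only sweep-and-report: warn about every leaked entry first, then fail a single assertion at the end, so one run reveals all leaks at once. A standalone sketch of that idiom, using the C library's assert() in place of the server's Assert():

#include <assert.h>
#include <stdio.h>

#define NENTRIES 8

/* Hypothetical pin registry: a nonzero refcount at cleanup is a leak. */
static int refcounts[NENTRIES];

static void
check_for_leaks(void)
{
    int         errors = 0;

    for (int i = 0; i < NENTRIES; i++)
    {
        if (refcounts[i] != 0)
        {
            fprintf(stderr, "entry %d leaked (refcount=%d)\n",
                    i, refcounts[i]);
            errors++;
        }
    }
    /* Report every leak first, then fail once. */
    assert(errors == 0);
}

int
main(void)
{
    check_for_leaks();          /* passes: nothing pinned */
    return 0;
}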

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 2734 of file bufmgr.c.

References BufferSync().

Referenced by CheckPointGuts().

2735 {
2736  BufferSync(flags);
2737 }

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem a,
const CkptSortItem b 
)
inline static

Definition at line 4684 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

4685 {
4686  /* compare tablespace */
4687  if (a->tsId < b->tsId)
4688  return -1;
4689  else if (a->tsId > b->tsId)
4690  return 1;
4691  /* compare relation */
4692  if (a->relNode < b->relNode)
4693  return -1;
4694  else if (a->relNode > b->relNode)
4695  return 1;
4696  /* compare fork */
4697  else if (a->forkNum < b->forkNum)
4698  return -1;
4699  else if (a->forkNum > b->forkNum)
4700  return 1;
4701  /* compare block number */
4702  else if (a->blockNum < b->blockNum)
4703  return -1;
4704  else if (a->blockNum > b->blockNum)
4705  return 1;
4706  /* equal page IDs are unlikely, but not impossible */
4707  return 0;
4708 }

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 4049 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

4050 {
4051  BufferDesc *buf;
4052 
4053  Assert(BufferIsPinned(buffer));
4054  if (BufferIsLocal(buffer))
4055  return true; /* act as though we got it */
4056 
4057  buf = GetBufferDescriptor(buffer - 1);
4058 
4059  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
4060  LW_EXCLUSIVE);
4061 }

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 4257 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().

4258 {
4259  BufferDesc *bufHdr;
4260  uint32 buf_state,
4261  refcount;
4262 
4263  Assert(BufferIsValid(buffer));
4264 
4265  if (BufferIsLocal(buffer))
4266  {
4267  refcount = LocalRefCount[-buffer - 1];
4268  /* There should be exactly one pin */
4269  Assert(refcount > 0);
4270  if (refcount != 1)
4271  return false;
4272  /* Nobody else to wait for */
4273  return true;
4274  }
4275 
4276  /* There should be exactly one local pin */
4277  refcount = GetPrivateRefCount(buffer);
4278  Assert(refcount);
4279  if (refcount != 1)
4280  return false;
4281 
4282  /* Try to acquire lock */
4283  if (!ConditionalLockBuffer(buffer))
4284  return false;
4285 
4286  bufHdr = GetBufferDescriptor(buffer - 1);
4287  buf_state = LockBufHdr(bufHdr);
4288  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
4289 
4290  Assert(refcount > 0);
4291  if (refcount == 1)
4292  {
4293  /* Successfully acquired exclusive lock with pincount 1 */
4294  UnlockBufHdr(bufHdr, buf_state);
4295  return true;
4296  }
4297 
4298  /* Failed, so release the lock */
4299  UnlockBufHdr(bufHdr, buf_state);
4300  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4301  return false;
4302 }
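
Callers treat this function as an opportunistic gate: when the pin count or lock is contended, they skip the optional cleanup instead of waiting. The same idiom, sketched with a POSIX trylock rather than the buffer manager's pins and LWLocks:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical analogue of an opportunistic pruning pass: do the work
 * only if the exclusive lock can be taken without blocking. */
static void
maybe_cleanup(void)
{
    if (pthread_mutex_trylock(&page_lock) != 0)
        return;                 /* contended: cleanup is optional, skip */

    puts("performing opportunistic cleanup");
    pthread_mutex_unlock(&page_lock);
}

int
main(void)
{
    maybe_cleanup();
    return 0;
}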

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3421 of file bufmgr.c.

References buftag::blockNum, buf, BufferDescriptorGetBuffer, RelFileNode::dbNode, elog, buftag::forkNum, BufferDesc::freeNext, GetBufferDescriptor, GetPrivateRefCount(), i, InvalidateBuffer(), InvalidBackendId, LockBufHdr(), LOG, NBuffers, relpathbackend, relpathperm, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3422 {
3423  int i;
3424 
3425  /*
3426  * We needn't consider local buffers, since by assumption the target
3427  * database isn't our own.
3428  */
3429 
3430  for (i = 0; i < NBuffers; i++)
3431  {
3432  BufferDesc *bufHdr = GetBufferDescriptor(i);
3433  uint32 buf_state;
3434 
3435  /*
3436  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3437  * and saves some cycles.
3438  */
3439  if (bufHdr->tag.rnode.dbNode != dbid)
3440  continue;
3441 
3442  buf_state = LockBufHdr(bufHdr);
3443  if (bufHdr->tag.rnode.dbNode == dbid)
3444  InvalidateBuffer(bufHdr); /* releases spinlock */
3445  else
3446  UnlockBufHdr(bufHdr, buf_state);
3447  }
3448 }
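
The "unlocked precheck" mentioned in the comment is a racy read that is revalidated under the header lock; it is safe because, during the drop, no buffer can change *to* the target database, only away from it. A standalone sketch of that precheck/recheck shape (toy slot type; a mutex stands in for the buffer-header spinlock):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    pthread_mutex_t hdr_lock;
    unsigned    dbid;           /* may be changed by other threads */
} Slot;

/* Returns true if the slot matched and was invalidated. */
static bool
drop_if_matches(Slot *slot, unsigned target)
{
    bool        match;

    /* Racy precheck: cheap, and false negatives are impossible because
     * slots never change *to* 'target' during the drop. */
    if (slot->dbid != target)
        return false;

    pthread_mutex_lock(&slot->hdr_lock);
    match = (slot->dbid == target);     /* recheck under the lock */
    if (match)
        slot->dbid = 0;                 /* "invalidate" the slot */
    pthread_mutex_unlock(&slot->hdr_lock);
    return match;
}

int
main(void)
{
    Slot        s = {PTHREAD_MUTEX_INITIALIZER, 42};

    printf("dropped: %d\n", drop_if_matches(&s, 42));
    return 0;
}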

◆ DropRelFileNodeBuffers()

void DropRelFileNodeBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 3067 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeLocalBuffers(), FindAndDropRelFileNodeBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, SMgrRelationData::smgr_rnode, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrtruncate().

3069 {
3070  int i;
3071  int j;
3072  RelFileNodeBackend rnode;
3073  BlockNumber nForkBlock[MAX_FORKNUM];
3074  uint64 nBlocksToInvalidate = 0;
3075 
3076  rnode = smgr_reln->smgr_rnode;
3077 
3078  /* If it's a local relation, it's localbuf.c's problem. */
3079  if (RelFileNodeBackendIsTemp(rnode))
3080  {
3081  if (rnode.backend == MyBackendId)
3082  {
3083  for (j = 0; j < nforks; j++)
3084  DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
3085  firstDelBlock[j]);
3086  }
3087  return;
3088  }
3089 
3090  /*
3091  * To remove all the pages of the specified relation forks from the buffer
3092  * pool, we need to scan the entire buffer pool but we can optimize it by
3093  * finding the buffers from BufMapping table provided we know the exact
3094  * size of each fork of the relation. The exact size is required to ensure
3095  * that we don't leave any buffer for the relation being dropped as
3096  * otherwise the background writer or checkpointer can lead to a PANIC
3097  * error while flushing buffers corresponding to files that don't exist.
3098  *
3099  * To know the exact size, we rely on the size we cached for each fork
3100  * during recovery, which limits the optimization to recovery and to
3101  * standbys, but we can easily extend it once we have a shared cache for
3102  * relation sizes.
3103  *
3104  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3105  * and future writes keep the cached value up-to-date. See
3106  * smgrextend. It is possible that the value of the first lseek is smaller
3107  * than the actual number of existing blocks in the file due to buggy
3108  * Linux kernels that might not have accounted for the recent write. But
3109  * that should be fine because there must not be any buffers after that
3110  * file size.
3111  */
3112  for (i = 0; i < nforks; i++)
3113  {
3114  /* Get the number of blocks for a relation's fork */
3115  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3116 
3117  if (nForkBlock[i] == InvalidBlockNumber)
3118  {
3119  nBlocksToInvalidate = InvalidBlockNumber;
3120  break;
3121  }
3122 
3123  /* calculate the number of blocks to be invalidated */
3124  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3125  }
3126 
3127  /*
3128  * We apply the optimization iff the total number of blocks to invalidate
3129  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3130  */
3131  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3132  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3133  {
3134  for (j = 0; j < nforks; j++)
3135  FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
3136  nForkBlock[j], firstDelBlock[j]);
3137  return;
3138  }
3139 
3140  for (i = 0; i < NBuffers; i++)
3141  {
3142  BufferDesc *bufHdr = GetBufferDescriptor(i);
3143  uint32 buf_state;
3144 
3145  /*
3146  * We can make this a tad faster by prechecking the buffer tag before
3147  * we attempt to lock the buffer; this saves a lot of lock
3148  * acquisitions in typical cases. It should be safe because the
3149  * caller must have AccessExclusiveLock on the relation, or some other
3150  * reason to be certain that no one is loading new pages of the rel
3151  * into the buffer pool. (Otherwise we might well miss such pages
3152  * entirely.) Therefore, while the tag might be changing while we
3153  * look at it, it can't be changing *to* a value we care about, only
3154  * *away* from such a value. So false negatives are impossible, and
3155  * false positives are safe because we'll recheck after getting the
3156  * buffer lock.
3157  *
3158  * We could check forkNum and blockNum as well as the rnode, but the
3159  * incremental win from doing so seems small.
3160  */
3161  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
3162  continue;
3163 
3164  buf_state = LockBufHdr(bufHdr);
3165 
3166  for (j = 0; j < nforks; j++)
3167  {
3168  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
3169  bufHdr->tag.forkNum == forkNum[j] &&
3170  bufHdr->tag.blockNum >= firstDelBlock[j])
3171  {
3172  InvalidateBuffer(bufHdr); /* releases spinlock */
3173  break;
3174  }
3175  }
3176  if (j >= nforks)
3177  UnlockBufHdr(bufHdr, buf_state);
3178  }
3179 }
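
The strategy choice boils down to this: when every fork's size is known from the cache and the block total is under the threshold, probe the mapping table per block; otherwise sweep the whole buffer pool. A hedged sketch of just that decision, with hypothetical names and stub functions standing in for the two code paths:

#include <stdint.h>
#include <stdio.h>

#define NBUFFERS 16384
#define DROP_FULL_SCAN_THRESHOLD ((uint64_t) NBUFFERS / 32)
#define NBLOCKS_UNKNOWN UINT64_MAX

static void
drop_by_lookup(uint64_t nblocks)        /* O(nblocks) hash probes */
{
    printf("targeted lookups for %llu blocks\n",
           (unsigned long long) nblocks);
}

static void
drop_by_full_scan(void)                 /* O(NBUFFERS) sweep */
{
    printf("full scan of %d buffers\n", NBUFFERS);
}

static void
drop_fork_buffers(uint64_t nblocks_to_invalidate)
{
    if (nblocks_to_invalidate != NBLOCKS_UNKNOWN &&
        nblocks_to_invalidate < DROP_FULL_SCAN_THRESHOLD)
        drop_by_lookup(nblocks_to_invalidate);
    else
        drop_by_full_scan();
}

int
main(void)
{
    drop_fork_buffers(100);             /* small and known: lookups */
    drop_fork_buffers(NBLOCKS_UNKNOWN); /* size unknown: full scan */
    return 0;
}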

◆ DropRelFileNodesAllBuffers()

void DropRelFileNodesAllBuffers ( SMgrRelation smgr_reln,
int  nnodes 
)

Definition at line 3191 of file bufmgr.c.

References BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeAllLocalBuffers(), FindAndDropRelFileNodeBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, SMgrRelationData::node, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, buftag::rnode, rnode_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkall().

3192 {
3193  int i;
3194  int j;
3195  int n = 0;
3196  SMgrRelation *rels;
3197  BlockNumber (*block)[MAX_FORKNUM + 1];
3198  uint64 nBlocksToInvalidate = 0;
3199  RelFileNode *nodes;
3200  bool cached = true;
3201  bool use_bsearch;
3202 
3203  if (nnodes == 0)
3204  return;
3205 
3206  rels = palloc(sizeof(SMgrRelation) * nnodes); /* non-local relations */
3207 
3208  /* If it's a local relation, it's localbuf.c's problem. */
3209  for (i = 0; i < nnodes; i++)
3210  {
3211  if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
3212  {
3213  if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
3214  DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
3215  }
3216  else
3217  rels[n++] = smgr_reln[i];
3218  }
3219 
3220  /*
3221  * If there are no non-local relations, then we're done. Release the
3222  * memory and return.
3223  */
3224  if (n == 0)
3225  {
3226  pfree(rels);
3227  return;
3228  }
3229 
3230  /*
3231  * This is used to remember the number of blocks for all the relations'
3232  * forks.
3233  */
3234  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3235  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3236 
3237  /*
3238  * We can avoid scanning the entire buffer pool if we know the exact size
3239  * of each of the given relation forks. See DropRelFileNodeBuffers.
3240  */
3241  for (i = 0; i < n && cached; i++)
3242  {
3243  for (j = 0; j <= MAX_FORKNUM; j++)
3244  {
3245  /* Get the number of blocks for a relation's fork. */
3246  block[i][j] = smgrnblocks_cached(rels[i], j);
3247 
3248  /* We only need to consider the relation forks that exist. */
3249  if (block[i][j] == InvalidBlockNumber)
3250  {
3251  if (!smgrexists(rels[i], j))
3252  continue;
3253  cached = false;
3254  break;
3255  }
3256 
3257  /* calculate the total number of blocks to be invalidated */
3258  nBlocksToInvalidate += block[i][j];
3259  }
3260  }
3261 
3262  /*
3263  * We apply the optimization iff the total number of blocks to invalidate
3264  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3265  */
3266  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3267  {
3268  for (i = 0; i < n; i++)
3269  {
3270  for (j = 0; j <= MAX_FORKNUM; j++)
3271  {
3272  /* ignore relation forks that don't exist */
3273  if (!BlockNumberIsValid(block[i][j]))
3274  continue;
3275 
3276  /* drop all the buffers for a particular relation fork */
3277  FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node,
3278  j, block[i][j], 0);
3279  }
3280  }
3281 
3282  pfree(block);
3283  pfree(rels);
3284  return;
3285  }
3286 
3287  pfree(block);
3288  nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
3289  for (i = 0; i < n; i++)
3290  nodes[i] = rels[i]->smgr_rnode.node;
3291 
3292  /*
3293  * For a low number of relations to drop, just use a simple walk-through
3294  * to save the bsearch overhead. The threshold is more a guess than an
3295  * exactly determined value, as it depends on many factors (CPU and RAM
3296  * speeds, amount of shared buffers etc.).
3297  */
3298  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3299 
3300  /* sort the list of rnodes if necessary */
3301  if (use_bsearch)
3302  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
3303 
3304  for (i = 0; i < NBuffers; i++)
3305  {
3306  RelFileNode *rnode = NULL;
3307  BufferDesc *bufHdr = GetBufferDescriptor(i);
3308  uint32 buf_state;
3309 
3310  /*
3311  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3312  * and saves some cycles.
3313  */
3314 
3315  if (!use_bsearch)
3316  {
3317  int j;
3318 
3319  for (j = 0; j < n; j++)
3320  {
3321  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
3322  {
3323  rnode = &nodes[j];
3324  break;
3325  }
3326  }
3327  }
3328  else
3329  {
3330  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
3331  nodes, n, sizeof(RelFileNode),
3332  rnode_comparator);
3333 
3334 
3335  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3336  if (rnode == NULL)
3337  continue;
3338 
3339  buf_state = LockBufHdr(bufHdr);
3340  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3341  InvalidateBuffer(bufHdr); /* releases spinlock */
3342  else
3343  UnlockBufHdr(bufHdr, buf_state);
3344  }
3345 
3346  pfree(nodes);
3347  pfree(rels);
3348 }
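
The membership test inside the sweep switches between a linear walk and sort-plus-binary-search based on the relation count, since sorting only pays off when there are many probes against the key set. The same trade-off, sketched with plain unsigned keys and the C library's qsort()/bsearch():

#include <stdio.h>
#include <stdlib.h>

#define BSEARCH_THRESHOLD 20

static int
ucmp(const void *pa, const void *pb)
{
    unsigned    a = *(const unsigned *) pa;
    unsigned    b = *(const unsigned *) pb;

    return (a > b) - (a < b);
}

int
main(void)
{
    unsigned    keys[] = {42, 7, 19};
    int         n = 3;
    unsigned    probes[] = {7, 8};
    int         use_bsearch = (n > BSEARCH_THRESHOLD);

    if (use_bsearch)
        qsort(keys, n, sizeof(unsigned), ucmp);     /* sort once */

    for (int p = 0; p < 2; p++)                     /* probe many times */
    {
        int         found = 0;

        if (use_bsearch)
            found = bsearch(&probes[p], keys, n,
                            sizeof(unsigned), ucmp) != NULL;
        else
        {
            for (int i = 0; i < n; i++)
                if (keys[i] == probes[p])
                    found = 1;
        }
        printf("%u %s\n", probes[p], found ? "found" : "missing");
    }
    return 0;
}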

◆ FindAndDropRelFileNodeBuffers()

static void FindAndDropRelFileNodeBuffers ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3360 of file bufmgr.c.

References buftag::blockNum, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), buftag::forkNum, GetBufferDescriptor, INIT_BUFFERTAG, InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

3363 {
3364  BlockNumber curBlock;
3365 
3366  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3367  {
3368  uint32 bufHash; /* hash value for tag */
3369  BufferTag bufTag; /* identity of requested block */
3370  LWLock *bufPartitionLock; /* buffer partition lock for it */
3371  int buf_id;
3372  BufferDesc *bufHdr;
3373  uint32 buf_state;
3374 
3375  /* create a tag so we can lookup the buffer */
3376  INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock);
3377 
3378  /* determine its hash code and partition lock ID */
3379  bufHash = BufTableHashCode(&bufTag);
3380  bufPartitionLock = BufMappingPartitionLock(bufHash);
3381 
3382  /* Check that it is in the buffer pool. If not, do nothing. */
3383  LWLockAcquire(bufPartitionLock, LW_SHARED);
3384  buf_id = BufTableLookup(&bufTag, bufHash);
3385  LWLockRelease(bufPartitionLock);
3386 
3387  if (buf_id < 0)
3388  continue;
3389 
3390  bufHdr = GetBufferDescriptor(buf_id);
3391 
3392  /*
3393  * We need to lock the buffer header and recheck if the buffer is
3394  * still associated with the same block because the buffer could be
3395  * evicted by some other backend loading blocks for a different
3396  * relation after we release lock on the BufMapping table.
3397  */
3398  buf_state = LockBufHdr(bufHdr);
3399 
3400  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
3401  bufHdr->tag.forkNum == forkNum &&
3402  bufHdr->tag.blockNum >= firstDelBlock)
3403  InvalidateBuffer(bufHdr); /* releases spinlock */
3404  else
3405  UnlockBufHdr(bufHdr, buf_state);
3406  }
3407 }

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln 
)
static

Definition at line 2818 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

2819 {
2820  XLogRecPtr recptr;
2821  ErrorContextCallback errcallback;
2822  instr_time io_start,
2823  io_time;
2824  Block bufBlock;
2825  char *bufToWrite;
2826  uint32 buf_state;
2827 
2828  /*
2829  * Try to start an I/O operation. If StartBufferIO returns false, then
2830  * someone else flushed the buffer before we could, so we need not do
2831  * anything.
2832  */
2833  if (!StartBufferIO(buf, false))
2834  return;
2835 
2836  /* Setup error traceback support for ereport() */
2837  errcallback.callback = shared_buffer_write_error_callback;
2838  errcallback.arg = (void *) buf;
2839  errcallback.previous = error_context_stack;
2840  error_context_stack = &errcallback;
2841 
2842  /* Find smgr relation for buffer */
2843  if (reln == NULL)
2844  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2845 
2846  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2847  buf->tag.blockNum,
2848  reln->smgr_rnode.node.spcNode,
2849  reln->smgr_rnode.node.dbNode,
2850  reln->smgr_rnode.node.relNode);
2851 
2852  buf_state = LockBufHdr(buf);
2853 
2854  /*
2855  * Run PageGetLSN while holding header lock, since we don't have the
2856  * buffer locked exclusively in all cases.
2857  */
2858  recptr = BufferGetLSN(buf);
2859 
2860  /* To check if block content changes while flushing. - vadim 01/17/97 */
2861  buf_state &= ~BM_JUST_DIRTIED;
2862  UnlockBufHdr(buf, buf_state);
2863 
2864  /*
2865  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2866  * rule that log updates must hit disk before any of the data-file changes
2867  * they describe do.
2868  *
2869  * However, this rule does not apply to unlogged relations, which will be
2870  * lost after a crash anyway. Most unlogged relation pages do not bear
2871  * LSNs since we never emit WAL records for them, and therefore flushing
2872  * up through the buffer LSN would be useless, but harmless. However,
2873  * GiST indexes use LSNs internally to track page-splits, and therefore
2874  * unlogged GiST pages bear "fake" LSNs generated by
2875  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2876  * LSN counter could advance past the WAL insertion point; and if it did
2877  * happen, attempting to flush WAL through that location would fail, with
2878  * disastrous system-wide consequences. To make sure that can't happen,
2879  * skip the flush if the buffer isn't permanent.
2880  */
2881  if (buf_state & BM_PERMANENT)
2882  XLogFlush(recptr);
2883 
2884  /*
2885  * Now it's safe to write buffer to disk. Note that no one else should
2886  * have been able to write it while we were busy with log flushing because
2887  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
2888  */
2889  bufBlock = BufHdrGetBlock(buf);
2890 
2891  /*
2892  * Update page checksum if desired. Since we have only shared lock on the
2893  * buffer, other processes might be updating hint bits in it, so we must
2894  * copy the page to private storage if we do checksumming.
2895  */
2896  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2897 
2898  if (track_io_timing)
2899  INSTR_TIME_SET_CURRENT(io_start);
2900 
2901  /*
2902  * bufToWrite is either the shared buffer or a copy, as appropriate.
2903  */
2904  smgrwrite(reln,
2905  buf->tag.forkNum,
2906  buf->tag.blockNum,
2907  bufToWrite,
2908  false);
2909 
2910  if (track_io_timing)
2911  {
2912  INSTR_TIME_SET_CURRENT(io_time);
2913  INSTR_TIME_SUBTRACT(io_time, io_start);
2914  pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
2915  INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
2916  }
2917 
2918  pgBufferUsage.shared_blks_written++;
2919 
2920  /*
2921  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2922  * end the BM_IO_IN_PROGRESS state.
2923  */
2924  TerminateBufferIO(buf, true, 0);
2925 
2926  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2927  buf->tag.blockNum,
2928  reln->smgr_rnode.node.spcNode,
2929  reln->smgr_rnode.node.dbNode,
2930  reln->smgr_rnode.node.relNode);
2931 
2932  /* Pop the error context stack */
2933  error_context_stack = errcallback.previous;
2934 }
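
The essential ordering enforced above is small: make the WAL durable up to the page's LSN, then write the page, skipping the WAL flush for non-permanent buffers whose LSNs may be fake. A minimal sketch of that rule, with hypothetical stub functions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t Lsn;

static void
wal_flush(Lsn upto)                     /* stub: fsync WAL through 'upto' */
{
    printf("WAL flushed through %llu\n", (unsigned long long) upto);
}

static void
write_page(Lsn page_lsn)                /* stub: write the page image */
{
    printf("page with LSN %llu written\n", (unsigned long long) page_lsn);
}

/* WAL-before-data: the log record describing a change must reach disk
 * before the changed page does. */
static void
flush_one_page(Lsn page_lsn, bool is_permanent)
{
    if (is_permanent)
        wal_flush(page_lsn);    /* unlogged pages may carry fake LSNs */
    write_page(page_lsn);
}

int
main(void)
{
    flush_one_page(1234, true);
    return 0;
}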

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3721 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3722 {
3723  int i;
3724  BufferDesc *bufHdr;
3725 
3726  /* Make sure we can handle the pin inside the loop */
3727  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3728 
3729  for (i = 0; i < NBuffers; i++)
3730  {
3731  uint32 buf_state;
3732 
3733  bufHdr = GetBufferDescriptor(i);
3734 
3735  /*
3736  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3737  * and saves some cycles.
3738  */
3739  if (bufHdr->tag.rnode.dbNode != dbid)
3740  continue;
3741 
3742  ReservePrivateRefCountEntry();
3743 
3744  buf_state = LockBufHdr(bufHdr);
3745  if (bufHdr->tag.rnode.dbNode == dbid &&
3746  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3747  {
3748  PinBuffer_Locked(bufHdr);
3749  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3750  FlushBuffer(bufHdr, NULL);
3751  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3752  UnpinBuffer(bufHdr, true);
3753  }
3754  else
3755  UnlockBufHdr(bufHdr, buf_state);
3756  }
3757 }

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 3764 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().

3765 {
3766  BufferDesc *bufHdr;
3767 
3768  /* currently not needed, but no fundamental reason not to support */
3769  Assert(!BufferIsLocal(buffer));
3770 
3771  Assert(BufferIsPinned(buffer));
3772 
3773  bufHdr = GetBufferDescriptor(buffer - 1);
3774 
3775  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3776 
3777  FlushBuffer(bufHdr, NULL);
3778 }

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 3525 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by heapam_relation_copy_data(), and index_copy_data().

3526 {
3527  int i;
3528  BufferDesc *bufHdr;
3529 
3530  /* Open rel at the smgr level if not already done */
3531  RelationOpenSmgr(rel);
3532 
3533  if (RelationUsesLocalBuffers(rel))
3534  {
3535  for (i = 0; i < NLocBuffer; i++)
3536  {
3537  uint32 buf_state;
3538 
3539  bufHdr = GetLocalBufferDescriptor(i);
3540  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3541  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3542  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3543  {
3544  ErrorContextCallback errcallback;
3545  Page localpage;
3546 
3547  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3548 
3549  /* Setup error traceback support for ereport() */
3550  errcallback.callback = local_buffer_write_error_callback;
3551  errcallback.arg = (void *) bufHdr;
3552  errcallback.previous = error_context_stack;
3553  error_context_stack = &errcallback;
3554 
3555  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3556 
3557  smgrwrite(rel->rd_smgr,
3558  bufHdr->tag.forkNum,
3559  bufHdr->tag.blockNum,
3560  localpage,
3561  false);
3562 
3563  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3564  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3565 
3566  /* Pop the error context stack */
3567  error_context_stack = errcallback.previous;
3568  }
3569  }
3570 
3571  return;
3572  }
3573 
3574  /* Make sure we can handle the pin inside the loop */
3575  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3576 
3577  for (i = 0; i < NBuffers; i++)
3578  {
3579  uint32 buf_state;
3580 
3581  bufHdr = GetBufferDescriptor(i);
3582 
3583  /*
3584  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3585  * and saves some cycles.
3586  */
3587  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3588  continue;
3589 
3590  ReservePrivateRefCountEntry();
3591 
3592  buf_state = LockBufHdr(bufHdr);
3593  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3594  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3595  {
3596  PinBuffer_Locked(bufHdr);
3597  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3598  FlushBuffer(bufHdr, rel->rd_smgr);
3599  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3600  UnpinBuffer(bufHdr, true);
3601  }
3602  else
3603  UnlockBufHdr(bufHdr, buf_state);
3604  }
3605 }
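
A detail worth noting: local buffers are checksummed in place (PageSetChecksumInplace) because no other process can modify them, whereas the shared-buffer path in FlushBuffer checksums a private copy, since concurrent hint-bit updates can race with the write. A hedged sketch of the copy-before-checksum side, with a toy checksum standing in for the real page algorithm:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 8192

/* Hypothetical checksum; the real one is a page-level algorithm. */
static unsigned
toy_checksum(const char *page)
{
    unsigned    sum = 0;

    for (int i = 0; i < PAGE_SIZE; i++)
        sum = sum * 31 + (unsigned char) page[i];
    return sum;
}

/* Shared-buffer side: copy first, checksum the copy, write the copy,
 * so concurrent hint-bit setters can't invalidate the checksum. */
static void
write_shared_page(const char *shared_page)
{
    static char copy[PAGE_SIZE];

    memcpy(copy, shared_page, PAGE_SIZE);
    printf("writing copy with checksum %u\n", toy_checksum(copy));
}

int
main(void)
{
    static char page[PAGE_SIZE];

    write_shared_page(page);
    return 0;
}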

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 3617 of file bufmgr.c.

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, RelFileNodeBackend::node, palloc(), pfree(), pg_qsort(), PinBuffer_Locked(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, SMgrSortArray::rnode, rnode_comparator(), SMgrRelationData::smgr_rnode, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by smgrdosyncall().

3618 {
3619  int i;
3620  SMgrSortArray *srels;
3621  bool use_bsearch;
3622 
3623  if (nrels == 0)
3624  return;
3625 
3626  /* fill-in array for qsort */
3627  srels = palloc(sizeof(SMgrSortArray) * nrels);
3628 
3629  for (i = 0; i < nrels; i++)
3630  {
3631  Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
3632 
3633  srels[i].rnode = smgrs[i]->smgr_rnode.node;
3634  srels[i].srel = smgrs[i];
3635  }
3636 
3637  /*
3638  * Save the bsearch overhead for a low number of relations to sync. See
3639  * DropRelFileNodesAllBuffers for details.
3640  */
3641  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
3642 
3643  /* sort the list of SMgrRelations if necessary */
3644  if (use_bsearch)
3645  pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
3646 
3647  /* Make sure we can handle the pin inside the loop */
3648  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3649 
3650  for (i = 0; i < NBuffers; i++)
3651  {
3652  SMgrSortArray *srelent = NULL;
3653  BufferDesc *bufHdr = GetBufferDescriptor(i);
3654  uint32 buf_state;
3655 
3656  /*
3657  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3658  * and saves some cycles.
3659  */
3660 
3661  if (!use_bsearch)
3662  {
3663  int j;
3664 
3665  for (j = 0; j < nrels; j++)
3666  {
3667  if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
3668  {
3669  srelent = &srels[j];
3670  break;
3671  }
3672  }
3673 
3674  }
3675  else
3676  {
3677  srelent = bsearch((const void *) &(bufHdr->tag.rnode),
3678  srels, nrels, sizeof(SMgrSortArray),
3679  rnode_comparator);
3680 
3681 
3682  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3683  if (srelent == NULL)
3684  continue;
3685 
3686  ReservePrivateRefCountEntry();
3687 
3688  buf_state = LockBufHdr(bufHdr);
3689  if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
3690  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3691  {
3692  PinBuffer_Locked(bufHdr);
3693  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3694  FlushBuffer(bufHdr, srelent->srel);
3695  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3696  UnpinBuffer(bufHdr, true);
3697  }
3698  else
3699  UnlockBufHdr(bufHdr, buf_state);
3700  }
3701 
3702  pfree(srels);
3703 }

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 410 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

411 {
412  Assert(ref->refcount == 0);
413 
414  if (ref >= &PrivateRefCountArray[0] &&
415  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
416  {
417  ref->buffer = InvalidBuffer;
418 
419  /*
420  * Mark the just used entry as reserved - in many scenarios that
421  * allows us to avoid ever having to search the array/hash for free
422  * entries.
423  */
424  ReservedRefCountEntry = ref;
425  }
426  else
427  {
428  bool found;
429  Buffer buffer = ref->buffer;
430 
431  hash_search(PrivateRefCountHash,
432  (void *) &buffer,
433  HASH_REMOVE,
434  &found);
435  Assert(found);
436  Assert(PrivateRefCountOverflowed > 0);
437  PrivateRefCountOverflowed--;
438  }
439 }

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inline static

Definition at line 387 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), PrintBufferLeakWarning(), and ReadRecentBuffer().

388 {
389  PrivateRefCountEntry *ref;
390 
391  Assert(BufferIsValid(buffer));
392  Assert(!BufferIsLocal(buffer));
393 
394  /*
395  * Not moving the entry - that's ok for the current users, but we might
396  * want to change this one day.
397  */
398  ref = GetPrivateRefCountEntry(buffer, false);
399 
400  if (ref == NULL)
401  return 0;
402  return ref->refcount;
403 }

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 307 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

308 {
309  PrivateRefCountEntry *res;
310  int i;
311 
312  Assert(BufferIsValid(buffer));
313  Assert(!BufferIsLocal(buffer));
314 
315  /*
316  * First search for references in the array, that'll be sufficient in the
317  * majority of cases.
318  */
319  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
320  {
321  res = &PrivateRefCountArray[i];
322 
323  if (res->buffer == buffer)
324  return res;
325  }
326 
327  /*
328  * By here we know that the buffer, if already pinned, isn't residing in
329  * the array.
330  *
331  * Only look up the buffer in the hashtable if we've previously overflowed
332  * into it.
333  */
334  if (PrivateRefCountOverflowed == 0)
335  return NULL;
336 
337  res = hash_search(PrivateRefCountHash,
338  (void *) &buffer,
339  HASH_FIND,
340  NULL);
341 
342  if (res == NULL)
343  return NULL;
344  else if (!do_move)
345  {
346  /* caller doesn't want us to move the hash entry into the array */
347  return res;
348  }
349  else
350  {
351  /* move buffer from hashtable into the free array slot */
352  bool found;
353  PrivateRefCountEntry *free;
354 
355  /* Ensure there's a free array slot */
356  ReservePrivateRefCountEntry();
357 
358  /* Use up the reserved slot */
359  Assert(ReservedRefCountEntry != NULL);
360  free = ReservedRefCountEntry;
361  ReservedRefCountEntry = NULL;
362  Assert(free->buffer == InvalidBuffer);
363 
364  /* and fill it */
365  free->buffer = buffer;
366  free->refcount = res->refcount;
367 
368  /* delete from hashtable */
369  hash_search(PrivateRefCountHash,
370  (void *) &buffer,
371  HASH_REMOVE,
372  &found);
373  Assert(found);
374  Assert(PrivateRefCountOverflowed > 0);
375  PrivateRefCountOverflowed--;
376 
377  return free;
378  }
379 }
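
The two-tier layout is the key idea: a small fixed array covers the common case of few simultaneous pins, a hash table absorbs overflow, and the overflow counter lets lookups skip the hash entirely when nothing has ever spilled. A self-contained model of the lookup order, with a plain overflow array standing in for the real hash table:

#include <stdio.h>

#define ARRAY_ENTRIES 8
#define SPILL_ENTRIES 64

typedef struct
{
    int         buffer;         /* 0 means "slot free" */
    int         refcount;
} RefEntry;

static RefEntry fast[ARRAY_ENTRIES];
static RefEntry spill[SPILL_ENTRIES];   /* stand-in for the hash table */
static int  overflowed;                 /* live entries in 'spill' */

static RefEntry *
find_entry(int buffer)
{
    /* Tier 1: small array, sufficient in the vast majority of cases. */
    for (int i = 0; i < ARRAY_ENTRIES; i++)
        if (fast[i].buffer == buffer)
            return &fast[i];

    /* Tier 2: only consult the spill structure if it has ever been used. */
    if (overflowed == 0)
        return NULL;
    for (int i = 0; i < SPILL_ENTRIES; i++)
        if (spill[i].buffer == buffer)
            return &spill[i];
    return NULL;
}

int
main(void)
{
    RefEntry   *e;

    fast[0].buffer = 17;
    fast[0].refcount = 2;
    e = find_entry(17);
    printf("refcount(17) = %d\n", e ? e->refcount : 0);
    return 0;
}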

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 4231 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

4232 {
4233  int bufid = GetStartupBufferPinWaitBufId();
4234 
4235  /*
4236  * If we get woken slowly then it's possible that the Startup process was
4237  * already woken by other backends before we got here. Also possible that
4238  * we get here by multiple interrupts or interrupts at inappropriate
4239  * times, so make sure we do nothing if the bufid is not set.
4240  */
4241  if (bufid < 0)
4242  return false;
4243 
4244  if (GetPrivateRefCount(bufid + 1) > 0)
4245  return true;
4246 
4247  return false;
4248 }

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3822 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

3823 {
3824  Assert(BufferIsPinned(buffer));
3825  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3826  if (BufferIsLocal(buffer))
3827  LocalRefCount[-buffer - 1]++;
3828  else
3829  {
3830  PrivateRefCountEntry *ref;
3831 
3832  ref = GetPrivateRefCountEntry(buffer, true);
3833  Assert(ref != NULL);
3834  ref->refcount++;
3835  }
3836  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
3837 }
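
A minimal usage sketch (hypothetical call site, not part of bufmgr.c): each IncrBufferRefCount() call creates an additional reference that must eventually be dropped with its own ReleaseBuffer().

 /* buf is already pinned by this backend, e.g. from ReadBuffer() */
 IncrBufferRefCount(buf);       /* second reference, for a held slot etc. */
 /* ... use the extra reference ... */
 ReleaseBuffer(buf);            /* drops the extra reference */
 ReleaseBuffer(buf);            /* drops the original pin */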

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 2598 of file bufmgr.c.

References HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, and PrivateRefCountArray.

Referenced by BaseInit().

2599 {
2600  HASHCTL hash_ctl;
2601 
2602  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2603 
2604  hash_ctl.keysize = sizeof(int32);
2605  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2606 
2607  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2608  HASH_ELEM | HASH_BLOBS);
2609 }

◆ InitBufferPoolBackend()

void InitBufferPoolBackend ( void  )

Definition at line 2621 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().

2622 {
2623  on_shmem_exit(AtProcExit_Buffers, 0);
2624 }

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1464 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), and FindAndDropRelFileNodeBuffers().

1465 {
1466  BufferTag oldTag;
1467  uint32 oldHash; /* hash value for oldTag */
1468  LWLock *oldPartitionLock; /* buffer partition lock for it */
1469  uint32 oldFlags;
1470  uint32 buf_state;
1471 
1472  /* Save the original buffer tag before dropping the spinlock */
1473  oldTag = buf->tag;
1474 
1475  buf_state = pg_atomic_read_u32(&buf->state);
1476  Assert(buf_state & BM_LOCKED);
1477  UnlockBufHdr(buf, buf_state);
1478 
1479  /*
1480  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1481  * worth storing the hashcode in BufferDesc so we need not recompute it
1482  * here? Probably not.
1483  */
1484  oldHash = BufTableHashCode(&oldTag);
1485  oldPartitionLock = BufMappingPartitionLock(oldHash);
1486 
1487 retry:
1488 
1489  /*
1490  * Acquire exclusive mapping lock in preparation for changing the buffer's
1491  * association.
1492  */
1493  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1494 
1495  /* Re-lock the buffer header */
1496  buf_state = LockBufHdr(buf);
1497 
1498  /* If it's changed while we were waiting for lock, do nothing */
1499  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1500  {
1501  UnlockBufHdr(buf, buf_state);
1502  LWLockRelease(oldPartitionLock);
1503  return;
1504  }
1505 
1506  /*
1507  * We assume the only reason for it to be pinned is that someone else is
1508  * flushing the page out. Wait for them to finish. (This could be an
1509  * infinite loop if the refcount is messed up... it would be nice to time
1510  * out after awhile, but there seems no way to be sure how many loops may
1511  * be needed. Note that if the other guy has pinned the buffer but not
1512  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1513  * be busy-looping here.)
1514  */
1515  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1516  {
1517  UnlockBufHdr(buf, buf_state);
1518  LWLockRelease(oldPartitionLock);
1519  /* safety check: should definitely not be our *own* pin */
1520  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1521  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1522  WaitIO(buf);
1523  goto retry;
1524  }
1525 
1526  /*
1527  * Clear out the buffer's tag and flags. We must do this to ensure that
1528  * linear scans of the buffer array don't think the buffer is valid.
1529  */
1530  oldFlags = buf_state & BUF_FLAG_MASK;
1531  CLEAR_BUFFERTAG(buf->tag);
1532  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1533  UnlockBufHdr(buf, buf_state);
1534 
1535  /*
1536  * Remove the buffer from the lookup hashtable, if it was in there.
1537  */
1538  if (oldFlags & BM_TAG_VALID)
1539  BufTableDelete(&oldTag, oldHash);
1540 
1541  /*
1542  * Done with mapping lock.
1543  */
1544  LWLockRelease(oldPartitionLock);
1545 
1546  /*
1547  * Insert the buffer at the head of the list of free buffers.
1548  */
1549  StrategyFreeBuffer(buf);
1550 }

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 4313 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), hash_xlog_split_allocate_page(), and hashbucketcleanup().

4314 {
4315  BufferDesc *bufHdr;
4316  uint32 buf_state;
4317 
4318  Assert(BufferIsValid(buffer));
4319 
4320  if (BufferIsLocal(buffer))
4321  {
4322  /* There should be exactly one pin */
4323  if (LocalRefCount[-buffer - 1] != 1)
4324  return false;
4325  /* Nobody else to wait for */
4326  return true;
4327  }
4328 
4329  /* There should be exactly one local pin */
4330  if (GetPrivateRefCount(buffer) != 1)
4331  return false;
4332 
4333  bufHdr = GetBufferDescriptor(buffer - 1);
4334 
4335  /* caller must hold exclusive lock on buffer */
4336  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
4337  LW_EXCLUSIVE));
4338 
4339  buf_state = LockBufHdr(bufHdr);
4340 
4341  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4342  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4343  {
4344  /* pincount is OK. */
4345  UnlockBufHdr(bufHdr, buf_state);
4346  return true;
4347  }
4348 
4349  UnlockBufHdr(bufHdr, buf_state);
4350  return false;
4351 }
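
A hedged sketch of the intended call pattern (variable names are illustrative): the caller already holds a pin and an exclusive content lock, and uses the result to decide whether page-restructuring work is safe right now.

 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 if (IsBufferCleanupOK(buf))
 {
     /* we hold the only pin: safe to move tuples around on the page */
 }
 LockBuffer(buf, BUFFER_LOCK_UNLOCK);

Unlike LockBufferForCleanup(), this never waits; it simply reports whether cleanup conditions happen to hold.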

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4791 of file bufmgr.c.

References buftag::blockNum, cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4792 {
4793  int i;
4794 
4795  if (context->nr_pending == 0)
4796  return;
4797 
4798  /*
4799  * Executing the writes in-order can make them a lot faster, and allows to
4800  * merge writeback requests to consecutive blocks into larger writebacks.
4801  */
4802  sort_pending_writebacks(context->pending_writebacks, context->nr_pending);
4803 
4804  /*
4805  * Coalesce neighbouring writes, but nothing else. For that we iterate
4806  * through the, now sorted, array of pending flushes, and look forward to
4807  * find all neighbouring (or identical) writes.
4808  */
4809  for (i = 0; i < context->nr_pending; i++)
4810  {
4811  PendingWriteback *cur;
4812  PendingWriteback *next;
4813  SMgrRelation reln;
4814  int ahead;
4815  BufferTag tag;
4816  Size nblocks = 1;
4817 
4818  cur = &context->pending_writebacks[i];
4819  tag = cur->tag;
4820 
4821  /*
4822  * Peek ahead, into following writeback requests, to see if they can
4823  * be combined with the current one.
4824  */
4825  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4826  {
4827  next = &context->pending_writebacks[i + ahead + 1];
4828 
4829  /* different file, stop */
4830  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4831  cur->tag.forkNum != next->tag.forkNum)
4832  break;
4833 
4834  /* ok, block queued twice, skip */
4835  if (cur->tag.blockNum == next->tag.blockNum)
4836  continue;
4837 
4838  /* only merge consecutive writes */
4839  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4840  break;
4841 
4842  nblocks++;
4843  cur = next;
4844  }
4845 
4846  i += ahead;
4847 
4848  /* and finally tell the kernel to write the data to storage */
4849  reln = smgropen(tag.rnode, InvalidBackendId);
4850  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4851  }
4852 
4853  context->nr_pending = 0;
4854 }
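
A worked example of the coalescing rule (block numbers are hypothetical): if the sorted queue holds blocks 7, 8, 8 and 9 of the same relation fork, the duplicate 8 is skipped and the whole run is issued as one request:

 smgrwriteback(reln, forkNum, 7, 3);    /* covers blocks 7..9 */

A different file or fork, or a gap in block numbers, ends the run and starts a new smgrwriteback() call.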

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4557 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4558 {
4559  BufferDesc *bufHdr = (BufferDesc *) arg;
4560 
4561  if (bufHdr != NULL)
4562  {
4563  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4564  bufHdr->tag.forkNum);
4565 
4566  errcontext("writing block %u of relation %s",
4567  bufHdr->tag.blockNum, path);
4568  pfree(path);
4569  }
4570 }

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 4023 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

4024 {
4025  BufferDesc *buf;
4026 
4027  Assert(BufferIsPinned(buffer));
4028  if (BufferIsLocal(buffer))
4029  return; /* local buffers need no lock */
4030 
4031  buf = GetBufferDescriptor(buffer - 1);
4032 
4033  if (mode == BUFFER_LOCK_UNLOCK)
4034  LWLockRelease(BufferDescriptorGetContentLock(buf));
4035  else if (mode == BUFFER_LOCK_SHARE)
4036  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
4037  else if (mode == BUFFER_LOCK_EXCLUSIVE)
4038  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
4039  else
4040  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
4041 }
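
A minimal sketch of the usual pin-then-lock discipline (assuming a valid rel and blkno):

 Buffer      buf = ReadBuffer(rel, blkno);

 LockBuffer(buf, BUFFER_LOCK_SHARE);
 /* ... read the page via BufferGetPage(buf) ... */
 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 ReleaseBuffer(buf);

The content lock only serialises access to the page contents; it is the pin taken by ReadBuffer() that keeps the buffer from being evicted.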

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 4080 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, DeadlockTimeout, elog, ERROR, get_ps_display(), GetBufferDescriptor, GetCurrentTimestamp(), GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcPid, now(), palloc(), pfree(), PG_WAIT_BUFFER_PIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr, update_process_title, and BufferDesc::wait_backend_pid.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

4081 {
4082  BufferDesc *bufHdr;
4083  char *new_status = NULL;
4084  TimestampTz waitStart = 0;
4085  bool logged_recovery_conflict = false;
4086 
4087  Assert(BufferIsPinned(buffer));
4088  Assert(PinCountWaitBuf == NULL);
4089 
4090  if (BufferIsLocal(buffer))
4091  {
4092  /* There should be exactly one pin */
4093  if (LocalRefCount[-buffer - 1] != 1)
4094  elog(ERROR, "incorrect local pin count: %d",
4095  LocalRefCount[-buffer - 1]);
4096  /* Nobody else to wait for */
4097  return;
4098  }
4099 
4100  /* There should be exactly one local pin */
4101  if (GetPrivateRefCount(buffer) != 1)
4102  elog(ERROR, "incorrect local pin count: %d",
4103  GetPrivateRefCount(buffer));
4104 
4105  bufHdr = GetBufferDescriptor(buffer - 1);
4106 
4107  for (;;)
4108  {
4109  uint32 buf_state;
4110 
4111  /* Try to acquire lock */
4112  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4113  buf_state = LockBufHdr(bufHdr);
4114 
4115  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4116  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4117  {
4118  /* Successfully acquired exclusive lock with pincount 1 */
4119  UnlockBufHdr(bufHdr, buf_state);
4120 
4121  /*
4122  * Emit the log message if recovery conflict on buffer pin was
4123  * resolved but the startup process waited longer than
4124  * deadlock_timeout for it.
4125  */
4126  if (logged_recovery_conflict)
4127  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4128  waitStart, GetCurrentTimestamp(),
4129  NULL, false);
4130 
4131  /* Report change to non-waiting status */
4132  if (new_status)
4133  {
4134  set_ps_display(new_status);
4135  pfree(new_status);
4136  }
4137  return;
4138  }
4139  /* Failed, so mark myself as waiting for pincount 1 */
4140  if (buf_state & BM_PIN_COUNT_WAITER)
4141  {
4142  UnlockBufHdr(bufHdr, buf_state);
4143  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4144  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4145  }
4146  bufHdr->wait_backend_pid = MyProcPid;
4147  PinCountWaitBuf = bufHdr;
4148  buf_state |= BM_PIN_COUNT_WAITER;
4149  UnlockBufHdr(bufHdr, buf_state);
4150  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4151 
4152  /* Wait to be signaled by UnpinBuffer() */
4153  if (InHotStandby)
4154  {
4155  /* Report change to waiting status */
4156  if (update_process_title && new_status == NULL)
4157  {
4158  const char *old_status;
4159  int len;
4160 
4161  old_status = get_ps_display(&len);
4162  new_status = (char *) palloc(len + 8 + 1);
4163  memcpy(new_status, old_status, len);
4164  strcpy(new_status + len, " waiting");
4165  set_ps_display(new_status);
4166  new_status[len] = '\0'; /* truncate off " waiting" */
4167  }
4168 
4169  /*
4170  * Emit the log message if the startup process is waiting longer
4171  * than deadlock_timeout for recovery conflict on buffer pin.
4172  *
4173  * Skip this if first time through because the startup process has
4174  * not started waiting yet in this case. So, the wait start
4175  * timestamp is set after this logic.
4176  */
4177  if (waitStart != 0 && !logged_recovery_conflict)
4178  {
4179  TimestampTz now = GetCurrentTimestamp();
4180 
4181  if (TimestampDifferenceExceeds(waitStart, now,
4182  DeadlockTimeout))
4183  {
4184  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4185  waitStart, now, NULL, true);
4186  logged_recovery_conflict = true;
4187  }
4188  }
4189 
4190  /*
4191  * Set the wait start timestamp if logging is enabled and first
4192  * time through.
4193  */
4194  if (log_recovery_conflict_waits && waitStart == 0)
4195  waitStart = GetCurrentTimestamp();
4196 
4197  /* Publish the bufid that Startup process waits on */
4198  SetStartupBufferPinWaitBufId(buffer - 1);
4199  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4200  ResolveRecoveryConflictWithBufferPin();
4201  /* Reset the published bufid */
4202  SetStartupBufferPinWaitBufId(-1);
4203  }
4204  else
4205  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
4206 
4207  /*
4208  * Remove flag marking us as waiter. Normally this will not be set
4209  * anymore, but ProcWaitForSignal() can return for other signals as
4210  * well. We take care to only reset the flag if we're the waiter, as
4211  * theoretically another backend could have started waiting. That's
4212  * impossible with the current usages due to table level locking, but
4213  * better be safe.
4214  */
4215  buf_state = LockBufHdr(bufHdr);
4216  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4217  bufHdr->wait_backend_pid == MyProcPid)
4218  buf_state &= ~BM_PIN_COUNT_WAITER;
4219  UnlockBufHdr(bufHdr, buf_state);
4220 
4221  PinCountWaitBuf = NULL;
4222  /* Loop back and try again */
4223  }
4224 }
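
A hedged sketch of a vacuum-style caller (rel, blkno and vac_strategy are assumed to exist; the pruning step is elided):

 Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                      RBM_NORMAL, vac_strategy);

 LockBufferForCleanup(buf);     /* sleeps until ours is the only pin */
 /* ... prune or defragment the page ... */
 UnlockReleaseBuffer(buf);

Because the wait is unbounded, this is only used where conflicting pins are expected to be short-lived; in hot standby the wait can additionally be resolved by cancelling conflicting queries.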

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4603 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FindAndDropRelFileNodeBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4604 {
4605  SpinDelayStatus delayStatus;
4606  uint32 old_buf_state;
4607 
4608  init_local_spin_delay(&delayStatus);
4609 
4610  while (true)
4611  {
4612  /* set BM_LOCKED flag */
4613  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4614  /* if it wasn't set before we're OK */
4615  if (!(old_buf_state & BM_LOCKED))
4616  break;
4617  perform_spin_delay(&delayStatus);
4618  }
4619  finish_spin_delay(&delayStatus);
4620  return old_buf_state | BM_LOCKED;
4621 }
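
A minimal sketch of the header-lock pattern (bufHdr is an assumed BufferDesc pointer): the returned state word has BM_LOCKED set, and UnlockBufHdr() stores the possibly-updated word with the flag cleared.

 uint32      buf_state = LockBufHdr(bufHdr);

 if (BUF_STATE_GET_REFCOUNT(buf_state) == 0)
 {
     /* inspect or adjust flags while the header spinlock is held */
 }
 UnlockBufHdr(bufHdr, buf_state);

As with any spinlock, the critical section must stay short and must not contain anything that can block or error out.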

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 1562 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), lazy_scan_heap(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().

1563 {
1564  BufferDesc *bufHdr;
1565  uint32 buf_state;
1566  uint32 old_buf_state;
1567 
1568  if (!BufferIsValid(buffer))
1569  elog(ERROR, "bad buffer ID: %d", buffer);
1570 
1571  if (BufferIsLocal(buffer))
1572  {
1573  MarkLocalBufferDirty(buffer);
1574  return;
1575  }
1576 
1577  bufHdr = GetBufferDescriptor(buffer - 1);
1578 
1579  Assert(BufferIsPinned(buffer));
1580  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1581  LW_EXCLUSIVE));
1582 
1583  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1584  for (;;)
1585  {
1586  if (old_buf_state & BM_LOCKED)
1587  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1588 
1589  buf_state = old_buf_state;
1590 
1591  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1592  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1593 
1594  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1595  buf_state))
1596  break;
1597  }
1598 
1599  /*
1600  * If the buffer was not dirty already, do vacuum accounting.
1601  */
1602  if (!(old_buf_state & BM_DIRTY))
1603  {
1604  VacuumPageDirty++;
1605  pgBufferUsage.shared_blks_dirtied++;
1606  if (VacuumCostActive)
1607  VacuumCostBalance += VacuumCostPageDirty;
1608  }
1609 }
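
A hedged sketch of the usual critical-section ordering for a WAL-logged change (the record construction is elided and rmgr-specific):

 START_CRIT_SECTION();

 /* ... apply the change to BufferGetPage(buf) ... */
 MarkBufferDirty(buf);

 /* ... XLogBeginInsert(), XLogRegisterBuffer(), XLogInsert(), then
  *     PageSetLSN(page, recptr) ... */

 END_CRIT_SECTION();

Doing the MarkBufferDirty() inside the critical section ensures that a failure between modifying the page and logging it escalates to a PANIC instead of leaving a changed page that looks clean.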

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3854 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGPROC::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileNodeSkippingWAL(), buftag::rnode, BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3855 {
3856  BufferDesc *bufHdr;
3857  Page page = BufferGetPage(buffer);
3858 
3859  if (!BufferIsValid(buffer))
3860  elog(ERROR, "bad buffer ID: %d", buffer);
3861 
3862  if (BufferIsLocal(buffer))
3863  {
3864  MarkLocalBufferDirty(buffer);
3865  return;
3866  }
3867 
3868  bufHdr = GetBufferDescriptor(buffer - 1);
3869 
3870  Assert(GetPrivateRefCount(buffer) > 0);
3871  /* here, either share or exclusive lock is OK */
3872  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3873 
3874  /*
3875  * This routine might get called many times on the same page, if we are
3876  * making the first scan after commit of an xact that added/deleted many
3877  * tuples. So, be as quick as we can if the buffer is already dirty. We
3878  * do this by not acquiring spinlock if it looks like the status bits are
3879  * already set. Since we make this test unlocked, there's a chance we
3880  * might fail to notice that the flags have just been cleared, and failed
3881  * to reset them, due to memory-ordering issues. But since this function
3882  * is only intended to be used in cases where failing to write out the
3883  * data would be harmless anyway, it doesn't really matter.
3884  */
3885  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3886  (BM_DIRTY | BM_JUST_DIRTIED))
3887  {
3888  XLogRecPtr lsn = InvalidXLogRecPtr;
3889  bool dirtied = false;
3890  bool delayChkpt = false;
3891  uint32 buf_state;
3892 
3893  /*
3894  * If we need to protect hint bit updates from torn writes, WAL-log a
3895  * full page image of the page. This full page image is only necessary
3896  * if the hint bit update is the first change to the page since the
3897  * last checkpoint.
3898  *
3899  * We don't check full_page_writes here because that logic is included
3900  * when we call XLogInsert() since the value changes dynamically.
3901  */
3902  if (XLogHintBitIsNeeded() &&
3903  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3904  {
3905  /*
3906  * If we must not write WAL, due to a relfilenode-specific
3907  * condition or being in recovery, don't dirty the page. We can
3908  * set the hint, just not dirty the page as a result so the hint
3909  * is lost when we evict the page or shutdown.
3910  *
3911  * See src/backend/storage/page/README for longer discussion.
3912  */
3913  if (RecoveryInProgress() ||
3914  RelFileNodeSkippingWAL(bufHdr->tag.rnode))
3915  return;
3916 
3917  /*
3918  * If the block is already dirty because we either made a change
3919  * or set a hint already, then we don't need to write a full page
3920  * image. Note that aggressive cleaning of blocks dirtied by hint
3921  * bit setting would increase the call rate. Bulk setting of hint
3922  * bits would reduce the call rate...
3923  *
3924  * We must issue the WAL record before we mark the buffer dirty.
3925  * Otherwise we might write the page before we write the WAL. That
3926  * causes a race condition, since a checkpoint might occur between
3927  * writing the WAL record and marking the buffer dirty. We solve
3928  * that with a kluge, but one that is already in use during
3929  * transaction commit to prevent race conditions. Basically, we
3930  * simply prevent the checkpoint WAL record from being written
3931  * until we have marked the buffer dirty. We don't start the
3932  * checkpoint flush until we have marked dirty, so our checkpoint
3933  * must flush the change to disk successfully or the checkpoint
3934  * never gets written, so crash recovery will fix.
3935  *
3936  * It's possible we may enter here without an xid, so it is
3937  * essential that CreateCheckpoint waits for virtual transactions
3938  * rather than full transactionids.
3939  */
3940  MyProc->delayChkpt = delayChkpt = true;
3941  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3942  }
3943 
3944  buf_state = LockBufHdr(bufHdr);
3945 
3946  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3947 
3948  if (!(buf_state & BM_DIRTY))
3949  {
3950  dirtied = true; /* Means "will be dirtied by this action" */
3951 
3952  /*
3953  * Set the page LSN if we wrote a backup block. We aren't supposed
3954  * to set this when only holding a share lock but as long as we
3955  * serialise it somehow we're OK. We choose to set LSN while
3956  * holding the buffer header lock, which causes any reader of an
3957  * LSN who holds only a share lock to also obtain a buffer header
3958  * lock before using PageGetLSN(), which is enforced in
3959  * BufferGetLSNAtomic().
3960  *
3961  * If checksums are enabled, you might think we should reset the
3962  * checksum here. That will happen when the page is written
3963  * sometime later in this checkpoint cycle.
3964  */
3965  if (!XLogRecPtrIsInvalid(lsn))
3966  PageSetLSN(page, lsn);
3967  }
3968 
3969  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3970  UnlockBufHdr(bufHdr, buf_state);
3971 
3972  if (delayChkpt)
3973  MyProc->delayChkpt = false;
3974 
3975  if (dirtied)
3976  {
3977  VacuumPageDirty++;
3978  pgBufferUsage.shared_blks_dirtied++;
3979  if (VacuumCostActive)
3980  VacuumCostBalance += VacuumCostPageDirty;
3981  }
3982  }
3983 }
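
A minimal sketch modelled on hint-bit setting (cf. SetHintBits(); the infomask bit shown is illustrative):

 /* caller holds a pin and at least a share lock on `buffer` */
 tuple->t_infomask |= HEAP_XMIN_COMMITTED;
 MarkBufferDirtyHint(buffer, true);     /* true: standard page layout */

Unlike MarkBufferDirty(), losing this update is acceptable, which is what allows the function to skip dirtying the page when WAL must not be written.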

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 281 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

282 {
283  PrivateRefCountEntry *res;
284 
285  /* only allowed to be called when a reservation has been made */
286  Assert(ReservedRefCountEntry != NULL);
287 
288  /* use up the reserved entry */
289  res = ReservedRefCountEntry;
290  ReservedRefCountEntry = NULL;
291 
292  /* and fill it */
293  res->buffer = buffer;
294  res->refcount = 0;
295 
296  return res;
297 }

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1683 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), and ReadRecentBuffer().

1684 {
1685  Buffer b = BufferDescriptorGetBuffer(buf);
1686  bool result;
1687  PrivateRefCountEntry *ref;
1688 
1689  ref = GetPrivateRefCountEntry(b, true);
1690 
1691  if (ref == NULL)
1692  {
1693  uint32 buf_state;
1694  uint32 old_buf_state;
1695 
1697  ref = NewPrivateRefCountEntry(b);
1698 
1699  old_buf_state = pg_atomic_read_u32(&buf->state);
1700  for (;;)
1701  {
1702  if (old_buf_state & BM_LOCKED)
1703  old_buf_state = WaitBufHdrUnlocked(buf);
1704 
1705  buf_state = old_buf_state;
1706 
1707  /* increase refcount */
1708  buf_state += BUF_REFCOUNT_ONE;
1709 
1710  if (strategy == NULL)
1711  {
1712  /* Default case: increase usagecount unless already max. */
1713  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1714  buf_state += BUF_USAGECOUNT_ONE;
1715  }
1716  else
1717  {
1718  /*
1719  * Ring buffers shouldn't evict others from pool. Thus we
1720  * don't make usagecount more than 1.
1721  */
1722  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1723  buf_state += BUF_USAGECOUNT_ONE;
1724  }
1725 
1726  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1727  buf_state))
1728  {
1729  result = (buf_state & BM_VALID) != 0;
1730 
1731  /*
1732  * Assume that we acquired a buffer pin for the purposes of
1733  * Valgrind buffer client checks (even in !result case) to
1734  * keep things simple. Buffers that are unsafe to access are
1735  * not generally guaranteed to be marked undefined or
1736  * non-accessible in any case.
1737  */
1738  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1739  break;
1740  }
1741  }
1742  }
1743  else
1744  {
1745  /*
1746  * If we previously pinned the buffer, it must surely be valid.
1747  *
1748  * Note: We deliberately avoid a Valgrind client request here.
1749  * Individual access methods can optionally superimpose buffer page
1750  * client requests on top of our client requests to enforce that
1751  * buffers are only accessed while locked (and pinned). It's possible
1752  * that the buffer page is legitimately non-accessible here. We
1753  * cannot meddle with that.
1754  */
1755  result = true;
1756  }
1757 
1758  ref->refcount++;
1759  Assert(ref->refcount > 0);
1760  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1761  return result;
1762 }

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1786 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, UnlockBufHdr, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), ReadRecentBuffer(), and SyncOneBuffer().

1787 {
1788  Buffer b;
1789  PrivateRefCountEntry *ref;
1790  uint32 buf_state;
1791 
1792  /*
1793  * As explained, We don't expect any preexisting pins. That allows us to
1794  * manipulate the PrivateRefCount after releasing the spinlock
1795  */
1796  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1797 
1798  /*
1799  * Buffer can't have a preexisting pin, so mark its page as defined to
1800  * Valgrind (this is similar to the PinBuffer() case where the backend
1801  * doesn't already have a buffer pin)
1802  */
1803  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1804 
1805  /*
1806  * Since we hold the buffer spinlock, we can update the buffer state and
1807  * release the lock in one operation.
1808  */
1809  buf_state = pg_atomic_read_u32(&buf->state);
1810  Assert(buf_state & BM_LOCKED);
1811  buf_state += BUF_REFCOUNT_ONE;
1812  UnlockBufHdr(buf, buf_state);
1813 
1814  b = BufferDescriptorGetBuffer(buf);
1815 
1816  ref = NewPrivateRefCountEntry(b);
1817  ref->refcount++;
1818 
1820 }

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 587 of file bufmgr.c.

References Assert, BlockNumberIsValid, ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RelationData::rd_smgr, RELATION_IS_OTHER_TEMP, RelationIsValid, RelationOpenSmgr, and RelationUsesLocalBuffers.

Referenced by acquire_sample_rows(), BitmapPrefetch(), count_nondeletable_pages(), HeapTupleHeaderAdvanceLatestRemovedXid(), and pg_prewarm().

588 {
589  Assert(RelationIsValid(reln));
590  Assert(BlockNumberIsValid(blockNum));
591 
592  /* Open it at the smgr level if not already done */
593  RelationOpenSmgr(reln);
594 
595  if (RelationUsesLocalBuffers(reln))
596  {
597  /* see comments in ReadBufferExtended */
598  if (RELATION_IS_OTHER_TEMP(reln))
599  ereport(ERROR,
600  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
601  errmsg("cannot access temporary tables of other sessions")));
602 
603  /* pass it off to localbuf.c */
604  return PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum);
605  }
606  else
607  {
608  /* pass it to the shared buffer version */
609  return PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum);
610  }
611 }
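
A hedged sketch of read-ahead over a block range (rel, start and nblocks are assumptions; without USE_PREFETCH support the calls merely report whether each block is already cached):

 BlockNumber blkno;

 for (blkno = start; blkno < start + nblocks; blkno++)
     (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

Callers that care can inspect the returned PrefetchBufferResult: recent_buffer identifies a buffer the block was in at lookup time (unpinned, so it must be rechecked), and initiated_io reports whether an asynchronous read was started.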

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 500 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), INIT_BUFFERTAG, PrefetchBufferResult::initiated_io, InvalidBuffer, LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherScanBlocks().

503 {
504  PrefetchBufferResult result = {InvalidBuffer, false};
505  BufferTag newTag; /* identity of requested block */
506  uint32 newHash; /* hash value for newTag */
507  LWLock *newPartitionLock; /* buffer partition lock for it */
508  int buf_id;
509 
510  Assert(BlockNumberIsValid(blockNum));
511 
512  /* create a tag so we can lookup the buffer */
513  INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node,
514  forkNum, blockNum);
515 
516  /* determine its hash code and partition lock ID */
517  newHash = BufTableHashCode(&newTag);
518  newPartitionLock = BufMappingPartitionLock(newHash);
519 
520  /* see if the block is in the buffer pool already */
521  LWLockAcquire(newPartitionLock, LW_SHARED);
522  buf_id = BufTableLookup(&newTag, newHash);
523  LWLockRelease(newPartitionLock);
524 
525  /* If not in buffers, initiate prefetch */
526  if (buf_id < 0)
527  {
528 #ifdef USE_PREFETCH
529  /*
530  * Try to initiate an asynchronous read. This returns false in
531  * recovery if the relation file doesn't exist.
532  */
533  if (smgrprefetch(smgr_reln, forkNum, blockNum))
534  result.initiated_io = true;
535 #endif /* USE_PREFETCH */
536  }
537  else
538  {
539  /*
540  * Report the buffer it was in at that time. The caller may be able
541  * to avoid a buffer table lookup, but it's not pinned and it must be
542  * rechecked!
543  */
544  result.recent_buffer = buf_id + 1;
545  }
546 
547  /*
548  * If the block *is* in buffers, we do nothing. This is not really ideal:
549  * the block might be just about to be evicted, which would be stupid
550  * since we know we are going to need it soon. But the only easy answer
551  * is to bump the usage_count, which does not seem like a great solution:
552  * when the caller does ultimately touch the block, usage_count would get
553  * bumped again, resulting in too much favoritism for blocks that are
554  * involved in a prefetch sequence. A real fix would involve some
555  * additional per-buffer state, and it's not clear that there's enough of
556  * a problem to justify that.
557  */
558 
559  return result;
560 }

◆ PrintBufferLeakWarning()

void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2691 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2692 {
2693  BufferDesc *buf;
2694  int32 loccount;
2695  char *path;
2696  BackendId backend;
2697  uint32 buf_state;
2698 
2699  Assert(BufferIsValid(buffer));
2700  if (BufferIsLocal(buffer))
2701  {
2702  buf = GetLocalBufferDescriptor(-buffer - 1);
2703  loccount = LocalRefCount[-buffer - 1];
2704  backend = MyBackendId;
2705  }
2706  else
2707  {
2708  buf = GetBufferDescriptor(buffer - 1);
2709  loccount = GetPrivateRefCount(buffer);
2710  backend = InvalidBackendId;
2711  }
2712 
2713  /* theoretically we should lock the bufhdr here */
2714  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2715  buf_state = pg_atomic_read_u32(&buf->state);
2716  elog(WARNING,
2717  "buffer refcount leak: [%03d] "
2718  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2719  buffer, path,
2720  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2721  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2722  pfree(path);
2723 }

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 697 of file bufmgr.c.

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), fill_seq_with_data(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

698 {
699  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
700 }
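As a usage note, a minimal sketch (not from bufmgr.c; rel and blkno are assumed valid) of the conventional caller pattern: pair the pin taken by ReadBuffer() with a content lock while the page is inspected, and release both afterwards.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Read one block of the main fork and share-lock it while reading it. */
static void
inspect_block(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);	/* pins the buffer */

	LockBuffer(buf, BUFFER_LOCK_SHARE);		/* content lock for readers */
	/* ... examine BufferGetPage(buf) ... */
	UnlockReleaseBuffer(buf);				/* drops the lock and the pin */
}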

◆ ReadBuffer_common()

static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)

Definition at line 807 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, BufferUsage::local_blks_written, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, BufferUsage::shared_blks_written, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().
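Because ReadBuffer_common() is static, callers reach it only through those wrappers. A hedged sketch of how the mode argument changes behavior, using the public ReadBufferExtended() entry point (the surrounding function is illustrative): RBM_NORMAL performs and verifies a real read, while RBM_ZERO_AND_LOCK skips the read and hands back a zero-filled, exclusively locked page.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Illustrative only: contrast two ReadBufferMode values. */
static void
mode_examples(Relation rel, BlockNumber blkno)
{
	/* RBM_NORMAL: read from disk if not cached; the page is verified. */
	Buffer		b1 = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
										RBM_NORMAL, NULL);

	/* RBM_ZERO_AND_LOCK: no read; a zeroed, exclusively locked page
	 * (used when the caller intends to overwrite the page anyway). */
	Buffer		b2 = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
										RBM_ZERO_AND_LOCK, NULL);

	ReleaseBuffer(b1);			/* b1 was pinned but not locked */
	UnlockReleaseBuffer(b2);	/* b2 came back locked as well as pinned */
}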

810 {
811  BufferDesc *bufHdr;
812  Block bufBlock;
813  bool found;
814  bool isExtend;
815  bool isLocalBuf = SmgrIsTemp(smgr);
816 
817  *hit = false;
818 
819  /* Make sure we will have room to remember the buffer pin */
820  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
821 
822  isExtend = (blockNum == P_NEW);
823 
824  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
825  smgr->smgr_rnode.node.spcNode,
826  smgr->smgr_rnode.node.dbNode,
827  smgr->smgr_rnode.node.relNode,
828  smgr->smgr_rnode.backend,
829  isExtend);
830 
831  /* Substitute proper block number if caller asked for P_NEW */
832  if (isExtend)
833  blockNum = smgrnblocks(smgr, forkNum);
834 
835  if (isLocalBuf)
836  {
837  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
838  if (found)
839  pgBufferUsage.local_blks_hit++;
840  else if (isExtend)
841  pgBufferUsage.local_blks_written++;
842  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
843  mode == RBM_ZERO_ON_ERROR)
844  pgBufferUsage.local_blks_read++;
845  }
846  else
847  {
848  /*
849  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
850  * not currently in memory.
851  */
852  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
853  strategy, &found);
854  if (found)
855  pgBufferUsage.shared_blks_hit++;
856  else if (isExtend)
857  pgBufferUsage.shared_blks_written++;
858  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
859  mode == RBM_ZERO_ON_ERROR)
860  pgBufferUsage.shared_blks_read++;
861  }
862 
863  /* At this point we do NOT hold any locks. */
864 
865  /* if it was already in the buffer pool, we're done */
866  if (found)
867  {
868  if (!isExtend)
869  {
870  /* Just need to update stats before we exit */
871  *hit = true;
872  VacuumPageHit++;
873 
874  if (VacuumCostActive)
875  VacuumCostBalance += VacuumCostPageHit;
876 
877  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
878  smgr->smgr_rnode.node.spcNode,
879  smgr->smgr_rnode.node.dbNode,
880  smgr->smgr_rnode.node.relNode,
881  smgr->smgr_rnode.backend,
882  isExtend,
883  found);