PostgreSQL Source Code  git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
Include dependency graph for bufmgr.c:


Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint32) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void FindAndDropRelFileNodeBuffers (RelFileNode rnode, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const void *p1, const void *p2)
 
static int ckpt_buforder_comparator (const void *pa, const void *pb)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void InitBufferPoolBackend (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelFileNodesAllBuffers (SMgrRelation *smgr_reln, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int maintenance_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint32) (NBuffers / 32)

Definition at line 79 of file bufmgr.c.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 69 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 68 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 61 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:387
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int32 * LocalRefCount
Definition: localbuf.c:45

Definition at line 448 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), ConditionalLockBuffer(), FlushOneBuffer(), IncrBufferRefCount(), LockBuffer(), LockBufferForCleanup(), MarkBufferDirty(), and ReleaseAndReadBuffer().
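
The macro relies on the convention that local (temporary-relation) buffers carry negative Buffer numbers. A minimal sketch of that indexing, using a hypothetical helper name:

/* Hypothetical helper illustrating the indexing the macro assumes:
 * local buffers are numbered -1, -2, ... and map to LocalRefCount[0], [1], ... */
static inline int32
local_refcount_for(Buffer bufnum)
{
    Assert(BufferIsLocal(bufnum));      /* true exactly when bufnum < 0 */
    return LocalRefCount[-bufnum - 1];
}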

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 60 of file bufmgr.c.

Referenced by FlushBuffer(), PinBuffer(), PinBuffer_Locked(), ReadBuffer_common(), and UnpinBuffer().

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 64 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 71 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers(), and FlushRelationsAllBuffers().

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

void AbortBufferIO ( void  )

Definition at line 4429 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetIOLock, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

4430 {
4431  BufferDesc *buf = InProgressBuf;
4432 
4433  if (buf)
4434  {
4435  uint32 buf_state;
4436 
4437  /*
4438  * Since LWLockReleaseAll has already been called, we're not holding
4439  * the buffer's io_in_progress_lock. We have to re-acquire it so that
4440  * we can use TerminateBufferIO. Anyone who's executing WaitIO on the
4441  * buffer will be in a busy spin until we succeed in doing this.
4442  */
4443  LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
4444 
4445  buf_state = LockBufHdr(buf);
4446  Assert(buf_state & BM_IO_IN_PROGRESS);
4447  if (IsForInput)
4448  {
4449  Assert(!(buf_state & BM_DIRTY));
4450 
4451  /* We'd better not think buffer is valid yet */
4452  Assert(!(buf_state & BM_VALID));
4453  UnlockBufHdr(buf, buf_state);
4454  }
4455  else
4456  {
4457  Assert(buf_state & BM_DIRTY);
4458  UnlockBufHdr(buf, buf_state);
4459  /* Issue notice if this is not the first failure... */
4460  if (buf_state & BM_IO_ERROR)
4461  {
4462  /* Buffer is pinned, so we can read tag without spinlock */
4463  char *path;
4464 
4465  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
4466  ereport(WARNING,
4467  (errcode(ERRCODE_IO_ERROR),
4468  errmsg("could not write block %u of %s",
4469  buf->tag.blockNum, path),
4470  errdetail("Multiple failures --- write error might be permanent.")));
4471  pfree(path);
4472  }
4473  }
4474  TerminateBufferIO(buf, false, BM_IO_ERROR);
4475  }
4476 }
#define relpathperm(rnode, forknum)
Definition: relpath.h:83
ForkNumber forkNum
Definition: buf_internals.h:93
int errcode(int sqlerrcode)
Definition: elog.c:704
#define BM_DIRTY
Definition: buf_internals.h:58
#define BufferDescriptorGetIOLock(bdesc)
static BufferDesc * InProgressBuf
Definition: bufmgr.c:161
void pfree(void *pointer)
Definition: mcxt.c:1057
static char * buf
Definition: pg_test_fsync.c:68
int errdetail(const char *fmt,...)
Definition: elog.c:1048
unsigned int uint32
Definition: c.h:429
static bool IsForInput
Definition: bufmgr.c:162
#define WARNING
Definition: elog.h:40
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:4397
#define BM_VALID
Definition: buf_internals.h:59
#define ereport(elevel,...)
Definition: elog.h:155
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4547
#define Assert(condition)
Definition: c.h:792
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1206
BlockNumber blockNum
Definition: buf_internals.h:94
RelFileNode rnode
Definition: buf_internals.h:92
#define BM_IO_ERROR
Definition: buf_internals.h:62
BufferTag tag
int errmsg(const char *fmt,...)
Definition: elog.c:915
#define UnlockBufHdr(desc, s)
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:61

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2492 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2493 {
2494  CheckForBufferLeaks();
2495 
2496  AtEOXact_LocalBuffers(isCommit);
2497 
2498  Assert(PrivateRefCountOverflowed == 0);
2499 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:199
#define Assert(condition)
Definition: c.h:792
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2566
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:577

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 2547 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolBackend().

2548 {
2549  AbortBufferIO();
2550  UnlockBuffers();
2551 
2552  CheckForBufferLeaks();
2553 
2554  /* localbuf.c needs a chance too */
2555  AtProcExit_LocalBuffers();
2556 }
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:588
void UnlockBuffers(void)
Definition: bufmgr.c:3911
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2566
void AbortBufferIO(void)
Definition: bufmgr.c:4429

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 2122 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2123 {
2124  /* info obtained from freelist.c */
2125  int strategy_buf_id;
2126  uint32 strategy_passes;
2127  uint32 recent_alloc;
2128 
2129  /*
2130  * Information saved between calls so we can determine the strategy
2131  * point's advance rate and avoid scanning already-cleaned buffers.
2132  */
2133  static bool saved_info_valid = false;
2134  static int prev_strategy_buf_id;
2135  static uint32 prev_strategy_passes;
2136  static int next_to_clean;
2137  static uint32 next_passes;
2138 
2139  /* Moving averages of allocation rate and clean-buffer density */
2140  static float smoothed_alloc = 0;
2141  static float smoothed_density = 10.0;
2142 
2143  /* Potentially these could be tunables, but for now, not */
2144  float smoothing_samples = 16;
2145  float scan_whole_pool_milliseconds = 120000.0;
2146 
2147  /* Used to compute how far we scan ahead */
2148  long strategy_delta;
2149  int bufs_to_lap;
2150  int bufs_ahead;
2151  float scans_per_alloc;
2152  int reusable_buffers_est;
2153  int upcoming_alloc_est;
2154  int min_scan_buffers;
2155 
2156  /* Variables for the scanning loop proper */
2157  int num_to_scan;
2158  int num_written;
2159  int reusable_buffers;
2160 
2161  /* Variables for final smoothed_density update */
2162  long new_strategy_delta;
2163  uint32 new_recent_alloc;
2164 
2165  /*
2166  * Find out where the freelist clock sweep currently is, and how many
2167  * buffer allocations have happened since our last call.
2168  */
2169  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2170 
2171  /* Report buffer alloc counts to pgstat */
2172  BgWriterStats.m_buf_alloc += recent_alloc;
2173 
2174  /*
2175  * If we're not running the LRU scan, just stop after doing the stats
2176  * stuff. We mark the saved state invalid so that we can recover sanely
2177  * if LRU scan is turned back on later.
2178  */
2179  if (bgwriter_lru_maxpages <= 0)
2180  {
2181  saved_info_valid = false;
2182  return true;
2183  }
2184 
2185  /*
2186  * Compute strategy_delta = how many buffers have been scanned by the
2187  * clock sweep since last time. If first time through, assume none. Then
2188  * see if we are still ahead of the clock sweep, and if so, how many
2189  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2190  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2191  * behavior when the passes counts wrap around.
2192  */
2193  if (saved_info_valid)
2194  {
2195  int32 passes_delta = strategy_passes - prev_strategy_passes;
2196 
2197  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2198  strategy_delta += (long) passes_delta * NBuffers;
2199 
2200  Assert(strategy_delta >= 0);
2201 
2202  if ((int32) (next_passes - strategy_passes) > 0)
2203  {
2204  /* we're one pass ahead of the strategy point */
2205  bufs_to_lap = strategy_buf_id - next_to_clean;
2206 #ifdef BGW_DEBUG
2207  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2208  next_passes, next_to_clean,
2209  strategy_passes, strategy_buf_id,
2210  strategy_delta, bufs_to_lap);
2211 #endif
2212  }
2213  else if (next_passes == strategy_passes &&
2214  next_to_clean >= strategy_buf_id)
2215  {
2216  /* on same pass, but ahead or at least not behind */
2217  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2218 #ifdef BGW_DEBUG
2219  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2220  next_passes, next_to_clean,
2221  strategy_passes, strategy_buf_id,
2222  strategy_delta, bufs_to_lap);
2223 #endif
2224  }
2225  else
2226  {
2227  /*
2228  * We're behind, so skip forward to the strategy point and start
2229  * cleaning from there.
2230  */
2231 #ifdef BGW_DEBUG
2232  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2233  next_passes, next_to_clean,
2234  strategy_passes, strategy_buf_id,
2235  strategy_delta);
2236 #endif
2237  next_to_clean = strategy_buf_id;
2238  next_passes = strategy_passes;
2239  bufs_to_lap = NBuffers;
2240  }
2241  }
2242  else
2243  {
2244  /*
2245  * Initializing at startup or after LRU scanning had been off. Always
2246  * start at the strategy point.
2247  */
2248 #ifdef BGW_DEBUG
2249  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2250  strategy_passes, strategy_buf_id);
2251 #endif
2252  strategy_delta = 0;
2253  next_to_clean = strategy_buf_id;
2254  next_passes = strategy_passes;
2255  bufs_to_lap = NBuffers;
2256  }
2257 
2258  /* Update saved info for next time */
2259  prev_strategy_buf_id = strategy_buf_id;
2260  prev_strategy_passes = strategy_passes;
2261  saved_info_valid = true;
2262 
2263  /*
2264  * Compute how many buffers had to be scanned for each new allocation, ie,
2265  * 1/density of reusable buffers, and track a moving average of that.
2266  *
2267  * If the strategy point didn't move, we don't update the density estimate
2268  */
2269  if (strategy_delta > 0 && recent_alloc > 0)
2270  {
2271  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2272  smoothed_density += (scans_per_alloc - smoothed_density) /
2273  smoothing_samples;
2274  }
2275 
2276  /*
2277  * Estimate how many reusable buffers there are between the current
2278  * strategy point and where we've scanned ahead to, based on the smoothed
2279  * density estimate.
2280  */
2281  bufs_ahead = NBuffers - bufs_to_lap;
2282  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2283 
2284  /*
2285  * Track a moving average of recent buffer allocations. Here, rather than
2286  * a true average we want a fast-attack, slow-decline behavior: we
2287  * immediately follow any increase.
2288  */
2289  if (smoothed_alloc <= (float) recent_alloc)
2290  smoothed_alloc = recent_alloc;
2291  else
2292  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2293  smoothing_samples;
2294 
2295  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2296  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2297 
2298  /*
2299  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2300  * eventually underflow to zero, and the underflows produce annoying
2301  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2302  * zero, there's no point in tracking smaller and smaller values of
2303  * smoothed_alloc, so just reset it to exactly zero to avoid this
2304  * syndrome. It will pop back up as soon as recent_alloc increases.
2305  */
2306  if (upcoming_alloc_est == 0)
2307  smoothed_alloc = 0;
2308 
2309  /*
2310  * Even in cases where there's been little or no buffer allocation
2311  * activity, we want to make a small amount of progress through the buffer
2312  * cache so that as many reusable buffers as possible are clean after an
2313  * idle period.
2314  *
2315  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2316  * the BGW will be called during the scan_whole_pool time; slice the
2317  * buffer pool into that many sections.
2318  */
2319  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2320 
2321  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2322  {
2323 #ifdef BGW_DEBUG
2324  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2325  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2326 #endif
2327  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2328  }
2329 
2330  /*
2331  * Now write out dirty reusable buffers, working forward from the
2332  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2333  * enough buffers to match our estimate of the next cycle's allocation
2334  * requirements, or hit the bgwriter_lru_maxpages limit.
2335  */
2336 
2337  /* Make sure we can handle the pin inside SyncOneBuffer */
2338  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2339 
2340  num_to_scan = bufs_to_lap;
2341  num_written = 0;
2342  reusable_buffers = reusable_buffers_est;
2343 
2344  /* Execute the LRU scan */
2345  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2346  {
2347  int sync_state = SyncOneBuffer(next_to_clean, true,
2348  wb_context);
2349 
2350  if (++next_to_clean >= NBuffers)
2351  {
2352  next_to_clean = 0;
2353  next_passes++;
2354  }
2355  num_to_scan--;
2356 
2357  if (sync_state & BUF_WRITTEN)
2358  {
2359  reusable_buffers++;
2360  if (++num_written >= bgwriter_lru_maxpages)
2361  {
2362  BgWriterStats.m_maxwritten_clean++;
2363  break;
2364  }
2365  }
2366  else if (sync_state & BUF_REUSABLE)
2367  reusable_buffers++;
2368  }
2369 
2370  BgWriterStats.m_buf_written_clean += num_written;
2371 
2372 #ifdef BGW_DEBUG
2373  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2374  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2375  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2376  bufs_to_lap - num_to_scan,
2377  num_written,
2378  reusable_buffers - reusable_buffers_est);
2379 #endif
2380 
2381  /*
2382  * Consider the above scan as being like a new allocation scan.
2383  * Characterize its density and update the smoothed one based on it. This
2384  * effectively halves the moving average period in cases where both the
2385  * strategy and the background writer are doing some useful scanning,
2386  * which is helpful because a long memory isn't as desirable on the
2387  * density estimates.
2388  */
2389  new_strategy_delta = bufs_to_lap - num_to_scan;
2390  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2391  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2392  {
2393  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2394  smoothed_density += (scans_per_alloc - smoothed_density) /
2395  smoothing_samples;
2396 
2397 #ifdef BGW_DEBUG
2398  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2399  new_recent_alloc, new_strategy_delta,
2400  scans_per_alloc, smoothed_density);
2401 #endif
2402  }
2403 
2404  /* Return true if OK to hibernate */
2405  return (bufs_to_lap == 0 && recent_alloc == 0);
2406 }
PgStat_Counter m_buf_alloc
Definition: pgstat.h:461
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:395
#define DEBUG1
Definition: elog.h:25
int BgWriterDelay
Definition: bgwriter.c:64
ResourceOwner CurrentResourceOwner
Definition: resowner.c:144
PgStat_Counter m_maxwritten_clean
Definition: pgstat.h:458
PgStat_Counter m_buf_written_clean
Definition: pgstat.h:457
PgStat_MsgBgWriter BgWriterStats
Definition: pgstat.c:144
double bgwriter_lru_multiplier
Definition: bufmgr.c:134
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:2425
signed int int32
Definition: c.h:417
#define BUF_REUSABLE
Definition: bufmgr.c:69
int bgwriter_lru_maxpages
Definition: bufmgr.c:133
#define DEBUG2
Definition: elog.h:24
unsigned int uint32
Definition: c.h:429
#define BUF_WRITTEN
Definition: bufmgr.c:68
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:934
#define Assert(condition)
Definition: c.h:792
#define elog(elevel,...)
Definition: elog.h:228
int NBuffers
Definition: globals.c:133
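
The smoothed_density and smoothed_alloc updates above are a plain exponential moving average with smoothing_samples = 16. A standalone sketch of the update rule (hypothetical helper, same arithmetic as the assignments in the function):

/* Move 1/smoothing_samples of the way toward the new sample each call, mirroring:
 *   smoothed_density += (scans_per_alloc - smoothed_density) / smoothing_samples; */
static float
ema_update(float smoothed, float sample, float smoothing_samples)
{
    return smoothed + (sample - smoothed) / smoothing_samples;
}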

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr 
)
static

Definition at line 1020 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, INIT_FORKNUM, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

1024 {
1025  BufferTag newTag; /* identity of requested block */
1026  uint32 newHash; /* hash value for newTag */
1027  LWLock *newPartitionLock; /* buffer partition lock for it */
1028  BufferTag oldTag; /* previous identity of selected buffer */
1029  uint32 oldHash; /* hash value for oldTag */
1030  LWLock *oldPartitionLock; /* buffer partition lock for it */
1031  uint32 oldFlags;
1032  int buf_id;
1033  BufferDesc *buf;
1034  bool valid;
1035  uint32 buf_state;
1036 
1037  /* create a tag so we can lookup the buffer */
1038  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1039 
1040  /* determine its hash code and partition lock ID */
1041  newHash = BufTableHashCode(&newTag);
1042  newPartitionLock = BufMappingPartitionLock(newHash);
1043 
1044  /* see if the block is in the buffer pool already */
1045  LWLockAcquire(newPartitionLock, LW_SHARED);
1046  buf_id = BufTableLookup(&newTag, newHash);
1047  if (buf_id >= 0)
1048  {
1049  /*
1050  * Found it. Now, pin the buffer so no one can steal it from the
1051  * buffer pool, and check to see if the correct data has been loaded
1052  * into the buffer.
1053  */
1054  buf = GetBufferDescriptor(buf_id);
1055 
1056  valid = PinBuffer(buf, strategy);
1057 
1058  /* Can release the mapping lock as soon as we've pinned it */
1059  LWLockRelease(newPartitionLock);
1060 
1061  *foundPtr = true;
1062 
1063  if (!valid)
1064  {
1065  /*
1066  * We can only get here if (a) someone else is still reading in
1067  * the page, or (b) a previous read attempt failed. We have to
1068  * wait for any active read attempt to finish, and then set up our
1069  * own read attempt if the page is still not BM_VALID.
1070  * StartBufferIO does it all.
1071  */
1072  if (StartBufferIO(buf, true))
1073  {
1074  /*
1075  * If we get here, previous attempts to read the buffer must
1076  * have failed ... but we shall bravely try again.
1077  */
1078  *foundPtr = false;
1079  }
1080  }
1081 
1082  return buf;
1083  }
1084 
1085  /*
1086  * Didn't find it in the buffer pool. We'll have to initialize a new
1087  * buffer. Remember to unlock the mapping lock while doing the work.
1088  */
1089  LWLockRelease(newPartitionLock);
1090 
1091  /* Loop here in case we have to try another victim buffer */
1092  for (;;)
1093  {
1094  /*
1095  * Ensure, while the spinlock's not yet held, that there's a free
1096  * refcount entry.
1097  */
1098  ReservePrivateRefCountEntry();
1099 
1100  /*
1101  * Select a victim buffer. The buffer is returned with its header
1102  * spinlock still held!
1103  */
1104  buf = StrategyGetBuffer(strategy, &buf_state);
1105 
1106  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1107 
1108  /* Must copy buffer flags while we still hold the spinlock */
1109  oldFlags = buf_state & BUF_FLAG_MASK;
1110 
1111  /* Pin the buffer and then release the buffer spinlock */
1112  PinBuffer_Locked(buf);
1113 
1114  /*
1115  * If the buffer was dirty, try to write it out. There is a race
1116  * condition here, in that someone might dirty it after we released it
1117  * above, or even while we are writing it out (since our share-lock
1118  * won't prevent hint-bit updates). We will recheck the dirty bit
1119  * after re-locking the buffer header.
1120  */
1121  if (oldFlags & BM_DIRTY)
1122  {
1123  /*
1124  * We need a share-lock on the buffer contents to write it out
1125  * (else we might write invalid data, eg because someone else is
1126  * compacting the page contents while we write). We must use a
1127  * conditional lock acquisition here to avoid deadlock. Even
1128  * though the buffer was not pinned (and therefore surely not
1129  * locked) when StrategyGetBuffer returned it, someone else could
1130  * have pinned and exclusive-locked it by the time we get here. If
1131  * we try to get the lock unconditionally, we'd block waiting for
1132  * them; if they later block waiting for us, deadlock ensues.
1133  * (This has been observed to happen when two backends are both
1134  * trying to split btree index pages, and the second one just
1135  * happens to be trying to split the page the first one got from
1136  * StrategyGetBuffer.)
1137  */
1138  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1139  LW_SHARED))
1140  {
1141  /*
1142  * If using a nondefault strategy, and writing the buffer
1143  * would require a WAL flush, let the strategy decide whether
1144  * to go ahead and write/reuse the buffer or to choose another
1145  * victim. We need lock to inspect the page LSN, so this
1146  * can't be done inside StrategyGetBuffer.
1147  */
1148  if (strategy != NULL)
1149  {
1150  XLogRecPtr lsn;
1151 
1152  /* Read the LSN while holding buffer header lock */
1153  buf_state = LockBufHdr(buf);
1154  lsn = BufferGetLSN(buf);
1155  UnlockBufHdr(buf, buf_state);
1156 
1157  if (XLogNeedsFlush(lsn) &&
1158  StrategyRejectBuffer(strategy, buf))
1159  {
1160  /* Drop lock/pin and loop around for another buffer */
1161  LWLockRelease(BufferDescriptorGetContentLock(buf));
1162  UnpinBuffer(buf, true);
1163  continue;
1164  }
1165  }
1166 
1167  /* OK, do the I/O */
1168  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1169  smgr->smgr_rnode.node.spcNode,
1170  smgr->smgr_rnode.node.dbNode,
1171  smgr->smgr_rnode.node.relNode);
1172 
1173  FlushBuffer(buf, NULL);
1174  LWLockRelease(BufferDescriptorGetContentLock(buf));
1175 
1176  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1177  &buf->tag);
1178 
1179  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1180  smgr->smgr_rnode.node.spcNode,
1181  smgr->smgr_rnode.node.dbNode,
1182  smgr->smgr_rnode.node.relNode);
1183  }
1184  else
1185  {
1186  /*
1187  * Someone else has locked the buffer, so give it up and loop
1188  * back to get another one.
1189  */
1190  UnpinBuffer(buf, true);
1191  continue;
1192  }
1193  }
1194 
1195  /*
1196  * To change the association of a valid buffer, we'll need to have
1197  * exclusive lock on both the old and new mapping partitions.
1198  */
1199  if (oldFlags & BM_TAG_VALID)
1200  {
1201  /*
1202  * Need to compute the old tag's hashcode and partition lock ID.
1203  * XXX is it worth storing the hashcode in BufferDesc so we need
1204  * not recompute it here? Probably not.
1205  */
1206  oldTag = buf->tag;
1207  oldHash = BufTableHashCode(&oldTag);
1208  oldPartitionLock = BufMappingPartitionLock(oldHash);
1209 
1210  /*
1211  * Must lock the lower-numbered partition first to avoid
1212  * deadlocks.
1213  */
1214  if (oldPartitionLock < newPartitionLock)
1215  {
1216  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1217  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1218  }
1219  else if (oldPartitionLock > newPartitionLock)
1220  {
1221  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1222  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1223  }
1224  else
1225  {
1226  /* only one partition, only one lock */
1227  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1228  }
1229  }
1230  else
1231  {
1232  /* if it wasn't valid, we need only the new partition */
1233  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1234  /* remember we have no old-partition lock or tag */
1235  oldPartitionLock = NULL;
1236  /* keep the compiler quiet about uninitialized variables */
1237  oldHash = 0;
1238  }
1239 
1240  /*
1241  * Try to make a hashtable entry for the buffer under its new tag.
1242  * This could fail because while we were writing someone else
1243  * allocated another buffer for the same block we want to read in.
1244  * Note that we have not yet removed the hashtable entry for the old
1245  * tag.
1246  */
1247  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1248 
1249  if (buf_id >= 0)
1250  {
1251  /*
1252  * Got a collision. Someone has already done what we were about to
1253  * do. We'll just handle this as if it were found in the buffer
1254  * pool in the first place. First, give up the buffer we were
1255  * planning to use.
1256  */
1257  UnpinBuffer(buf, true);
1258 
1259  /* Can give up that buffer's mapping partition lock now */
1260  if (oldPartitionLock != NULL &&
1261  oldPartitionLock != newPartitionLock)
1262  LWLockRelease(oldPartitionLock);
1263 
1264  /* remaining code should match code at top of routine */
1265 
1266  buf = GetBufferDescriptor(buf_id);
1267 
1268  valid = PinBuffer(buf, strategy);
1269 
1270  /* Can release the mapping lock as soon as we've pinned it */
1271  LWLockRelease(newPartitionLock);
1272 
1273  *foundPtr = true;
1274 
1275  if (!valid)
1276  {
1277  /*
1278  * We can only get here if (a) someone else is still reading
1279  * in the page, or (b) a previous read attempt failed. We
1280  * have to wait for any active read attempt to finish, and
1281  * then set up our own read attempt if the page is still not
1282  * BM_VALID. StartBufferIO does it all.
1283  */
1284  if (StartBufferIO(buf, true))
1285  {
1286  /*
1287  * If we get here, previous attempts to read the buffer
1288  * must have failed ... but we shall bravely try again.
1289  */
1290  *foundPtr = false;
1291  }
1292  }
1293 
1294  return buf;
1295  }
1296 
1297  /*
1298  * Need to lock the buffer header too in order to change its tag.
1299  */
1300  buf_state = LockBufHdr(buf);
1301 
1302  /*
1303  * Somebody could have pinned or re-dirtied the buffer while we were
1304  * doing the I/O and making the new hashtable entry. If so, we can't
1305  * recycle this buffer; we must undo everything we've done and start
1306  * over with a new victim buffer.
1307  */
1308  oldFlags = buf_state & BUF_FLAG_MASK;
1309  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1310  break;
1311 
1312  UnlockBufHdr(buf, buf_state);
1313  BufTableDelete(&newTag, newHash);
1314  if (oldPartitionLock != NULL &&
1315  oldPartitionLock != newPartitionLock)
1316  LWLockRelease(oldPartitionLock);
1317  LWLockRelease(newPartitionLock);
1318  UnpinBuffer(buf, true);
1319  }
1320 
1321  /*
1322  * Okay, it's finally safe to rename the buffer.
1323  *
1324  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1325  * paranoia. We also reset the usage_count since any recency of use of
1326  * the old content is no longer relevant. (The usage_count starts out at
1327  * 1 so that the buffer can survive one clock-sweep pass.)
1328  *
1329  * Make sure BM_PERMANENT is set for buffers that must be written at every
1330  * checkpoint. Unlogged buffers only need to be written at shutdown
1331  * checkpoints, except for their "init" forks, which need to be treated
1332  * just like permanent relations.
1333  */
1334  buf->tag = newTag;
1335  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1336  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1337  BUF_USAGECOUNT_MASK);
1338  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1339  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1340  else
1341  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1342 
1343  UnlockBufHdr(buf, buf_state);
1344 
1345  if (oldPartitionLock != NULL)
1346  {
1347  BufTableDelete(&oldTag, oldHash);
1348  if (oldPartitionLock != newPartitionLock)
1349  LWLockRelease(oldPartitionLock);
1350  }
1351 
1352  LWLockRelease(newPartitionLock);
1353 
1354  /*
1355  * Buffer contents are currently invalid. Try to get the io_in_progress
1356  * lock. If StartBufferIO returns false, then someone else managed to
1357  * read it before we did, so there's nothing left for BufferAlloc() to do.
1358  */
1359  if (StartBufferIO(buf, true))
1360  *foundPtr = false;
1361  else
1362  *foundPtr = true;
1363 
1364  return buf;
1365 }
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:1604
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
Definition: freelist.c:201
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:149
Definition: lwlock.h:31
#define BM_PERMANENT
Definition: buf_internals.h:66
#define BufMappingPartitionLock(hashcode)
#define BM_TAG_VALID
Definition: buf_internals.h:60
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3181
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:65
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:79
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:91
#define BM_DIRTY
Definition: buf_internals.h:58
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2734
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1810
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:4330
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:119
void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
Definition: bufmgr.c:4699
#define BUF_FLAG_MASK
Definition: buf_internals.h:45
RelFileNodeBackend smgr_rnode
Definition: smgr.h:42
WritebackContext BackendWritebackContext
Definition: buf_init.c:23
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1378
static char * buf
Definition: pg_test_fsync.c:68
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:43
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:63
unsigned int uint32
Definition: c.h:429
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1752
#define BM_VALID
Definition: buf_internals.h:59
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
Definition: freelist.c:686
RelFileNode node
Definition: relfilenode.h:74
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4547
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:792
#define INIT_BUFFERTAG(a, xx_rnode, xx_forkNum, xx_blockNum)
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1707
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:42
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1206
#define BM_IO_ERROR
Definition: buf_internals.h:62
BufferTag tag
#define UnlockBufHdr(desc, s)
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:215
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:61
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:48

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2674 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2675 {
2676  BufferDesc *bufHdr;
2677 
2678  Assert(BufferIsPinned(buffer));
2679 
2680  if (BufferIsLocal(buffer))
2681  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2682  else
2683  bufHdr = GetBufferDescriptor(buffer - 1);
2684 
2685  /* pinned, so OK to read tag without spinlock */
2686  return bufHdr->tag.blockNum;
2687 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:792
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:94
BufferTag tag
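
A minimal usage sketch (assumes a valid Relation rel whose block 0 exists; error handling omitted):

Buffer      buf = ReadBuffer(rel, 0);
BlockNumber blkno;

/* Only a pin is required to read the tag; no content lock is needed. */
blkno = BufferGetBlockNumber(buf);
ReleaseBuffer(buf);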

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 2937 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

2938 {
2939  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
2940  char *page = BufferGetPage(buffer);
2941  XLogRecPtr lsn;
2942  uint32 buf_state;
2943 
2944  /*
2945  * If we don't need locking for correctness, fastpath out.
2946  */
2947  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
2948  return PageGetLSN(page);
2949 
2950  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2951  Assert(BufferIsValid(buffer));
2952  Assert(BufferIsPinned(buffer));
2953 
2954  buf_state = LockBufHdr(bufHdr);
2955  lsn = PageGetLSN(page);
2956  UnlockBufHdr(bufHdr, buf_state);
2957 
2958  return lsn;
2959 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:429
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4547
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:792
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define PageGetLSN(page)
Definition: bufpage.h:366
#define UnlockBufHdr(desc, s)
#define XLogHintBitIsNeeded()
Definition: xlog.h:202
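
A hedged sketch of the typical caller pattern (loosely modeled on the hint-bit callers listed above; the flow shown here is illustrative, not the exact SetHintBits() logic):

/* The locked read only matters when hint-bit changes must be WAL-logged
 * (data checksums or wal_log_hints enabled); otherwise PageGetLSN() suffices. */
if (XLogHintBitIsNeeded() && BufferIsPermanent(buffer))
{
    XLogRecPtr  page_lsn = BufferGetLSNAtomic(buffer);

    /* a caller would compare page_lsn against the LSN it needs flushed */
    (void) page_lsn;
}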

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileNode *  rnode,
ForkNumber *  forknum,
BlockNumber *  blknum 
)

Definition at line 2695 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2697 {
2698  BufferDesc *bufHdr;
2699 
2700  /* Do the same checks as BufferGetBlockNumber. */
2701  Assert(BufferIsPinned(buffer));
2702 
2703  if (BufferIsLocal(buffer))
2704  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2705  else
2706  bufHdr = GetBufferDescriptor(buffer - 1);
2707 
2708  /* pinned, so OK to read tag without spinlock */
2709  *rnode = bufHdr->tag.rnode;
2710  *forknum = bufHdr->tag.forkNum;
2711  *blknum = bufHdr->tag.blockNum;
2712 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
ForkNumber forkNum
Definition: buf_internals.h:93
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:792
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:94
RelFileNode rnode
Definition: buf_internals.h:92
BufferTag tag
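
A minimal usage sketch (assumes buf is a valid, pinned buffer):

RelFileNode rnode;
ForkNumber  forknum;
BlockNumber blkno;

BufferGetTag(buf, &rnode, &forknum, &blkno);
/* rnode.spcNode, rnode.dbNode and rnode.relNode identify the relation file;
 * forknum and blkno locate the page within it. */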

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2907 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2908 {
2909  BufferDesc *bufHdr;
2910 
2911  /* Local buffers are used only for temp relations. */
2912  if (BufferIsLocal(buffer))
2913  return false;
2914 
2915  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2916  Assert(BufferIsValid(buffer));
2917  Assert(BufferIsPinned(buffer));
2918 
2919  /*
2920  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2921  * need not bother with the buffer header spinlock. Even if someone else
2922  * changes the buffer header state while we're doing this, the state is
2923  * changed atomically, so we'll read the old value or the new value, but
2924  * not random garbage.
2925  */
2926  bufHdr = GetBufferDescriptor(buffer - 1);
2927  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
2928 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:448
#define BM_PERMANENT
Definition: buf_internals.h:66
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:792
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
pg_atomic_uint32 state
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 1845 of file bufmgr.c.

References Assert, BgWriterStats, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), ckpt_buforder_comparator(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgBgWriter::m_buf_written_checkpoints, NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), pfree(), pg_atomic_read_u32(), PointerGetDatum, ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, qsort, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1846 {
1847  uint32 buf_state;
1848  int buf_id;
1849  int num_to_scan;
1850  int num_spaces;
1851  int num_processed;
1852  int num_written;
1853  CkptTsStatus *per_ts_stat = NULL;
1854  Oid last_tsid;
1855  binaryheap *ts_heap;
1856  int i;
1857  int mask = BM_DIRTY;
1858  WritebackContext wb_context;
1859 
1860  /* Make sure we can handle the pin inside SyncOneBuffer */
1861  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1862 
1863  /*
1864  * Unless this is a shutdown checkpoint or we have been explicitly told,
1865  * we write only permanent, dirty buffers. But at shutdown or end of
1866  * recovery, we write all dirty buffers.
1867  */
1870  mask |= BM_PERMANENT;
1871 
1872  /*
1873  * Loop over all buffers, and mark the ones that need to be written with
1874  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1875  * can estimate how much work needs to be done.
1876  *
1877  * This allows us to write only those pages that were dirty when the
1878  * checkpoint began, and not those that get dirtied while it proceeds.
1879  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1880  * later in this function, or by normal backends or the bgwriter cleaning
1881  * scan, the flag is cleared. Any buffer dirtied after this point won't
1882  * have the flag set.
1883  *
1884  * Note that if we fail to write some buffer, we may leave buffers with
1885  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1886  * certainly need to be written for the next checkpoint attempt, too.
1887  */
1888  num_to_scan = 0;
1889  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1890  {
1891  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1892 
1893  /*
1894  * Header spinlock is enough to examine BM_DIRTY, see comment in
1895  * SyncOneBuffer.
1896  */
1897  buf_state = LockBufHdr(bufHdr);
1898 
1899  if ((buf_state & mask) == mask)
1900  {
1901  CkptSortItem *item;
1902 
1903  buf_state |= BM_CHECKPOINT_NEEDED;
1904 
1905  item = &CkptBufferIds[num_to_scan++];
1906  item->buf_id = buf_id;
1907  item->tsId = bufHdr->tag.rnode.spcNode;
1908  item->relNode = bufHdr->tag.rnode.relNode;
1909  item->forkNum = bufHdr->tag.forkNum;
1910  item->blockNum = bufHdr->tag.blockNum;
1911  }
1912 
1913  UnlockBufHdr(bufHdr, buf_state);
1914 
1915  /* Check for barrier events in case NBuffers is large. */
1916  if (ProcSignalBarrierPending)
1917  ProcessProcSignalBarrier();
1918  }
1919 
1920  if (num_to_scan == 0)
1921  return; /* nothing to do */
1922 
1923  WritebackContextInit(&wb_context, &checkpoint_flush_after);
1924 
1925  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
1926 
1927  /*
1928  * Sort buffers that need to be written to reduce the likelihood of random
1929  * IO. The sorting is also important for the implementation of balancing
1930  * writes between tablespaces. Without balancing writes we'd potentially
1931  * end up writing to the tablespaces one-by-one; possibly overloading the
1932  * underlying system.
1933  */
1934  qsort(CkptBufferIds, num_to_scan, sizeof(CkptSortItem),
1935  ckpt_buforder_comparator);
1936 
1937  num_spaces = 0;
1938 
1939  /*
1940  * Allocate progress status for each tablespace with buffers that need to
1941  * be flushed. This requires the to-be-flushed array to be sorted.
1942  */
1943  last_tsid = InvalidOid;
1944  for (i = 0; i < num_to_scan; i++)
1945  {
1946  CkptTsStatus *s;
1947  Oid cur_tsid;
1948 
1949  cur_tsid = CkptBufferIds[i].tsId;
1950 
1951  /*
1952  * Grow array of per-tablespace status structs, every time a new
1953  * tablespace is found.
1954  */
1955  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
1956  {
1957  Size sz;
1958 
1959  num_spaces++;
1960 
1961  /*
1962  * Not worth adding grow-by-power-of-2 logic here - even with a
1963  * few hundred tablespaces this should be fine.
1964  */
1965  sz = sizeof(CkptTsStatus) * num_spaces;
1966 
1967  if (per_ts_stat == NULL)
1968  per_ts_stat = (CkptTsStatus *) palloc(sz);
1969  else
1970  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
1971 
1972  s = &per_ts_stat[num_spaces - 1];
1973  memset(s, 0, sizeof(*s));
1974  s->tsId = cur_tsid;
1975 
1976  /*
1977  * The first buffer in this tablespace. As CkptBufferIds is sorted
1978  * by tablespace all (s->num_to_scan) buffers in this tablespace
1979  * will follow afterwards.
1980  */
1981  s->index = i;
1982 
1983  /*
1984  * progress_slice will be determined once we know how many buffers
1985  * are in each tablespace, i.e. after this loop.
1986  */
1987 
1988  last_tsid = cur_tsid;
1989  }
1990  else
1991  {
1992  s = &per_ts_stat[num_spaces - 1];
1993  }
1994 
1995  s->num_to_scan++;
1996 
1997  /* Check for barrier events. */
1998  if (ProcSignalBarrierPending)
1999  ProcessProcSignalBarrier();
2000  }
2001 
2002  Assert(num_spaces > 0);
2003 
2004  /*
2005  * Build a min-heap over the write-progress in the individual tablespaces,
2006  * and compute how large a portion of the total progress a single
2007  * processed buffer is.
2008  */
2009  ts_heap = binaryheap_allocate(num_spaces,
2010  ts_ckpt_progress_comparator,
2011  NULL);
2012 
2013  for (i = 0; i < num_spaces; i++)
2014  {
2015  CkptTsStatus *ts_stat = &per_ts_stat[i];
2016 
2017  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2018 
2019  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2020  }
2021 
2022  binaryheap_build(ts_heap);
2023 
2024  /*
2025  * Iterate through to-be-checkpointed buffers and write the ones (still)
2026  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2027  * tablespaces; otherwise the sorting would lead to only one tablespace
2028  * receiving writes at a time, making inefficient use of the hardware.
2029  */
2030  num_processed = 0;
2031  num_written = 0;
2032  while (!binaryheap_empty(ts_heap))
2033  {
2034  BufferDesc *bufHdr = NULL;
2035  CkptTsStatus *ts_stat = (CkptTsStatus *)
2036  DatumGetPointer(binaryheap_first(ts_heap));
2037 
2038  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2039  Assert(buf_id != -1);
2040 
2041  bufHdr = GetBufferDescriptor(buf_id);
2042 
2043  num_processed++;
2044 
2045  /*
2046  * We don't need to acquire the lock here, because we're only looking
2047  * at a single bit. It's possible that someone else writes the buffer
2048  * and clears the flag right after we check, but that doesn't matter
2049  * since SyncOneBuffer will then do nothing. However, there is a
2050  * further race condition: it's conceivable that between the time we
2051  * examine the bit here and the time SyncOneBuffer acquires the lock,
2052  * someone else not only wrote the buffer but replaced it with another
2053  * page and dirtied it. In that improbable case, SyncOneBuffer will
2054  * write the buffer though we didn't need to. It doesn't seem worth
2055  * guarding against this, though.
2056  */
2057  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2058  {
2059  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2060  {
2061  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2062  BgWriterStats.m_buf_written_checkpoints++;
2063  num_written++;
2064  }
2065  }
2066 
2067  /*
2068  * Measure progress independent of actually having to flush the buffer
2069  * - otherwise writing become unbalanced.
2070  */
2071  ts_stat->progress += ts_stat->progress_slice;
2072  ts_stat->num_scanned++;
2073  ts_stat->index++;
2074 
2075  /* Have all the buffers from the tablespace been processed? */
2076  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2077  {
2078  binaryheap_remove_first(ts_heap);
2079  }
2080  else
2081  {
2082  /* update heap with the new progress */
2083  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2084  }
2085 
2086  /*
2087  * Sleep to throttle our I/O rate.
2088  *
2089  * (This will check for barrier events even if it doesn't sleep.)
2090  */
2091  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2092  }
2093 
2094  /* issue all pending flushes */
2095  IssuePendingWritebacks(&wb_context);
2096 
2097  pfree(per_ts_stat);
2098  per_ts_stat = NULL;
2099  binaryheap_free(ts_heap);
2100 
2101  /*
2102  * Update checkpoint statistics. As noted above, this doesn't include
2103  * buffers written by other backends or bgwriter scan.
2104  */
2105  CheckpointStats.ckpt_bufs_written += num_written;
2106 
2107  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2108 }
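
A small illustrative fragment (not part of bufmgr.c; the buffer counts are assumed) showing how progress_slice keeps the min-heap balanced across tablespaces:

/*
 * Suppose num_to_scan = 900 checkpoint buffers in total, split over two
 * tablespaces holding 600 and 300 to-be-written buffers (assumed numbers).
 */
float8	slice_a = (float8) 900 / 600;	/* 1.5 progress units per buffer written */
float8	slice_b = (float8) 900 / 300;	/* 3.0 progress units per buffer written */

/*
 * After one write from each tablespace their progress is 1.5 vs. 3.0, so
 * ts_ckpt_progress_comparator keeps the larger tablespace at the top of the
 * heap until it catches up; both reach progress == 900 at the same time,
 * which is exactly the balancing the loop above relies on.
 */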

◆ buffertag_comparator()

static int buffertag_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 4599 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

Referenced by IssuePendingWritebacks().

4600 {
4601  const BufferTag *ba = (const BufferTag *) a;
4602  const BufferTag *bb = (const BufferTag *) b;
4603  int ret;
4604 
4605  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4606 
4607  if (ret != 0)
4608  return ret;
4609 
4610  if (ba->forkNum < bb->forkNum)
4611  return -1;
4612  if (ba->forkNum > bb->forkNum)
4613  return 1;
4614 
4615  if (ba->blockNum < bb->blockNum)
4616  return -1;
4617  if (ba->blockNum > bb->blockNum)
4618  return 1;
4619 
4620  return 0;
4621 }

◆ BufmgrCommit()

void BufmgrCommit ( void  )

Definition at line 2660 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2661 {
2662  /* Nothing to do in bufmgr anymore... */
2663 }

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 2566 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2567 {
2568 #ifdef USE_ASSERT_CHECKING
2569  int RefCountErrors = 0;
2570  PrivateRefCountEntry *res;
2571  int i;
2572 
2573  /* check the array */
2574  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2575  {
2576  res = &PrivateRefCountArray[i];
2577 
2578  if (res->buffer != InvalidBuffer)
2579  {
2580  PrintBufferLeakWarning(res->buffer);
2581  RefCountErrors++;
2582  }
2583  }
2584 
2585  /* if necessary search the hash */
2586  if (PrivateRefCountOverflowed)
2587  {
2588  HASH_SEQ_STATUS hstat;
2589 
2590  hash_seq_init(&hstat, PrivateRefCountHash);
2591  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2592  {
2593  PrintBufferLeakWarning(res->buffer);
2594  RefCountErrors++;
2595  }
2596 
2597  }
2598 
2599  Assert(RefCountErrors == 0);
2600 #endif
2601 }

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 2650 of file bufmgr.c.

References BufferSync().

Referenced by CheckPointGuts().

2651 {
2652  BufferSync(flags);
2653 }

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const void *  pa,
const void *  pb 
)
static

Definition at line 4630 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

Referenced by BufferSync().

4631 {
4632  const CkptSortItem *a = (const CkptSortItem *) pa;
4633  const CkptSortItem *b = (const CkptSortItem *) pb;
4634 
4635  /* compare tablespace */
4636  if (a->tsId < b->tsId)
4637  return -1;
4638  else if (a->tsId > b->tsId)
4639  return 1;
4640  /* compare relation */
4641  if (a->relNode < b->relNode)
4642  return -1;
4643  else if (a->relNode > b->relNode)
4644  return 1;
4645  /* compare fork */
4646  else if (a->forkNum < b->forkNum)
4647  return -1;
4648  else if (a->forkNum > b->forkNum)
4649  return 1;
4650  /* compare block number */
4651  else if (a->blockNum < b->blockNum)
4652  return -1;
4653  else if (a->blockNum > b->blockNum)
4654  return 1;
4655  /* equal page IDs are unlikely, but not impossible */
4656  return 0;
4657 }

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 3965 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

3966 {
3967  BufferDesc *buf;
3968 
3969  Assert(BufferIsPinned(buffer));
3970  if (BufferIsLocal(buffer))
3971  return true; /* act as though we got it */
3972 
3973  buf = GetBufferDescriptor(buffer - 1);
3974 
3975  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
3976  LW_EXCLUSIVE);
3977 }
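
A hypothetical caller-side sketch (not taken from PostgreSQL; rel and blkno are placeholders) of the usual pin-then-try-lock pattern around ConditionalLockBuffer():

Buffer		buf = ReadBuffer(rel, blkno);	/* acquires a pin */

if (ConditionalLockBuffer(buf))
{
	/* got the exclusive content lock without blocking; update the page */
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
else
{
	/* lock busy; caller retries later or picks another page */
}
ReleaseBuffer(buf);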

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 4173 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), lazy_scan_heap(), and lazy_vacuum_heap().

4174 {
4175  BufferDesc *bufHdr;
4176  uint32 buf_state,
4177  refcount;
4178 
4179  Assert(BufferIsValid(buffer));
4180 
4181  if (BufferIsLocal(buffer))
4182  {
4183  refcount = LocalRefCount[-buffer - 1];
4184  /* There should be exactly one pin */
4185  Assert(refcount > 0);
4186  if (refcount != 1)
4187  return false;
4188  /* Nobody else to wait for */
4189  return true;
4190  }
4191 
4192  /* There should be exactly one local pin */
4193  refcount = GetPrivateRefCount(buffer);
4194  Assert(refcount);
4195  if (refcount != 1)
4196  return false;
4197 
4198  /* Try to acquire lock */
4199  if (!ConditionalLockBuffer(buffer))
4200  return false;
4201 
4202  bufHdr = GetBufferDescriptor(buffer - 1);
4203  buf_state = LockBufHdr(bufHdr);
4204  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
4205 
4206  Assert(refcount > 0);
4207  if (refcount == 1)
4208  {
4209  /* Successfully acquired exclusive lock with pincount 1 */
4210  UnlockBufHdr(bufHdr, buf_state);
4211  return true;
4212  }
4213 
4214  /* Failed, so release the lock */
4215  UnlockBufHdr(bufHdr, buf_state);
4216  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4217  return false;
4218 }
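
A hypothetical sketch of opportunistic cleanup, in the spirit of heap_page_prune_opt(); it assumes the buffer is already pinned exactly once by this backend and that the pin is retained afterwards:

if (ConditionalLockBufferForCleanup(buffer))
{
	/* we hold the only pin plus the exclusive lock: safe to prune */
	/* ... defragment / prune the page here ... */
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
}
else
{
	/* someone else has the page pinned or locked; skip the cleanup */
}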

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3337 of file bufmgr.c.

References buftag::blockNum, buf, BufferDescriptorGetBuffer, RelFileNode::dbNode, elog, buftag::forkNum, BufferDesc::freeNext, GetBufferDescriptor, GetPrivateRefCount(), i, InvalidateBuffer(), InvalidBackendId, LockBufHdr(), LOG, NBuffers, relpathbackend, relpathperm, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3338 {
3339  int i;
3340 
3341  /*
3342  * We needn't consider local buffers, since by assumption the target
3343  * database isn't our own.
3344  */
3345 
3346  for (i = 0; i < NBuffers; i++)
3347  {
3348  BufferDesc *bufHdr = GetBufferDescriptor(i);
3349  uint32 buf_state;
3350 
3351  /*
3352  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3353  * and saves some cycles.
3354  */
3355  if (bufHdr->tag.rnode.dbNode != dbid)
3356  continue;
3357 
3358  buf_state = LockBufHdr(bufHdr);
3359  if (bufHdr->tag.rnode.dbNode == dbid)
3360  InvalidateBuffer(bufHdr); /* releases spinlock */
3361  else
3362  UnlockBufHdr(bufHdr, buf_state);
3363  }
3364 }

◆ DropRelFileNodeBuffers()

void DropRelFileNodeBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 2983 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeLocalBuffers(), FindAndDropRelFileNodeBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, SMgrRelationData::smgr_rnode, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrtruncate().

2985 {
2986  int i;
2987  int j;
2988  RelFileNodeBackend rnode;
2989  BlockNumber nForkBlock[MAX_FORKNUM];
2990  BlockNumber nBlocksToInvalidate = 0;
2991 
2992  rnode = smgr_reln->smgr_rnode;
2993 
2994  /* If it's a local relation, it's localbuf.c's problem. */
2995  if (RelFileNodeBackendIsTemp(rnode))
2996  {
2997  if (rnode.backend == MyBackendId)
2998  {
2999  for (j = 0; j < nforks; j++)
3000  DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
3001  firstDelBlock[j]);
3002  }
3003  return;
3004  }
3005 
3006  /*
3007  * To remove all the pages of the specified relation forks from the buffer
3008  * pool, we need to scan the entire buffer pool but we can optimize it by
3009  * finding the buffers from the BufMapping table provided we know the exact
3010  * size of each fork of the relation. The exact size is required to ensure
3011  * that we don't leave any buffer for the relation being dropped as
3012  * otherwise the background writer or checkpointer can lead to a PANIC
3013  * error while flushing buffers corresponding to files that don't exist.
3014  *
3015  * To know the exact size, we rely on the size cached for each fork by us
3016  * during recovery which limits the optimization to recovery and on
3017  * standbys but we can easily extend it once we have shared cache for
3018  * relation size.
3019  *
3020  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3021  * and the future writes keep the cached value up-to-date. See
3022  * smgrextend. It is possible that the value of the first lseek is smaller
3023  * than the actual number of existing blocks in the file due to buggy
3024  * Linux kernels that might not have accounted for the recent write. But
3025  * that should be fine because there must not be any buffers after that
3026  * file size.
3027  */
3028  for (i = 0; i < nforks; i++)
3029  {
3030  /* Get the number of blocks for a relation's fork */
3031  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3032 
3033  if (nForkBlock[i] == InvalidBlockNumber)
3034  {
3035  nBlocksToInvalidate = InvalidBlockNumber;
3036  break;
3037  }
3038 
3039  /* calculate the number of blocks to be invalidated */
3040  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3041  }
3042 
3043  /*
3044  * We apply the optimization iff the total number of blocks to invalidate
3045  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3046  */
3047  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3048  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3049  {
3050  for (j = 0; j < nforks; j++)
3051  FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
3052  nForkBlock[j], firstDelBlock[j]);
3053  return;
3054  }
3055 
3056  for (i = 0; i < NBuffers; i++)
3057  {
3058  BufferDesc *bufHdr = GetBufferDescriptor(i);
3059  uint32 buf_state;
3060 
3061  /*
3062  * We can make this a tad faster by prechecking the buffer tag before
3063  * we attempt to lock the buffer; this saves a lot of lock
3064  * acquisitions in typical cases. It should be safe because the
3065  * caller must have AccessExclusiveLock on the relation, or some other
3066  * reason to be certain that no one is loading new pages of the rel
3067  * into the buffer pool. (Otherwise we might well miss such pages
3068  * entirely.) Therefore, while the tag might be changing while we
3069  * look at it, it can't be changing *to* a value we care about, only
3070  * *away* from such a value. So false negatives are impossible, and
3071  * false positives are safe because we'll recheck after getting the
3072  * buffer lock.
3073  *
3074  * We could check forkNum and blockNum as well as the rnode, but the
3075  * incremental win from doing so seems small.
3076  */
3077  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
3078  continue;
3079 
3080  buf_state = LockBufHdr(bufHdr);
3081 
3082  for (j = 0; j < nforks; j++)
3083  {
3084  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
3085  bufHdr->tag.forkNum == forkNum[j] &&
3086  bufHdr->tag.blockNum >= firstDelBlock[j])
3087  {
3088  InvalidateBuffer(bufHdr); /* releases spinlock */
3089  break;
3090  }
3091  }
3092  if (j >= nforks)
3093  UnlockBufHdr(bufHdr, buf_state);
3094  }
3095 }
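
A worked example of the threshold (the shared_buffers setting is assumed): with 128MB of shared buffers and 8kB pages, NBuffers is 16384, so BUF_DROP_FULL_SCAN_THRESHOLD is 16384 / 32 = 512. During recovery, truncating forks whose cached sizes total fewer than 512 to-be-invalidated blocks takes the FindAndDropRelFileNodeBuffers() lookup path; anything larger, or any fork without a cached size, falls back to the full NBuffers scan above.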

◆ DropRelFileNodesAllBuffers()

void DropRelFileNodesAllBuffers ( SMgrRelation smgr_reln,
int  nnodes 
)

Definition at line 3107 of file bufmgr.c.

References BlockNumberIsValid, BUF_DROP_FULL_SCAN_THRESHOLD, DropRelFileNodeAllLocalBuffers(), FindAndDropRelFileNodeBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), InvalidBlockNumber, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, SMgrRelationData::node, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, buftag::rnode, rnode_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkall().

3108 {
3109  int i;
3110  int j;
3111  int n = 0;
3112  SMgrRelation *rels;
3113  BlockNumber (*block)[MAX_FORKNUM + 1];
3114  BlockNumber nBlocksToInvalidate = 0;
3115  RelFileNode *nodes;
3116  bool cached = true;
3117  bool use_bsearch;
3118 
3119  if (nnodes == 0)
3120  return;
3121 
3122  rels = palloc(sizeof(SMgrRelation) * nnodes); /* non-local relations */
3123 
3124  /* If it's a local relation, it's localbuf.c's problem. */
3125  for (i = 0; i < nnodes; i++)
3126  {
3127  if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
3128  {
3129  if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
3130  DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
3131  }
3132  else
3133  rels[n++] = smgr_reln[i];
3134  }
3135 
3136  /*
3137  * If there are no non-local relations, then we're done. Release the
3138  * memory and return.
3139  */
3140  if (n == 0)
3141  {
3142  pfree(rels);
3143  return;
3144  }
3145 
3146  /*
3147  * This is used to remember the number of blocks for all the relation
3148  * forks.
3149  */
3150  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3151  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3152 
3153  /*
3154  * We can avoid scanning the entire buffer pool if we know the exact size
3155  * of each of the given relation forks. See DropRelFileNodeBuffers.
3156  */
3157  for (i = 0; i < n && cached; i++)
3158  {
3159  for (j = 0; j <= MAX_FORKNUM; j++)
3160  {
3161  /* Get the number of blocks for a relation's fork. */
3162  block[i][j] = smgrnblocks_cached(rels[i], j);
3163 
3164  /* We only need to consider the relation forks that exist. */
3165  if (block[i][j] == InvalidBlockNumber)
3166  {
3167  if (!smgrexists(rels[i], j))
3168  continue;
3169  cached = false;
3170  break;
3171  }
3172 
3173  /* calculate the total number of blocks to be invalidated */
3174  nBlocksToInvalidate += block[i][j];
3175  }
3176  }
3177 
3178  /*
3179  * We apply the optimization iff the total number of blocks to invalidate
3180  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3181  */
3182  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3183  {
3184  for (i = 0; i < n; i++)
3185  {
3186  for (j = 0; j <= MAX_FORKNUM; j++)
3187  {
3188  /* ignore relation forks that don't exist */
3189  if (!BlockNumberIsValid(block[i][j]))
3190  continue;
3191 
3192  /* drop all the buffers for a particular relation fork */
3193  FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node,
3194  j, block[i][j], 0);
3195  }
3196  }
3197 
3198  pfree(block);
3199  pfree(rels);
3200  return;
3201  }
3202 
3203  pfree(block);
3204  nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
3205  for (i = 0; i < n; i++)
3206  nodes[i] = rels[i]->smgr_rnode.node;
3207 
3208  /*
3209  * For a low number of relations to drop, just use a simple walk-through to
3210  * save the bsearch overhead. The threshold to use is rather a guess than
3211  * an exactly determined value, as it depends on many factors (CPU and RAM
3212  * speeds, amount of shared buffers etc.).
3213  */
3214  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3215 
3216  /* sort the list of rnodes if necessary */
3217  if (use_bsearch)
3218  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
3219 
3220  for (i = 0; i < NBuffers; i++)
3221  {
3222  RelFileNode *rnode = NULL;
3223  BufferDesc *bufHdr = GetBufferDescriptor(i);
3224  uint32 buf_state;
3225 
3226  /*
3227  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3228  * and saves some cycles.
3229  */
3230 
3231  if (!use_bsearch)
3232  {
3233  int j;
3234 
3235  for (j = 0; j < n; j++)
3236  {
3237  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
3238  {
3239  rnode = &nodes[j];
3240  break;
3241  }
3242  }
3243  }
3244  else
3245  {
3246  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
3247  nodes, n, sizeof(RelFileNode),
3248  rnode_comparator);
3249 
3250 
3251  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3252  if (rnode == NULL)
3253  continue;
3254 
3255  buf_state = LockBufHdr(bufHdr);
3256  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3257  InvalidateBuffer(bufHdr); /* releases spinlock */
3258  else
3259  UnlockBufHdr(bufHdr, buf_state);
3260  }
3261 
3262  pfree(nodes);
3263  pfree(rels);
3264 }
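
A rough cost sketch of the bsearch cutoff (the buffer count is assumed): with NBuffers = 131072 (1GB of shared buffers) and n relations being dropped, the linear walk performs about NBuffers * n tag comparisons, while sorting once and probing with bsearch costs roughly NBuffers * log2(n) comparisons plus the sort. For n well above RELS_BSEARCH_THRESHOLD (20) the logarithmic probe wins; below it the sort and extra function-call overhead are not worth it, which is the guess the comment above describes.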

◆ FindAndDropRelFileNodeBuffers()

static void FindAndDropRelFileNodeBuffers ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3276 of file bufmgr.c.

References buftag::blockNum, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), buftag::forkNum, GetBufferDescriptor, INIT_BUFFERTAG, InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

3279 {
3280  BlockNumber curBlock;
3281 
3282  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3283  {
3284  uint32 bufHash; /* hash value for tag */
3285  BufferTag bufTag; /* identity of requested block */
3286  LWLock *bufPartitionLock; /* buffer partition lock for it */
3287  int buf_id;
3288  BufferDesc *bufHdr;
3289  uint32 buf_state;
3290 
3291  /* create a tag so we can lookup the buffer */
3292  INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock);
3293 
3294  /* determine its hash code and partition lock ID */
3295  bufHash = BufTableHashCode(&bufTag);
3296  bufPartitionLock = BufMappingPartitionLock(bufHash);
3297 
3298  /* Check that it is in the buffer pool. If not, do nothing. */
3299  LWLockAcquire(bufPartitionLock, LW_SHARED);
3300  buf_id = BufTableLookup(&bufTag, bufHash);
3301  LWLockRelease(bufPartitionLock);
3302 
3303  if (buf_id < 0)
3304  continue;
3305 
3306  bufHdr = GetBufferDescriptor(buf_id);
3307 
3308  /*
3309  * We need to lock the buffer header and recheck if the buffer is
3310  * still associated with the same block because the buffer could be
3311  * evicted by some other backend loading blocks for a different
3312  * relation after we release lock on the BufMapping table.
3313  */
3314  buf_state = LockBufHdr(bufHdr);
3315 
3316  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
3317  bufHdr->tag.forkNum == forkNum &&
3318  bufHdr->tag.blockNum >= firstDelBlock)
3319  InvalidateBuffer(bufHdr); /* releases spinlock */
3320  else
3321  UnlockBufHdr(bufHdr, buf_state);
3322  }
3323 }

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln 
)
static

Definition at line 2734 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

2735 {
2736  XLogRecPtr recptr;
2737  ErrorContextCallback errcallback;
2738  instr_time io_start,
2739  io_time;
2740  Block bufBlock;
2741  char *bufToWrite;
2742  uint32 buf_state;
2743 
2744  /*
2745  * Acquire the buffer's io_in_progress lock. If StartBufferIO returns
2746  * false, then someone else flushed the buffer before we could, so we need
2747  * not do anything.
2748  */
2749  if (!StartBufferIO(buf, false))
2750  return;
2751 
2752  /* Setup error traceback support for ereport() */
2753  errcallback.callback = shared_buffer_write_error_callback;
2754  errcallback.arg = (void *) buf;
2755  errcallback.previous = error_context_stack;
2756  error_context_stack = &errcallback;
2757 
2758  /* Find smgr relation for buffer */
2759  if (reln == NULL)
2760  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2761 
2762  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2763  buf->tag.blockNum,
2764  reln->smgr_rnode.node.spcNode,
2765  reln->smgr_rnode.node.dbNode,
2766  reln->smgr_rnode.node.relNode);
2767 
2768  buf_state = LockBufHdr(buf);
2769 
2770  /*
2771  * Run PageGetLSN while holding header lock, since we don't have the
2772  * buffer locked exclusively in all cases.
2773  */
2774  recptr = BufferGetLSN(buf);
2775 
2776  /* To check if block content changes while flushing. - vadim 01/17/97 */
2777  buf_state &= ~BM_JUST_DIRTIED;
2778  UnlockBufHdr(buf, buf_state);
2779 
2780  /*
2781  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2782  * rule that log updates must hit disk before any of the data-file changes
2783  * they describe do.
2784  *
2785  * However, this rule does not apply to unlogged relations, which will be
2786  * lost after a crash anyway. Most unlogged relation pages do not bear
2787  * LSNs since we never emit WAL records for them, and therefore flushing
2788  * up through the buffer LSN would be useless, but harmless. However,
2789  * GiST indexes use LSNs internally to track page-splits, and therefore
2790  * unlogged GiST pages bear "fake" LSNs generated by
2791  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2792  * LSN counter could advance past the WAL insertion point; and if it did
2793  * happen, attempting to flush WAL through that location would fail, with
2794  * disastrous system-wide consequences. To make sure that can't happen,
2795  * skip the flush if the buffer isn't permanent.
2796  */
2797  if (buf_state & BM_PERMANENT)
2798  XLogFlush(recptr);
2799 
2800  /*
2801  * Now it's safe to write buffer to disk. Note that no one else should
2802  * have been able to write it while we were busy with log flushing because
2803  * we have the io_in_progress lock.
2804  */
2805  bufBlock = BufHdrGetBlock(buf);
2806 
2807  /*
2808  * Update page checksum if desired. Since we have only shared lock on the
2809  * buffer, other processes might be updating hint bits in it, so we must
2810  * copy the page to private storage if we do checksumming.
2811  */
2812  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2813 
2814  if (track_io_timing)
2815  INSTR_TIME_SET_CURRENT(io_start);
2816 
2817  /*
2818  * bufToWrite is either the shared buffer or a copy, as appropriate.
2819  */
2820  smgrwrite(reln,
2821  buf->tag.forkNum,
2822  buf->tag.blockNum,
2823  bufToWrite,
2824  false);
2825 
2826  if (track_io_timing)
2827  {
2828  INSTR_TIME_SET_CURRENT(io_time);
2829  INSTR_TIME_SUBTRACT(io_time, io_start);
2830  pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
2831  INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
2832  }
2833 
2834  pgBufferUsage.shared_blks_written++;
2835 
2836  /*
2837  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2838  * end the io_in_progress state.
2839  */
2840  TerminateBufferIO(buf, true, 0);
2841 
2842  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2843  buf->tag.blockNum,
2844  reln->smgr_rnode.node.spcNode,
2845  reln->smgr_rnode.node.dbNode,
2846  reln->smgr_rnode.node.relNode);
2847 
2848  /* Pop the error context stack */
2849  error_context_stack = errcallback.previous;
2850 }

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3637 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3638 {
3639  int i;
3640  BufferDesc *bufHdr;
3641 
3642  /* Make sure we can handle the pin inside the loop */
3644 
3645  for (i = 0; i < NBuffers; i++)
3646  {
3647  uint32 buf_state;
3648 
3649  bufHdr = GetBufferDescriptor(i);
3650 
3651  /*
3652  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3653  * and saves some cycles.
3654  */
3655  if (bufHdr->tag.rnode.dbNode != dbid)
3656  continue;
3657 
3659 
3660  buf_state = LockBufHdr(bufHdr);
3661  if (bufHdr->tag.rnode.dbNode == dbid &&
3662  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3663  {
3664  PinBuffer_Locked(bufHdr);
3666  FlushBuffer(bufHdr, NULL);
3668  UnpinBuffer(bufHdr, true);
3669  }
3670  else
3671  UnlockBufHdr(bufHdr, buf_state);
3672  }
3673 }

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 3680 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().

3681 {
3682  BufferDesc *bufHdr;
3683 
3684  /* currently not needed, but no fundamental reason not to support */
3685  Assert(!BufferIsLocal(buffer));
3686 
3687  Assert(BufferIsPinned(buffer));
3688 
3689  bufHdr = GetBufferDescriptor(buffer - 1);
3690 
3691  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3692 
3693  FlushBuffer(bufHdr, NULL);
3694 }
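
A hypothetical redo-side sketch (modeled loosely on the hash index WAL replay callers; the surrounding setup is assumed) showing the state FlushOneBuffer() expects: the buffer is pinned and its content lock is held:

/* buffer was returned by XLogInitBufferForRedo() and is exclusively locked */
Page		page = BufferGetPage(buffer);

/* ... rebuild the page contents from the WAL record ... */

MarkBufferDirty(buffer);
FlushOneBuffer(buffer);			/* force it to disk while the lock is still held */
UnlockReleaseBuffer(buffer);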

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 3441 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by heapam_relation_copy_data(), and index_copy_data().

3442 {
3443  int i;
3444  BufferDesc *bufHdr;
3445 
3446  /* Open rel at the smgr level if not already done */
3447  RelationOpenSmgr(rel);
3448 
3449  if (RelationUsesLocalBuffers(rel))
3450  {
3451  for (i = 0; i < NLocBuffer; i++)
3452  {
3453  uint32 buf_state;
3454 
3455  bufHdr = GetLocalBufferDescriptor(i);
3456  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3457  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3458  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3459  {
3460  ErrorContextCallback errcallback;
3461  Page localpage;
3462 
3463  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3464 
3465  /* Setup error traceback support for ereport() */
3466  errcallback.callback = local_buffer_write_error_callback;
3467  errcallback.arg = (void *) bufHdr;
3468  errcallback.previous = error_context_stack;
3469  error_context_stack = &errcallback;
3470 
3471  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3472 
3473  smgrwrite(rel->rd_smgr,
3474  bufHdr->tag.forkNum,
3475  bufHdr->tag.blockNum,
3476  localpage,
3477  false);
3478 
3479  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3480  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3481 
3482  /* Pop the error context stack */
3483  error_context_stack = errcallback.previous;
3484  }
3485  }
3486 
3487  return;
3488  }
3489 
3490  /* Make sure we can handle the pin inside the loop */
3492 
3493  for (i = 0; i < NBuffers; i++)
3494  {
3495  uint32 buf_state;
3496 
3497  bufHdr = GetBufferDescriptor(i);
3498 
3499  /*
3500  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3501  * and saves some cycles.
3502  */
3503  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3504  continue;
3505 
3507 
3508  buf_state = LockBufHdr(bufHdr);
3509  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3510  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3511  {
3512  PinBuffer_Locked(bufHdr);
3514  FlushBuffer(bufHdr, rel->rd_smgr);
3516  UnpinBuffer(bufHdr, true);
3517  }
3518  else
3519  UnlockBufHdr(bufHdr, buf_state);
3520  }
3521 }

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 3533 of file bufmgr.c.

References Assert, BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, RelFileNodeBackend::node, palloc(), pfree(), pg_qsort(), PinBuffer_Locked(), RelFileNodeBackendIsTemp, RelFileNodeEquals, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, SMgrSortArray::rnode, rnode_comparator(), SMgrRelationData::smgr_rnode, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by smgrdosyncall().

3534 {
3535  int i;
3536  SMgrSortArray *srels;
3537  bool use_bsearch;
3538 
3539  if (nrels == 0)
3540  return;
3541 
3542  /* fill-in array for qsort */
3543  srels = palloc(sizeof(SMgrSortArray) * nrels);
3544 
3545  for (i = 0; i < nrels; i++)
3546  {
3547  Assert(!RelFileNodeBackendIsTemp(smgrs[i]->smgr_rnode));
3548 
3549  srels[i].rnode = smgrs[i]->smgr_rnode.node;
3550  srels[i].srel = smgrs[i];
3551  }
3552 
3553  /*
3554  * Save the bsearch overhead for a low number of relations to sync. See
3555  * DropRelFileNodesAllBuffers for details.
3556  */
3557  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
3558 
3559  /* sort the list of SMgrRelations if necessary */
3560  if (use_bsearch)
3561  pg_qsort(srels, nrels, sizeof(SMgrSortArray), rnode_comparator);
3562 
3563  /* Make sure we can handle the pin inside the loop */
3565 
3566  for (i = 0; i < NBuffers; i++)
3567  {
3568  SMgrSortArray *srelent = NULL;
3569  BufferDesc *bufHdr = GetBufferDescriptor(i);
3570  uint32 buf_state;
3571 
3572  /*
3573  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3574  * and saves some cycles.
3575  */
3576 
3577  if (!use_bsearch)
3578  {
3579  int j;
3580 
3581  for (j = 0; j < nrels; j++)
3582  {
3583  if (RelFileNodeEquals(bufHdr->tag.rnode, srels[j].rnode))
3584  {
3585  srelent = &srels[j];
3586  break;
3587  }
3588  }
3589 
3590  }
3591  else
3592  {
3593  srelent = bsearch((const void *) &(bufHdr->tag.rnode),
3594  srels, nrels, sizeof(SMgrSortArray),
3595  rnode_comparator);
3596  }
3597 
3598  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3599  if (srelent == NULL)
3600  continue;
3601 
3603 
3604  buf_state = LockBufHdr(bufHdr);
3605  if (RelFileNodeEquals(bufHdr->tag.rnode, srelent->rnode) &&
3606  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3607  {
3608  PinBuffer_Locked(bufHdr);
3610  FlushBuffer(bufHdr, srelent->srel);
3612  UnpinBuffer(bufHdr, true);
3613  }
3614  else
3615  UnlockBufHdr(bufHdr, buf_state);
3616  }
3617 
3618  pfree(srels);
3619 }

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 410 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

411 {
412  Assert(ref->refcount == 0);
413 
414  if (ref >= &PrivateRefCountArray[0] &&
416  {
417  ref->buffer = InvalidBuffer;
418 
419  /*
420  * Mark the just used entry as reserved - in many scenarios that
421  * allows us to avoid ever having to search the array/hash for free
422  * entries.
423  */
424  ReservedRefCountEntry = ref;
425  }
426  else
427  {
428  bool found;
429  Buffer buffer = ref->buffer;
430 
432  (void *) &buffer,
433  HASH_REMOVE,
434  &found);
435  Assert(found);
438  }
439 }

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 387 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), and PrintBufferLeakWarning().

388 {
389  PrivateRefCountEntry *ref;
390 
391  Assert(BufferIsValid(buffer));
392  Assert(!BufferIsLocal(buffer));
393 
394  /*
395  * Not moving the entry - that's ok for the current users, but we might
396  * want to change this one day.
397  */
398  ref = GetPrivateRefCountEntry(buffer, false);
399 
400  if (ref == NULL)
401  return 0;
402  return ref->refcount;
403 }

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 307 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

308 {
310  int i;
311 
312  Assert(BufferIsValid(buffer));
313  Assert(!BufferIsLocal(buffer));
314 
315  /*
316  * First search for references in the array, that'll be sufficient in the
317  * majority of cases.
318  */
319  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
320  {
321  res = &PrivateRefCountArray[i];
322 
323  if (res->buffer == buffer)
324  return res;
325  }
326 
327  /*
328  * By here we know that the buffer, if already pinned, isn't residing in
329  * the array.
330  *
331  * Only look up the buffer in the hashtable if we've previously overflowed
332  * into it.
333  */
334  if (PrivateRefCountOverflowed == 0)
335  return NULL;
336 
338  (void *) &buffer,
339  HASH_FIND,
340  NULL);
341 
342  if (res == NULL)
343  return NULL;
344  else if (!do_move)
345  {
346  /* caller doesn't want us to move the hash entry into the array */
347  return res;
348  }
349  else
350  {
351  /* move buffer from hashtable into the free array slot */
352  bool found;
354 
355  /* Ensure there's a free array slot */
357 
358  /* Use up the reserved slot */
359  Assert(ReservedRefCountEntry != NULL);
360  free = ReservedRefCountEntry;
361  ReservedRefCountEntry = NULL;
362  Assert(free->buffer == InvalidBuffer);
363 
364  /* and fill it */
365  free->buffer = buffer;
366  free->refcount = res->refcount;
367 
368  /* delete from hashtable */
370  (void *) &buffer,
371  HASH_REMOVE,
372  &found);
373  Assert(found);
376 
377  return free;
378  }
379 }

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 4147 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

4148 {
4149  int bufid = GetStartupBufferPinWaitBufId();
4150 
4151  /*
4152  * If we get woken slowly then it's possible that the Startup process was
4153  * already woken by other backends before we got here. Also possible that
4154  * we get here by multiple interrupts or interrupts at inappropriate
4155  * times, so make sure we do nothing if the bufid is not set.
4156  */
4157  if (bufid < 0)
4158  return false;
4159 
4160  if (GetPrivateRefCount(bufid + 1) > 0)
4161  return true;
4162 
4163  return false;
4164 }

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3738 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

3739 {
3740  Assert(BufferIsPinned(buffer));
3741  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3742  if (BufferIsLocal(buffer))
3743  LocalRefCount[-buffer - 1]++;
3744  else
3745  {
3746  PrivateRefCountEntry *ref;
3747 
3748  ref = GetPrivateRefCountEntry(buffer, true);
3749  Assert(ref != NULL);
3750  ref->refcount++;
3751  }
3752  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
3753 }
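
A minimal hypothetical sketch: every IncrBufferRefCount() must later be matched by its own ReleaseBuffer(), because each pin is tracked separately by the current resource owner:

/* buffer is already pinned once by this backend */
IncrBufferRefCount(buffer);		/* second, independently tracked pin */

/* ... hand the buffer to a structure (e.g. a tuple slot) that keeps it ... */

ReleaseBuffer(buffer);			/* drops one pin; the other holder drops its own */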
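Usage sketch (illustrative only, not taken from bufmgr.c; buf is a hypothetical buffer this backend has already pinned). IncrBufferRefCount() records an extra, resource-owner-tracked pin that must later be dropped with ReleaseBuffer():

/* take an additional pin on an already-pinned buffer */
IncrBufferRefCount(buf);
/* ... hand the extra reference to another data structure ... */
ReleaseBuffer(buf);        /* drop the extra pin when done with it */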

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 2514 of file bufmgr.c.

References HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, and PrivateRefCountArray.

Referenced by BaseInit().

2515 {
2516  HASHCTL hash_ctl;
2517 
2518  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2519 
2520  hash_ctl.keysize = sizeof(int32);
2521  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2522 
2523  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2524  HASH_ELEM | HASH_BLOBS);
2525 }

◆ InitBufferPoolBackend()

void InitBufferPoolBackend ( void  )

Definition at line 2537 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().

2538 {
2539  on_shmem_exit(AtProcExit_Buffers, 0);
2540 }

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1385 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), and FindAndDropRelFileNodeBuffers().

1386 {
1387  BufferTag oldTag;
1388  uint32 oldHash; /* hash value for oldTag */
1389  LWLock *oldPartitionLock; /* buffer partition lock for it */
1390  uint32 oldFlags;
1391  uint32 buf_state;
1392 
1393  /* Save the original buffer tag before dropping the spinlock */
1394  oldTag = buf->tag;
1395 
1396  buf_state = pg_atomic_read_u32(&buf->state);
1397  Assert(buf_state & BM_LOCKED);
1398  UnlockBufHdr(buf, buf_state);
1399 
1400  /*
1401  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1402  * worth storing the hashcode in BufferDesc so we need not recompute it
1403  * here? Probably not.
1404  */
1405  oldHash = BufTableHashCode(&oldTag);
1406  oldPartitionLock = BufMappingPartitionLock(oldHash);
1407 
1408 retry:
1409 
1410  /*
1411  * Acquire exclusive mapping lock in preparation for changing the buffer's
1412  * association.
1413  */
1414  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1415 
1416  /* Re-lock the buffer header */
1417  buf_state = LockBufHdr(buf);
1418 
1419  /* If it's changed while we were waiting for lock, do nothing */
1420  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1421  {
1422  UnlockBufHdr(buf, buf_state);
1423  LWLockRelease(oldPartitionLock);
1424  return;
1425  }
1426 
1427  /*
1428  * We assume the only reason for it to be pinned is that someone else is
1429  * flushing the page out. Wait for them to finish. (This could be an
1430  * infinite loop if the refcount is messed up... it would be nice to time
1431  * out after awhile, but there seems no way to be sure how many loops may
1432  * be needed. Note that if the other guy has pinned the buffer but not
1433  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1434  * be busy-looping here.)
1435  */
1436  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1437  {
1438  UnlockBufHdr(buf, buf_state);
1439  LWLockRelease(oldPartitionLock);
1440  /* safety check: should definitely not be our *own* pin */
1441  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1442  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1443  WaitIO(buf);
1444  goto retry;
1445  }
1446 
1447  /*
1448  * Clear out the buffer's tag and flags. We must do this to ensure that
1449  * linear scans of the buffer array don't think the buffer is valid.
1450  */
1451  oldFlags = buf_state & BUF_FLAG_MASK;
1452  CLEAR_BUFFERTAG(buf->tag);
1453  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1454  UnlockBufHdr(buf, buf_state);
1455 
1456  /*
1457  * Remove the buffer from the lookup hashtable, if it was in there.
1458  */
1459  if (oldFlags & BM_TAG_VALID)
1460  BufTableDelete(&oldTag, oldHash);
1461 
1462  /*
1463  * Done with mapping lock.
1464  */
1465  LWLockRelease(oldPartitionLock);
1466 
1467  /*
1468  * Insert the buffer at the head of the list of free buffers.
1469  */
1470  StrategyFreeBuffer(buf);
1471 }

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 4229 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), hash_xlog_split_allocate_page(), and hashbucketcleanup().

4230 {
4231  BufferDesc *bufHdr;
4232  uint32 buf_state;
4233 
4234  Assert(BufferIsValid(buffer));
4235 
4236  if (BufferIsLocal(buffer))
4237  {
4238  /* There should be exactly one pin */
4239  if (LocalRefCount[-buffer - 1] != 1)
4240  return false;
4241  /* Nobody else to wait for */
4242  return true;
4243  }
4244 
4245  /* There should be exactly one local pin */
4246  if (GetPrivateRefCount(buffer) != 1)
4247  return false;
4248 
4249  bufHdr = GetBufferDescriptor(buffer - 1);
4250 
4251  /* caller must hold exclusive lock on buffer */
4252  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
4253  LW_EXCLUSIVE));
4254 
4255  buf_state = LockBufHdr(bufHdr);
4256 
4257  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4258  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4259  {
4260  /* pincount is OK. */
4261  UnlockBufHdr(bufHdr, buf_state);
4262  return true;
4263  }
4264 
4265  UnlockBufHdr(bufHdr, buf_state);
4266  return false;
4267 }
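Usage sketch (illustrative only, not from bufmgr.c; buf is a hypothetical pinned buffer). The caller must already hold an exclusive content lock, which the function asserts:

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (IsBufferCleanupOK(buf))
{
    /* no other backend holds a pin: cleanup requiring a lone pin is safe here */
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);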

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4733 of file bufmgr.c.

References buftag::blockNum, buffertag_comparator(), cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, qsort, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4734 {
4735  int i;
4736 
4737  if (context->nr_pending == 0)
4738  return;
4739 
4740  /*
4741  * Executing the writes in-order can make them a lot faster, and allows to
4742  * merge writeback requests to consecutive blocks into larger writebacks.
4743  */
4744  qsort(&context->pending_writebacks, context->nr_pending,
4745  sizeof(PendingWriteback), buffertag_comparator);
4746 
4747  /*
4748  * Coalesce neighbouring writes, but nothing else. For that we iterate
4749  * through the, now sorted, array of pending flushes, and look forward to
4750  * find all neighbouring (or identical) writes.
4751  */
4752  for (i = 0; i < context->nr_pending; i++)
4753  {
4754  PendingWriteback *cur;
4755  PendingWriteback *next;
4756  SMgrRelation reln;
4757  int ahead;
4758  BufferTag tag;
4759  Size nblocks = 1;
4760 
4761  cur = &context->pending_writebacks[i];
4762  tag = cur->tag;
4763 
4764  /*
4765  * Peek ahead, into following writeback requests, to see if they can
4766  * be combined with the current one.
4767  */
4768  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4769  {
4770  next = &context->pending_writebacks[i + ahead + 1];
4771 
4772  /* different file, stop */
4773  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4774  cur->tag.forkNum != next->tag.forkNum)
4775  break;
4776 
4777  /* ok, block queued twice, skip */
4778  if (cur->tag.blockNum == next->tag.blockNum)
4779  continue;
4780 
4781  /* only merge consecutive writes */
4782  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4783  break;
4784 
4785  nblocks++;
4786  cur = next;
4787  }
4788 
4789  i += ahead;
4790 
4791  /* and finally tell the kernel to write the data to storage */
4792  reln = smgropen(tag.rnode, InvalidBackendId);
4793  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4794  }
4795 
4796  context->nr_pending = 0;
4797 }
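Usage sketch of the writeback-context flow (illustrative only; tag stands for the BufferTag of a block that was just written out):

WritebackContext wb_context;

WritebackContextInit(&wb_context, &checkpoint_flush_after);
ScheduleBufferTagForWriteback(&wb_context, &tag);  /* queue; may flush when full */
/* ... further buffer writes ... */
IssuePendingWritebacks(&wb_context);               /* sort, merge, smgrwriteback() */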

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4501 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4502 {
4503  BufferDesc *bufHdr = (BufferDesc *) arg;
4504 
4505  if (bufHdr != NULL)
4506  {
4507  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4508  bufHdr->tag.forkNum);
4509 
4510  errcontext("writing block %u of relation %s",
4511  bufHdr->tag.blockNum, path);
4512  pfree(path);
4513  }
4514 }

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 3939 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

3940 {
3941  BufferDesc *buf;
3942 
3943  Assert(BufferIsPinned(buffer));
3944  if (BufferIsLocal(buffer))
3945  return; /* local buffers need no lock */
3946 
3947  buf = GetBufferDescriptor(buffer - 1);
3948 
3949  if (mode == BUFFER_LOCK_UNLOCK)
3950  LWLockRelease(BufferDescriptorGetContentLock(buf));
3951  else if (mode == BUFFER_LOCK_SHARE)
3952  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
3953  else if (mode == BUFFER_LOCK_EXCLUSIVE)
3954  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
3955  else
3956  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
3957 }
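Usage sketch of the three lock modes on an already-pinned buffer buf (illustrative only, not from bufmgr.c):

LockBuffer(buf, BUFFER_LOCK_SHARE);        /* read the page contents */
/* ... */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);       /* drop the content lock, keep the pin */

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);    /* modify the page */
/* ... change the page, MarkBufferDirty(buf), emit WAL ... */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);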

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 3996 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, DeadlockTimeout, elog, ERROR, get_ps_display(), GetBufferDescriptor, GetCurrentTimestamp(), GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcPid, now(), palloc(), pfree(), PG_WAIT_BUFFER_PIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr, update_process_title, and BufferDesc::wait_backend_pid.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

3997 {
3998  BufferDesc *bufHdr;
3999  char *new_status = NULL;
4000  TimestampTz waitStart = 0;
4001  bool logged_recovery_conflict = false;
4002 
4003  Assert(BufferIsPinned(buffer));
4004  Assert(PinCountWaitBuf == NULL);
4005 
4006  if (BufferIsLocal(buffer))
4007  {
4008  /* There should be exactly one pin */
4009  if (LocalRefCount[-buffer - 1] != 1)
4010  elog(ERROR, "incorrect local pin count: %d",
4011  LocalRefCount[-buffer - 1]);
4012  /* Nobody else to wait for */
4013  return;
4014  }
4015 
4016  /* There should be exactly one local pin */
4017  if (GetPrivateRefCount(buffer) != 1)
4018  elog(ERROR, "incorrect local pin count: %d",
4019  GetPrivateRefCount(buffer));
4020 
4021  bufHdr = GetBufferDescriptor(buffer - 1);
4022 
4023  for (;;)
4024  {
4025  uint32 buf_state;
4026 
4027  /* Try to acquire lock */
4028  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4029  buf_state = LockBufHdr(bufHdr);
4030 
4031  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4032  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4033  {
4034  /* Successfully acquired exclusive lock with pincount 1 */
4035  UnlockBufHdr(bufHdr, buf_state);
4036 
4037  /*
4038  * Emit the log message if recovery conflict on buffer pin was
4039  * resolved but the startup process waited longer than
4040  * deadlock_timeout for it.
4041  */
4042  if (logged_recovery_conflict)
4043  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4044  waitStart, GetCurrentTimestamp(),
4045  NULL, false);
4046 
4047  /* Report change to non-waiting status */
4048  if (new_status)
4049  {
4050  set_ps_display(new_status);
4051  pfree(new_status);
4052  }
4053  return;
4054  }
4055  /* Failed, so mark myself as waiting for pincount 1 */
4056  if (buf_state & BM_PIN_COUNT_WAITER)
4057  {
4058  UnlockBufHdr(bufHdr, buf_state);
4059  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4060  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4061  }
4062  bufHdr->wait_backend_pid = MyProcPid;
4063  PinCountWaitBuf = bufHdr;
4064  buf_state |= BM_PIN_COUNT_WAITER;
4065  UnlockBufHdr(bufHdr, buf_state);
4066  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4067 
4068  /* Wait to be signaled by UnpinBuffer() */
4069  if (InHotStandby)
4070  {
4071  /* Report change to waiting status */
4072  if (update_process_title && new_status == NULL)
4073  {
4074  const char *old_status;
4075  int len;
4076 
4077  old_status = get_ps_display(&len);
4078  new_status = (char *) palloc(len + 8 + 1);
4079  memcpy(new_status, old_status, len);
4080  strcpy(new_status + len, " waiting");
4081  set_ps_display(new_status);
4082  new_status[len] = '\0'; /* truncate off " waiting" */
4083  }
4084 
4085  /*
4086  * Emit the log message if the startup process is waiting longer
4087  * than deadlock_timeout for recovery conflict on buffer pin.
4088  *
4089  * Skip this if first time through because the startup process has
4090  * not started waiting yet in this case. So, the wait start
4091  * timestamp is set after this logic.
4092  */
4093  if (waitStart != 0 && !logged_recovery_conflict)
4094  {
4095  TimestampTz now = GetCurrentTimestamp();
4096 
4097  if (TimestampDifferenceExceeds(waitStart, now,
4098  DeadlockTimeout))
4099  {
4100  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4101  waitStart, now, NULL, true);
4102  logged_recovery_conflict = true;
4103  }
4104  }
4105 
4106  /*
4107  * Set the wait start timestamp if logging is enabled and first
4108  * time through.
4109  */
4110  if (log_recovery_conflict_waits && waitStart == 0)
4111  waitStart = GetCurrentTimestamp();
4112 
4113  /* Publish the bufid that Startup process waits on */
4114  SetStartupBufferPinWaitBufId(buffer - 1);
4115  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4116  ResolveRecoveryConflictWithBufferPin();
4117  /* Reset the published bufid */
4118  SetStartupBufferPinWaitBufId(-1);
4119  }
4120  else
4121  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
4122 
4123  /*
4124  * Remove flag marking us as waiter. Normally this will not be set
4125  * anymore, but ProcWaitForSignal() can return for other signals as
4126  * well. We take care to only reset the flag if we're the waiter, as
4127  * theoretically another backend could have started waiting. That's
4128  * impossible with the current usages due to table level locking, but
4129  * better be safe.
4130  */
4131  buf_state = LockBufHdr(bufHdr);
4132  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4133  bufHdr->wait_backend_pid == MyProcPid)
4134  buf_state &= ~BM_PIN_COUNT_WAITER;
4135  UnlockBufHdr(bufHdr, buf_state);
4136 
4137  PinCountWaitBuf = NULL;
4138  /* Loop back and try again */
4139  }
4140 }
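Vacuum-style usage sketch (illustrative only; rel, blkno and vac_strategy are hypothetical caller variables):

Buffer      buf;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy);
LockBufferForCleanup(buf);      /* blocks until this backend holds the only pin */
/* ... prune and defragment the page ... */
UnlockReleaseBuffer(buf);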

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4547 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FindAndDropRelFileNodeBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4548 {
4549  SpinDelayStatus delayStatus;
4550  uint32 old_buf_state;
4551 
4552  init_local_spin_delay(&delayStatus);
4553 
4554  while (true)
4555  {
4556  /* set BM_LOCKED flag */
4557  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4558  /* if it wasn't set before we're OK */
4559  if (!(old_buf_state & BM_LOCKED))
4560  break;
4561  perform_spin_delay(&delayStatus);
4562  }
4563  finish_spin_delay(&delayStatus);
4564  return old_buf_state | BM_LOCKED;
4565 }
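The header-lock protocol used throughout this file, as a minimal sketch (bufHdr is a hypothetical BufferDesc pointer):

uint32      buf_state;

buf_state = LockBufHdr(bufHdr);              /* spins until BM_LOCKED is acquired */
if (BUF_STATE_GET_REFCOUNT(buf_state) == 0)
{
    /* ... inspect or adjust header fields while the spinlock is held ... */
}
UnlockBufHdr(bufHdr, buf_state);             /* stores buf_state and clears BM_LOCKED */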

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 1483 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_split(), _bt_unlink_halfdead_page(), _bt_update_meta_cleanup_info(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_clean(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_heap(), lazy_vacuum_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().

1484 {
1485  BufferDesc *bufHdr;
1486  uint32 buf_state;
1487  uint32 old_buf_state;
1488 
1489  if (!BufferIsValid(buffer))
1490  elog(ERROR, "bad buffer ID: %d", buffer);
1491 
1492  if (BufferIsLocal(buffer))
1493  {
1494  MarkLocalBufferDirty(buffer);
1495  return;
1496  }
1497 
1498  bufHdr = GetBufferDescriptor(buffer - 1);
1499 
1500  Assert(BufferIsPinned(buffer));
1501  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1502  LW_EXCLUSIVE));
1503 
1504  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1505  for (;;)
1506  {
1507  if (old_buf_state & BM_LOCKED)
1508  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1509 
1510  buf_state = old_buf_state;
1511 
1512  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1513  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1514 
1515  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1516  buf_state))
1517  break;
1518  }
1519 
1520  /*
1521  * If the buffer was not dirty already, do vacuum accounting.
1522  */
1523  if (!(old_buf_state & BM_DIRTY))
1524  {
1525  VacuumPageDirty++;
1526  pgBufferUsage.shared_blks_dirtied++;
1527  if (VacuumCostActive)
1528  VacuumCostBalance += VacuumCostPageDirty;
1529  }
1530 }
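A minimal sketch of the usual modify-and-log pattern, assuming the caller already holds a pin and an exclusive content lock on buffer; rel, and the resource-manager id and info byte passed to XLogInsert(), are hypothetical placeholders:

XLogRecPtr  recptr;

START_CRIT_SECTION();
/* ... apply the change to BufferGetPage(buffer) ... */
MarkBufferDirty(buffer);
if (RelationNeedsWAL(rel))
{
    XLogBeginInsert();
    XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
    recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INSERT);  /* hypothetical record type */
    PageSetLSN(BufferGetPage(buffer), recptr);
}
END_CRIT_SECTION();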

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3770 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGPROC::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileNodeSkippingWAL(), buftag::rnode, BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3771 {
3772  BufferDesc *bufHdr;
3773  Page page = BufferGetPage(buffer);
3774 
3775  if (!BufferIsValid(buffer))
3776  elog(ERROR, "bad buffer ID: %d", buffer);
3777 
3778  if (BufferIsLocal(buffer))
3779  {
3780  MarkLocalBufferDirty(buffer);
3781  return;
3782  }
3783 
3784  bufHdr = GetBufferDescriptor(buffer - 1);
3785 
3786  Assert(GetPrivateRefCount(buffer) > 0);
3787  /* here, either share or exclusive lock is OK */
3788  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3789 
3790  /*
3791  * This routine might get called many times on the same page, if we are
3792  * making the first scan after commit of an xact that added/deleted many
3793  * tuples. So, be as quick as we can if the buffer is already dirty. We
3794  * do this by not acquiring spinlock if it looks like the status bits are
3795  * already set. Since we make this test unlocked, there's a chance we
3796  * might fail to notice that the flags have just been cleared, and failed
3797  * to reset them, due to memory-ordering issues. But since this function
3798  * is only intended to be used in cases where failing to write out the
3799  * data would be harmless anyway, it doesn't really matter.
3800  */
3801  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3802  (BM_DIRTY | BM_JUST_DIRTIED))
3803  {
3804  XLogRecPtr lsn = InvalidXLogRecPtr;
3805  bool dirtied = false;
3806  bool delayChkpt = false;
3807  uint32 buf_state;
3808 
3809  /*
3810  * If we need to protect hint bit updates from torn writes, WAL-log a
3811  * full page image of the page. This full page image is only necessary
3812  * if the hint bit update is the first change to the page since the
3813  * last checkpoint.
3814  *
3815  * We don't check full_page_writes here because that logic is included
3816  * when we call XLogInsert() since the value changes dynamically.
3817  */
3818  if (XLogHintBitIsNeeded() &&
3819  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3820  {
3821  /*
3822  * If we must not write WAL, due to a relfilenode-specific
3823  * condition or being in recovery, don't dirty the page. We can
3824  * set the hint, just not dirty the page as a result so the hint
3825  * is lost when we evict the page or shutdown.
3826  *
3827  * See src/backend/storage/page/README for longer discussion.
3828  */
3829  if (RecoveryInProgress() ||
3830  RelFileNodeSkippingWAL(bufHdr->tag.rnode))
3831  return;
3832 
3833  /*
3834  * If the block is already dirty because we either made a change
3835  * or set a hint already, then we don't need to write a full page
3836  * image. Note that aggressive cleaning of blocks dirtied by hint
3837  * bit setting would increase the call rate. Bulk setting of hint
3838  * bits would reduce the call rate...
3839  *
3840  * We must issue the WAL record before we mark the buffer dirty.
3841  * Otherwise we might write the page before we write the WAL. That
3842  * causes a race condition, since a checkpoint might occur between
3843  * writing the WAL record and marking the buffer dirty. We solve
3844  * that with a kluge, but one that is already in use during
3845  * transaction commit to prevent race conditions. Basically, we
3846  * simply prevent the checkpoint WAL record from being written
3847  * until we have marked the buffer dirty. We don't start the
3848  * checkpoint flush until we have marked dirty, so our checkpoint
3849  * must flush the change to disk successfully or the checkpoint
3850  * never gets written, so crash recovery will fix.
3851  *
3852  * It's possible we may enter here without an xid, so it is
3853  * essential that CreateCheckpoint waits for virtual transactions
3854  * rather than full transactionids.
3855  */
3856  MyProc->delayChkpt = delayChkpt = true;
3857  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3858  }
3859 
3860  buf_state = LockBufHdr(bufHdr);
3861 
3862  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3863 
3864  if (!(buf_state & BM_DIRTY))
3865  {
3866  dirtied = true; /* Means "will be dirtied by this action" */
3867 
3868  /*
3869  * Set the page LSN if we wrote a backup block. We aren't supposed
3870  * to set this when only holding a share lock but as long as we
3871  * serialise it somehow we're OK. We choose to set LSN while
3872  * holding the buffer header lock, which causes any reader of an
3873  * LSN who holds only a share lock to also obtain a buffer header
3874  * lock before using PageGetLSN(), which is enforced in
3875  * BufferGetLSNAtomic().
3876  *
3877  * If checksums are enabled, you might think we should reset the
3878  * checksum here. That will happen when the page is written
3879  * sometime later in this checkpoint cycle.
3880  */
3881  if (!XLogRecPtrIsInvalid(lsn))
3882  PageSetLSN(page, lsn);
3883  }
3884 
3885  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3886  UnlockBufHdr(bufHdr, buf_state);
3887 
3888  if (delayChkpt)
3889  MyProc->delayChkpt = false;
3890 
3891  if (dirtied)
3892  {
3893  VacuumPageDirty++;
3894  pgBufferUsage.shared_blks_dirtied++;
3895  if (VacuumCostActive)
3896  VacuumCostBalance += VacuumCostPageDirty;
3897  }
3898  }
3899 }
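A minimal sketch in the style of SetHintBits(): set a tuple hint bit while holding only a pin and a content lock, then record the page as dirtied by a hint. Here tuple stands for a hypothetical HeapTupleHeader located on the pinned page:

tuple->t_infomask |= HEAP_XMIN_COMMITTED;
MarkBufferDirtyHint(buffer, true);   /* true: the page follows the standard layout */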

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 281 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

282 {
283  PrivateRefCountEntry *res;
284 
285  /* only allowed to be called when a reservation has been made */
286  Assert(ReservedRefCountEntry != NULL);
287 
288  /* use up the reserved entry */
289  res = ReservedRefCountEntry;
290  ReservedRefCountEntry = NULL;
291 
292  /* and fill it */
293  res->buffer = buffer;
294  res->refcount = 0;
295 
296  return res;
297 }

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1604 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc().

1605 {
1606  Buffer b = BufferDescriptorGetBuffer(buf);
1607  bool result;
1608  PrivateRefCountEntry *ref;
1609 
1610  ref = GetPrivateRefCountEntry(b, true);
1611 
1612  if (ref == NULL)
1613  {
1614  uint32 buf_state;
1615  uint32 old_buf_state;
1616 
1617  ReservePrivateRefCountEntry();
1618  ref = NewPrivateRefCountEntry(b);
1619 
1620  old_buf_state = pg_atomic_read_u32(&buf->state);
1621  for (;;)
1622  {
1623  if (old_buf_state & BM_LOCKED)
1624  old_buf_state = WaitBufHdrUnlocked(buf);
1625 
1626  buf_state = old_buf_state;
1627 
1628  /* increase refcount */
1629  buf_state += BUF_REFCOUNT_ONE;
1630 
1631  if (strategy == NULL)
1632  {
1633  /* Default case: increase usagecount unless already max. */
1635  buf_state += BUF_USAGECOUNT_ONE;
1636  }
1637  else
1638  {
1639  /*
1640  * Ring buffers shouldn't evict others from pool. Thus we
1641  * don't make usagecount more than 1.
1642  */
1643  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1644  buf_state += BUF_USAGECOUNT_ONE;
1645  }
1646 
1647  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1648  buf_state))
1649  {
1650  result = (buf_state & BM_VALID) != 0;
1651 
1652  /*
1653  * Assume that we acquired a buffer pin for the purposes of
1654  * Valgrind buffer client checks (even in !result case) to
1655  * keep things simple. Buffers that are unsafe to access are
1656  * not generally guaranteed to be marked undefined or
1657  * non-accessible in any case.
1658  */
1659  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1660  break;
1661  }
1662  }
1663  }
1664  else
1665  {
1666  /*
1667  * If we previously pinned the buffer, it must surely be valid.
1668  *
1669  * Note: We deliberately avoid a Valgrind client request here.
1670  * Individual access methods can optionally superimpose buffer page
1671  * client requests on top of our client requests to enforce that
1672  * buffers are only accessed while locked (and pinned). It's possible
1673  * that the buffer page is legitimately non-accessible here. We
1674  * cannot meddle with that.
1675  */
1676  result = true;
1677  }
1678 
1679  ref->refcount++;
1680  Assert(ref->refcount > 0);
1681  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1682  return result;
1683 }

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1707 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, UnlockBufHdr, and VALGRIND_MAKE_MEM_DEFINED.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

1708 {
1709  Buffer b;
1710  PrivateRefCountEntry *ref;
1711  uint32 buf_state;
1712 
1713  /*
1714  * As explained, We don't expect any preexisting pins. That allows us to
1715  * manipulate the PrivateRefCount after releasing the spinlock
1716  */
1717  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1718 
1719  /*
1720  * Buffer can't have a preexisting pin, so mark its page as defined to
1721  * Valgrind (this is similar to the PinBuffer() case where the backend
1722  * doesn't already have a buffer pin)
1723  */
1724  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1725 
1726  /*
1727  * Since we hold the buffer spinlock, we can update the buffer state and
1728  * release the lock in one operation.
1729  */
1730  buf_state = pg_atomic_read_u32(&buf->state);
1731  Assert(buf_state & BM_LOCKED);
1732  buf_state += BUF_REFCOUNT_ONE;
1733  UnlockBufHdr(buf, buf_state);
1734 
1735  b = BufferDescriptorGetBuffer(buf);
1736 
1737  ref = NewPrivateRefCountEntry(b);
1738  ref->refcount++;
1739  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1740 
1741 }

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 587 of file bufmgr.c.

References Assert, BlockNumberIsValid, ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RelationData::rd_smgr, RELATION_IS_OTHER_TEMP, RelationIsValid, RelationOpenSmgr, and RelationUsesLocalBuffers.

Referenced by BitmapPrefetch(), count_nondeletable_pages(), HeapTupleHeaderAdvanceLatestRemovedXid(), and pg_prewarm().

588 {
589  Assert(RelationIsValid(reln));
590  Assert(BlockNumberIsValid(blockNum));
591 
592  /* Open it at the smgr level if not already done */
593  RelationOpenSmgr(reln);
594 
595  if (RelationUsesLocalBuffers(reln))
596  {
597  /* see comments in ReadBufferExtended */
598  if (RELATION_IS_OTHER_TEMP(reln))
599  ereport(ERROR,
600  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
601  errmsg("cannot access temporary tables of other sessions")));
602 
603  /* pass it off to localbuf.c */
604  return PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum);
605  }
606  else
607  {
608  /* pass it to the shared buffer version */
609  return PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum);
610  }
611 }
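Usage sketch: hint at an upcoming read, then fetch the block later (illustrative only; rel and next_blkno are hypothetical caller variables):

PrefetchBuffer(rel, MAIN_FORKNUM, next_blkno);
/* ... other work that overlaps with the asynchronous read ... */
buf = ReadBufferExtended(rel, MAIN_FORKNUM, next_blkno, RBM_NORMAL, NULL);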

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 500 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), INIT_BUFFERTAG, PrefetchBufferResult::initiated_io, InvalidBuffer, LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by PrefetchBuffer().

503 {
504  PrefetchBufferResult result = {InvalidBuffer, false};
505  BufferTag newTag; /* identity of requested block */
506  uint32 newHash; /* hash value for newTag */
507  LWLock *newPartitionLock; /* buffer partition lock for it */
508  int buf_id;
509 
510  Assert(BlockNumberIsValid(blockNum));
511 
512  /* create a tag so we can lookup the buffer */
513  INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node,
514  forkNum, blockNum);
515 
516  /* determine its hash code and partition lock ID */
517  newHash = BufTableHashCode(&newTag);
518  newPartitionLock = BufMappingPartitionLock(newHash);
519 
520  /* see if the block is in the buffer pool already */
521  LWLockAcquire(newPartitionLock, LW_SHARED);
522  buf_id = BufTableLookup(&newTag, newHash);
523  LWLockRelease(newPartitionLock);
524 
525  /* If not in buffers, initiate prefetch */
526  if (buf_id < 0)
527  {
528 #ifdef USE_PREFETCH
529  /*
530  * Try to initiate an asynchronous read. This returns false in
531  * recovery if the relation file doesn't exist.
532  */
533  if (smgrprefetch(smgr_reln, forkNum, blockNum))
534  result.initiated_io = true;
535 #endif /* USE_PREFETCH */
536  }
537  else
538  {
539  /*
540  * Report the buffer it was in at that time. The caller may be able
541  * to avoid a buffer table lookup, but it's not pinned and it must be
542  * rechecked!
543  */
544  result.recent_buffer = buf_id + 1;
545  }
546 
547  /*
548  * If the block *is* in buffers, we do nothing. This is not really ideal:
549  * the block might be just about to be evicted, which would be stupid
550  * since we know we are going to need it soon. But the only easy answer
551  * is to bump the usage_count, which does not seem like a great solution:
552  * when the caller does ultimately touch the block, usage_count would get
553  * bumped again, resulting in too much favoritism for blocks that are
554  * involved in a prefetch sequence. A real fix would involve some
555  * additional per-buffer state, and it's not clear that there's enough of
556  * a problem to justify that.
557  */
558 
559  return result;
560 }

◆ PrintBufferLeakWarning()

void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2607 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2608 {
2609  BufferDesc *buf;
2610  int32 loccount;
2611  char *path;
2612  BackendId backend;
2613  uint32 buf_state;
2614 
2615  Assert(BufferIsValid(buffer));
2616  if (BufferIsLocal(buffer))
2617  {
2618  buf = GetLocalBufferDescriptor(-buffer - 1);
2619  loccount = LocalRefCount[-buffer - 1];
2620  backend = MyBackendId;
2621  }
2622  else
2623  {
2624  buf = GetBufferDescriptor(buffer - 1);
2625  loccount = GetPrivateRefCount(buffer);
2626  backend = InvalidBackendId;
2627  }
2628 
2629  /* theoretically we should lock the bufhdr here */
2630  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2631  buf_state = pg_atomic_read_u32(&buf->state);
2632  elog(WARNING,
2633  "buffer refcount leak: [%03d] "
2634  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2635  buffer, path,
2636  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2637  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2638  pfree(path);
2639 }

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 619 of file bufmgr.c.

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), fill_seq_with_data(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

620 {
621  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
622 }
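A minimal usage sketch (hypothetical caller, not part of bufmgr.c): ReadBuffer() only pins the block; the caller still has to take a content lock before examining the page, and must release both the lock and the pin when done.

/* Hypothetical helper: read one block, inspect it, and clean up. */
static void
inspect_block(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	if (PageIsNew(BufferGetPage(buf)))
		elog(DEBUG1, "block %u of \"%s\" is uninitialized",
			 blkno, RelationGetRelationName(rel));
	UnlockReleaseBuffer(buf);	/* drops the content lock and the pin */
}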

◆ ReadBuffer_common()

static Buffer ReadBuffer_common ( SMgrRelation  reln,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 729 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, BufferUsage::local_blks_written, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, BufferUsage::shared_blks_written, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().
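As a hedged illustration of the extension path handled below (hypothetical caller code, not part of bufmgr.c): passing P_NEW makes this routine extend the relation by one zero-filled block, and the caller is then expected to initialize and dirty the new page (real callers also WAL-log that initialization).

/* Sketch of the usual relation-extension pattern that drives the P_NEW path. */
Buffer		buf = ReadBuffer(rel, P_NEW);

LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);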

732 {
733  BufferDesc *bufHdr;
734  Block bufBlock;
735  bool found;
736  bool isExtend;
737  bool isLocalBuf = SmgrIsTemp(smgr);
738 
739  *hit = false;
740 
741  /* Make sure we will have room to remember the buffer pin */
742  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
743 
744  isExtend = (blockNum == P_NEW);
745 
746  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
747  smgr->smgr_rnode.node.spcNode,
748  smgr->smgr_rnode.node.dbNode,
749  smgr->smgr_rnode.node.relNode,
750  smgr->smgr_rnode.backend,
751  isExtend);
752 
753  /* Substitute proper block number if caller asked for P_NEW */
754  if (isExtend)
755  blockNum = smgrnblocks(smgr, forkNum);
756 
757  if (isLocalBuf)
758  {
759  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
760  if (found)
761  pgBufferUsage.local_blks_hit++;
762  else if (isExtend)
763  pgBufferUsage.local_blks_written++;
764  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
765  mode == RBM_ZERO_ON_ERROR)
766  pgBufferUsage.local_blks_read++;
767  }
768  else
769  {
770  /*
771  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
772  * not currently in memory.
773  */
774  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
775  strategy, &found);
776  if (found)
777  pgBufferUsage.shared_blks_hit++;
778  else if (isExtend)
779  pgBufferUsage.shared_blks_written++;
780  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
781  mode == RBM_ZERO_ON_ERROR)
782  pgBufferUsage.shared_blks_read++;
783  }
784 
785  /* At this point we do NOT hold any locks. */
786 
787  /* if it was already in the buffer pool, we're done */
788  if (found)
789  {
790  if (!isExtend)
791  {
792  /* Just need to update stats before we exit */
793  *hit = true;
794  VacuumPageHit++;
795 
796  if (VacuumCostActive)
797  VacuumCostBalance += VacuumCostPageHit;
798 
799  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
800  smgr->smgr_rnode.node.spcNode,
801  smgr->smgr_rnode.node.dbNode,
802  smgr->smgr_rnode.node.relNode,
803  smgr->smgr_rnode.backend,
804  isExtend,
805  found);
806 
807  /*
808  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
809  * locked on return.
810  */
811  if (!isLocalBuf)
812  {
813  if (mode == RBM_ZERO_AND_LOCK)
814  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
815  LW_EXCLUSIVE);
816  else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
817  LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
818  }
819 
820  return BufferDescriptorGetBuffer(bufHdr);
821  }
822 
823  /*
824  * We get here only in the corner case where we are trying to extend
825  * the relation but we found a pre-existing buffer marked BM_VALID.
826  * This can happen because mdread doesn't complain about reads beyond
827  * EOF (when zero_damaged_pages is ON) and so a previous attempt to
828  * read a block beyond EOF could have left a "valid" zero-filled
829  * buffer. Unfortunately, we have also seen this case occurring
830  * because of buggy Linux kernels that sometimes return an
831