PostgreSQL Source Code  git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Include dependency graph for bufmgr.c:

Go to the source code of this file.

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 81 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 71 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 70 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 63 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:389
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:228
int32 * LocalRefCount
Definition: localbuf.c:45

Definition at line 450 of file bufmgr.c.
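
BufferIsPinned() is private to bufmgr.c and reports whether the current backend holds at least one pin on the buffer, consulting LocalRefCount[] for local buffers and the private-refcount machinery for shared ones. A minimal usage sketch, as it might appear inside bufmgr.c (the helper name is hypothetical):

/* Hypothetical bufmgr.c-internal helper: assert our pin before touching data. */
static void
inspect_pinned_page(Buffer buffer)
{
    Page        page;

    Assert(BufferIsPinned(buffer));     /* true only if this backend holds a pin */
    page = BufferGetPage(buffer);       /* contents are stable while pinned */
    elog(DEBUG1, "page LSN is %X/%X", LSN_FORMAT_ARGS(PageGetLSN(page)));
}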

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 62 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 66 of file bufmgr.c.

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 90 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 73 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 4935 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 4935 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 4937 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 4937 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 4934 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 4934 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 4936 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 4936 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 4933 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 4933 of file bufmgr.c.
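
Both ST_* macro groups parameterize lib/sort_template.h, which expands into a specialized, statically scoped sort function and then #undefs the macros, so bufmgr.c can instantiate the template twice. A sketch of the first instantiation, following the standard sort_template.h pattern (whitespace differs from the actual source):

/* Instantiate a sorter for the checkpoint buffer list. */
#define ST_SORT sort_checkpoint_bufferids
#define ST_ELEMENT_TYPE CkptSortItem
#define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
#define ST_SCOPE static
#define ST_DEFINE
#include "lib/sort_template.h"

/* BufferSync() then calls the generated routine: */
sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);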

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

void AbortBufferIO ( void  )

Definition at line 4642 of file bufmgr.c.

4643 {
4644  BufferDesc *buf = InProgressBuf;
4645 
4646  if (buf)
4647  {
4648  uint32 buf_state;
4649 
4650  buf_state = LockBufHdr(buf);
4651  Assert(buf_state & BM_IO_IN_PROGRESS);
4652  if (IsForInput)
4653  {
4654  Assert(!(buf_state & BM_DIRTY));
4655 
4656  /* We'd better not think buffer is valid yet */
4657  Assert(!(buf_state & BM_VALID));
4658  UnlockBufHdr(buf, buf_state);
4659  }
4660  else
4661  {
4662  Assert(buf_state & BM_DIRTY);
4663  UnlockBufHdr(buf, buf_state);
4664  /* Issue notice if this is not the first failure... */
4665  if (buf_state & BM_IO_ERROR)
4666  {
4667  /* Buffer is pinned, so we can read tag without spinlock */
4668  char *path;
4669 
4670  path = relpathperm(BufTagGetRelFileLocator(&buf->tag),
4671  BufTagGetForkNum(&buf->tag));
4672  ereport(WARNING,
4673  (errcode(ERRCODE_IO_ERROR),
4674  errmsg("could not write block %u of %s",
4675  buf->tag.blockNum, path),
4676  errdetail("Multiple failures --- write error might be permanent.")));
4677  pfree(path);
4678  }
4679  }
4680  TerminateBufferIO(buf, false, BM_IO_ERROR);
4681  }
4682 }
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static void UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
#define BM_DIRTY
Definition: buf_internals.h:59
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:62
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:60
#define BM_IO_ERROR
Definition: buf_internals.h:63
static BufferDesc * InProgressBuf
Definition: bufmgr.c:163
static bool IsForInput
Definition: bufmgr.c:164
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4755
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:4610
unsigned int uint32
Definition: c.h:442
int errdetail(const char *fmt,...)
Definition: elog.c:1039
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define WARNING
Definition: elog.h:32
#define ereport(elevel,...)
Definition: elog.h:145
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1306
static char * buf
Definition: pg_test_fsync.c:67
#define relpathperm(rlocator, forknum)
Definition: relpath.h:90

References Assert(), BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, buf, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), InProgressBuf, IsForInput, LockBufHdr(), pfree(), relpathperm, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2594 of file bufmgr.c.

2595 {
2596  CheckForBufferLeaks();
2597 
2598  AtEOXact_LocalBuffers(isCommit);
2599 
2600  Assert(PrivateRefCountOverflowed == 0);
2601 }
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2655
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:201
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:598

References Assert(), AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 2636 of file bufmgr.c.

2637 {
2638  AbortBufferIO();
2639  UnlockBuffers();
2640 
2641  CheckForBufferLeaks();
2642 
2643  /* localbuf.c needs a chance too */
2644  AtProcExit_LocalBuffers();
2645 }
void UnlockBuffers(void)
Definition: bufmgr.c:4144
void AbortBufferIO(void)
Definition: bufmgr.c:4642
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:609

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 2224 of file bufmgr.c.

2225 {
2226  /* info obtained from freelist.c */
2227  int strategy_buf_id;
2228  uint32 strategy_passes;
2229  uint32 recent_alloc;
2230 
2231  /*
2232  * Information saved between calls so we can determine the strategy
2233  * point's advance rate and avoid scanning already-cleaned buffers.
2234  */
2235  static bool saved_info_valid = false;
2236  static int prev_strategy_buf_id;
2237  static uint32 prev_strategy_passes;
2238  static int next_to_clean;
2239  static uint32 next_passes;
2240 
2241  /* Moving averages of allocation rate and clean-buffer density */
2242  static float smoothed_alloc = 0;
2243  static float smoothed_density = 10.0;
2244 
2245  /* Potentially these could be tunables, but for now, not */
2246  float smoothing_samples = 16;
2247  float scan_whole_pool_milliseconds = 120000.0;
2248 
2249  /* Used to compute how far we scan ahead */
2250  long strategy_delta;
2251  int bufs_to_lap;
2252  int bufs_ahead;
2253  float scans_per_alloc;
2254  int reusable_buffers_est;
2255  int upcoming_alloc_est;
2256  int min_scan_buffers;
2257 
2258  /* Variables for the scanning loop proper */
2259  int num_to_scan;
2260  int num_written;
2261  int reusable_buffers;
2262 
2263  /* Variables for final smoothed_density update */
2264  long new_strategy_delta;
2265  uint32 new_recent_alloc;
2266 
2267  /*
2268  * Find out where the freelist clock sweep currently is, and how many
2269  * buffer allocations have happened since our last call.
2270  */
2271  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2272 
2273  /* Report buffer alloc counts to pgstat */
2274  PendingBgWriterStats.buf_alloc += recent_alloc;
2275 
2276  /*
2277  * If we're not running the LRU scan, just stop after doing the stats
2278  * stuff. We mark the saved state invalid so that we can recover sanely
2279  * if LRU scan is turned back on later.
2280  */
2281  if (bgwriter_lru_maxpages <= 0)
2282  {
2283  saved_info_valid = false;
2284  return true;
2285  }
2286 
2287  /*
2288  * Compute strategy_delta = how many buffers have been scanned by the
2289  * clock sweep since last time. If first time through, assume none. Then
2290  * see if we are still ahead of the clock sweep, and if so, how many
2291  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2292  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2293  * behavior when the passes counts wrap around.
2294  */
2295  if (saved_info_valid)
2296  {
2297  int32 passes_delta = strategy_passes - prev_strategy_passes;
2298 
2299  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2300  strategy_delta += (long) passes_delta * NBuffers;
2301 
2302  Assert(strategy_delta >= 0);
2303 
2304  if ((int32) (next_passes - strategy_passes) > 0)
2305  {
2306  /* we're one pass ahead of the strategy point */
2307  bufs_to_lap = strategy_buf_id - next_to_clean;
2308 #ifdef BGW_DEBUG
2309  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2310  next_passes, next_to_clean,
2311  strategy_passes, strategy_buf_id,
2312  strategy_delta, bufs_to_lap);
2313 #endif
2314  }
2315  else if (next_passes == strategy_passes &&
2316  next_to_clean >= strategy_buf_id)
2317  {
2318  /* on same pass, but ahead or at least not behind */
2319  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2320 #ifdef BGW_DEBUG
2321  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2322  next_passes, next_to_clean,
2323  strategy_passes, strategy_buf_id,
2324  strategy_delta, bufs_to_lap);
2325 #endif
2326  }
2327  else
2328  {
2329  /*
2330  * We're behind, so skip forward to the strategy point and start
2331  * cleaning from there.
2332  */
2333 #ifdef BGW_DEBUG
2334  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2335  next_passes, next_to_clean,
2336  strategy_passes, strategy_buf_id,
2337  strategy_delta);
2338 #endif
2339  next_to_clean = strategy_buf_id;
2340  next_passes = strategy_passes;
2341  bufs_to_lap = NBuffers;
2342  }
2343  }
2344  else
2345  {
2346  /*
2347  * Initializing at startup or after LRU scanning had been off. Always
2348  * start at the strategy point.
2349  */
2350 #ifdef BGW_DEBUG
2351  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2352  strategy_passes, strategy_buf_id);
2353 #endif
2354  strategy_delta = 0;
2355  next_to_clean = strategy_buf_id;
2356  next_passes = strategy_passes;
2357  bufs_to_lap = NBuffers;
2358  }
2359 
2360  /* Update saved info for next time */
2361  prev_strategy_buf_id = strategy_buf_id;
2362  prev_strategy_passes = strategy_passes;
2363  saved_info_valid = true;
2364 
2365  /*
2366  * Compute how many buffers had to be scanned for each new allocation, ie,
2367  * 1/density of reusable buffers, and track a moving average of that.
2368  *
2369  * If the strategy point didn't move, we don't update the density estimate
2370  */
2371  if (strategy_delta > 0 && recent_alloc > 0)
2372  {
2373  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2374  smoothed_density += (scans_per_alloc - smoothed_density) /
2375  smoothing_samples;
2376  }
2377 
2378  /*
2379  * Estimate how many reusable buffers there are between the current
2380  * strategy point and where we've scanned ahead to, based on the smoothed
2381  * density estimate.
2382  */
2383  bufs_ahead = NBuffers - bufs_to_lap;
2384  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2385 
2386  /*
2387  * Track a moving average of recent buffer allocations. Here, rather than
2388  * a true average we want a fast-attack, slow-decline behavior: we
2389  * immediately follow any increase.
2390  */
2391  if (smoothed_alloc <= (float) recent_alloc)
2392  smoothed_alloc = recent_alloc;
2393  else
2394  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2395  smoothing_samples;
2396 
2397  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2398  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2399 
2400  /*
2401  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2402  * eventually underflow to zero, and the underflows produce annoying
2403  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2404  * zero, there's no point in tracking smaller and smaller values of
2405  * smoothed_alloc, so just reset it to exactly zero to avoid this
2406  * syndrome. It will pop back up as soon as recent_alloc increases.
2407  */
2408  if (upcoming_alloc_est == 0)
2409  smoothed_alloc = 0;
2410 
2411  /*
2412  * Even in cases where there's been little or no buffer allocation
2413  * activity, we want to make a small amount of progress through the buffer
2414  * cache so that as many reusable buffers as possible are clean after an
2415  * idle period.
2416  *
2417  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2418  * the BGW will be called during the scan_whole_pool time; slice the
2419  * buffer pool into that many sections.
2420  */
2421  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2422 
2423  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2424  {
2425 #ifdef BGW_DEBUG
2426  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2427  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2428 #endif
2429  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2430  }
2431 
2432  /*
2433  * Now write out dirty reusable buffers, working forward from the
2434  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2435  * enough buffers to match our estimate of the next cycle's allocation
2436  * requirements, or hit the bgwriter_lru_maxpages limit.
2437  */
2438 
2439  /* Make sure we can handle the pin inside SyncOneBuffer */
2440  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2441 
2442  num_to_scan = bufs_to_lap;
2443  num_written = 0;
2444  reusable_buffers = reusable_buffers_est;
2445 
2446  /* Execute the LRU scan */
2447  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2448  {
2449  int sync_state = SyncOneBuffer(next_to_clean, true,
2450  wb_context);
2451 
2452  if (++next_to_clean >= NBuffers)
2453  {
2454  next_to_clean = 0;
2455  next_passes++;
2456  }
2457  num_to_scan--;
2458 
2459  if (sync_state & BUF_WRITTEN)
2460  {
2461  reusable_buffers++;
2462  if (++num_written >= bgwriter_lru_maxpages)
2463  {
2464  PendingBgWriterStats.maxwritten_clean++;
2465  break;
2466  }
2467  }
2468  else if (sync_state & BUF_REUSABLE)
2469  reusable_buffers++;
2470  }
2471 
2472  PendingBgWriterStats.buf_written_clean += num_written;
2473 
2474 #ifdef BGW_DEBUG
2475  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2476  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2477  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2478  bufs_to_lap - num_to_scan,
2479  num_written,
2480  reusable_buffers - reusable_buffers_est);
2481 #endif
2482 
2483  /*
2484  * Consider the above scan as being like a new allocation scan.
2485  * Characterize its density and update the smoothed one based on it. This
2486  * effectively halves the moving average period in cases where both the
2487  * strategy and the background writer are doing some useful scanning,
2488  * which is helpful because a long memory isn't as desirable on the
2489  * density estimates.
2490  */
2491  new_strategy_delta = bufs_to_lap - num_to_scan;
2492  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2493  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2494  {
2495  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2496  smoothed_density += (scans_per_alloc - smoothed_density) /
2497  smoothing_samples;
2498 
2499 #ifdef BGW_DEBUG
2500  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2501  new_recent_alloc, new_strategy_delta,
2502  scans_per_alloc, smoothed_density);
2503 #endif
2504  }
2505 
2506  /* Return true if OK to hibernate */
2507  return (bufs_to_lap == 0 && recent_alloc == 0);
2508 }
int BgWriterDelay
Definition: bgwriter.c:61
#define BUF_REUSABLE
Definition: bufmgr.c:71
double bgwriter_lru_multiplier
Definition: bufmgr.c:136
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:2527
int bgwriter_lru_maxpages
Definition: bufmgr.c:135
#define BUF_WRITTEN
Definition: bufmgr.c:70
signed int int32
Definition: c.h:430
#define DEBUG2
Definition: elog.h:25
#define DEBUG1
Definition: elog.h:26
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:394
int NBuffers
Definition: globals.c:136
PgStat_BgWriterStats PendingBgWriterStats
ResourceOwner CurrentResourceOwner
Definition: resowner.c:146
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:950
PgStat_Counter buf_written_clean
Definition: pgstat.h:262
PgStat_Counter maxwritten_clean
Definition: pgstat.h:263
PgStat_Counter buf_alloc
Definition: pgstat.h:264

References Assert(), bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, CurrentResourceOwner, DEBUG1, DEBUG2, elog(), PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
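
The background writer's main loop is the sole caller; it uses the return value to decide whether it may hibernate. A simplified, hedged sketch of such a loop (latch and signal handling from BackgroundWriterMain() are omitted; the factor 50 mirrors bgwriter.c's HIBERNATE_FACTOR):

WritebackContext wb_context;
bool        can_hibernate;

WritebackContextInit(&wb_context, &bgwriter_flush_after);

for (;;)
{
    /* Run one LRU cleaning cycle; true means there is nothing left to clean. */
    can_hibernate = BgBufferSync(&wb_context);

    (void) WaitLatch(MyLatch,
                     WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                     can_hibernate ? BgWriterDelay * 50 : BgWriterDelay,
                     WAIT_EVENT_BGWRITER_MAIN);
    ResetLatch(MyLatch);
}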

◆ BufferAlloc()

static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr 
)
static

Definition at line 1119 of file bufmgr.c.

1123 {
1124  BufferTag newTag; /* identity of requested block */
1125  uint32 newHash; /* hash value for newTag */
1126  LWLock *newPartitionLock; /* buffer partition lock for it */
1127  BufferTag oldTag; /* previous identity of selected buffer */
1128  uint32 oldHash; /* hash value for oldTag */
1129  LWLock *oldPartitionLock; /* buffer partition lock for it */
1130  uint32 oldFlags;
1131  int buf_id;
1132  BufferDesc *buf;
1133  bool valid;
1134  uint32 buf_state;
1135 
1136  /* create a tag so we can lookup the buffer */
1137  InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
1138 
1139  /* determine its hash code and partition lock ID */
1140  newHash = BufTableHashCode(&newTag);
1141  newPartitionLock = BufMappingPartitionLock(newHash);
1142 
1143  /* see if the block is in the buffer pool already */
1144  LWLockAcquire(newPartitionLock, LW_SHARED);
1145  buf_id = BufTableLookup(&newTag, newHash);
1146  if (buf_id >= 0)
1147  {
1148  /*
1149  * Found it. Now, pin the buffer so no one can steal it from the
1150  * buffer pool, and check to see if the correct data has been loaded
1151  * into the buffer.
1152  */
1153  buf = GetBufferDescriptor(buf_id);
1154 
1155  valid = PinBuffer(buf, strategy);
1156 
1157  /* Can release the mapping lock as soon as we've pinned it */
1158  LWLockRelease(newPartitionLock);
1159 
1160  *foundPtr = true;
1161 
1162  if (!valid)
1163  {
1164  /*
1165  * We can only get here if (a) someone else is still reading in
1166  * the page, or (b) a previous read attempt failed. We have to
1167  * wait for any active read attempt to finish, and then set up our
1168  * own read attempt if the page is still not BM_VALID.
1169  * StartBufferIO does it all.
1170  */
1171  if (StartBufferIO(buf, true))
1172  {
1173  /*
1174  * If we get here, previous attempts to read the buffer must
1175  * have failed ... but we shall bravely try again.
1176  */
1177  *foundPtr = false;
1178  }
1179  }
1180 
1181  return buf;
1182  }
1183 
1184  /*
1185  * Didn't find it in the buffer pool. We'll have to initialize a new
1186  * buffer. Remember to unlock the mapping lock while doing the work.
1187  */
1188  LWLockRelease(newPartitionLock);
1189 
1190  /* Loop here in case we have to try another victim buffer */
1191  for (;;)
1192  {
1193  /*
1194  * Ensure, while the spinlock's not yet held, that there's a free
1195  * refcount entry.
1196  */
1197  ReservePrivateRefCountEntry();
1198 
1199  /*
1200  * Select a victim buffer. The buffer is returned with its header
1201  * spinlock still held!
1202  */
1203  buf = StrategyGetBuffer(strategy, &buf_state);
1204 
1205  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1206 
1207  /* Must copy buffer flags while we still hold the spinlock */
1208  oldFlags = buf_state & BUF_FLAG_MASK;
1209 
1210  /* Pin the buffer and then release the buffer spinlock */
1211  PinBuffer_Locked(buf);
1212 
1213  /*
1214  * If the buffer was dirty, try to write it out. There is a race
1215  * condition here, in that someone might dirty it after we released it
1216  * above, or even while we are writing it out (since our share-lock
1217  * won't prevent hint-bit updates). We will recheck the dirty bit
1218  * after re-locking the buffer header.
1219  */
1220  if (oldFlags & BM_DIRTY)
1221  {
1222  /*
1223  * We need a share-lock on the buffer contents to write it out
1224  * (else we might write invalid data, eg because someone else is
1225  * compacting the page contents while we write). We must use a
1226  * conditional lock acquisition here to avoid deadlock. Even
1227  * though the buffer was not pinned (and therefore surely not
1228  * locked) when StrategyGetBuffer returned it, someone else could
1229  * have pinned and exclusive-locked it by the time we get here. If
1230  * we try to get the lock unconditionally, we'd block waiting for
1231  * them; if they later block waiting for us, deadlock ensues.
1232  * (This has been observed to happen when two backends are both
1233  * trying to split btree index pages, and the second one just
1234  * happens to be trying to split the page the first one got from
1235  * StrategyGetBuffer.)
1236  */
1237  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1238  LW_SHARED))
1239  {
1240  /*
1241  * If using a nondefault strategy, and writing the buffer
1242  * would require a WAL flush, let the strategy decide whether
1243  * to go ahead and write/reuse the buffer or to choose another
1244  * victim. We need lock to inspect the page LSN, so this
1245  * can't be done inside StrategyGetBuffer.
1246  */
1247  if (strategy != NULL)
1248  {
1249  XLogRecPtr lsn;
1250 
1251  /* Read the LSN while holding buffer header lock */
1252  buf_state = LockBufHdr(buf);
1253  lsn = BufferGetLSN(buf);
1254  UnlockBufHdr(buf, buf_state);
1255 
1256  if (XLogNeedsFlush(lsn) &&
1257  StrategyRejectBuffer(strategy, buf))
1258  {
1259  /* Drop lock/pin and loop around for another buffer */
1260  LWLockRelease(BufferDescriptorGetContentLock(buf));
1261  UnpinBuffer(buf);
1262  continue;
1263  }
1264  }
1265 
1266  /* OK, do the I/O */
1267  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1268  smgr->smgr_rlocator.locator.spcOid,
1269  smgr->smgr_rlocator.locator.dbOid,
1270  smgr->smgr_rlocator.locator.relNumber);
1271 
1272  FlushBuffer(buf, NULL);
1273  LWLockRelease(BufferDescriptorGetContentLock(buf));
1274 
1275  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1276  &buf->tag);
1277 
1278  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1279  smgr->smgr_rlocator.locator.spcOid,
1280  smgr->smgr_rlocator.locator.dbOid,
1281  smgr->smgr_rlocator.locator.relNumber);
1282  }
1283  else
1284  {
1285  /*
1286  * Someone else has locked the buffer, so give it up and loop
1287  * back to get another one.
1288  */
1289  UnpinBuffer(buf);
1290  continue;
1291  }
1292  }
1293 
1294  /*
1295  * To change the association of a valid buffer, we'll need to have
1296  * exclusive lock on both the old and new mapping partitions.
1297  */
1298  if (oldFlags & BM_TAG_VALID)
1299  {
1300  /*
1301  * Need to compute the old tag's hashcode and partition lock ID.
1302  * XXX is it worth storing the hashcode in BufferDesc so we need
1303  * not recompute it here? Probably not.
1304  */
1305  oldTag = buf->tag;
1306  oldHash = BufTableHashCode(&oldTag);
1307  oldPartitionLock = BufMappingPartitionLock(oldHash);
1308 
1309  /*
1310  * Must lock the lower-numbered partition first to avoid
1311  * deadlocks.
1312  */
1313  if (oldPartitionLock < newPartitionLock)
1314  {
1315  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1316  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1317  }
1318  else if (oldPartitionLock > newPartitionLock)
1319  {
1320  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1321  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1322  }
1323  else
1324  {
1325  /* only one partition, only one lock */
1326  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1327  }
1328  }
1329  else
1330  {
1331  /* if it wasn't valid, we need only the new partition */
1332  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1333  /* remember we have no old-partition lock or tag */
1334  oldPartitionLock = NULL;
1335  /* keep the compiler quiet about uninitialized variables */
1336  oldHash = 0;
1337  }
1338 
1339  /*
1340  * Try to make a hashtable entry for the buffer under its new tag.
1341  * This could fail because while we were writing someone else
1342  * allocated another buffer for the same block we want to read in.
1343  * Note that we have not yet removed the hashtable entry for the old
1344  * tag.
1345  */
1346  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1347 
1348  if (buf_id >= 0)
1349  {
1350  /*
1351  * Got a collision. Someone has already done what we were about to
1352  * do. We'll just handle this as if it were found in the buffer
1353  * pool in the first place. First, give up the buffer we were
1354  * planning to use.
1355  */
1356  UnpinBuffer(buf);
1357 
1358  /* Can give up that buffer's mapping partition lock now */
1359  if (oldPartitionLock != NULL &&
1360  oldPartitionLock != newPartitionLock)
1361  LWLockRelease(oldPartitionLock);
1362 
1363  /* remaining code should match code at top of routine */
1364 
1365  buf = GetBufferDescriptor(buf_id);
1366 
1367  valid = PinBuffer(buf, strategy);
1368 
1369  /* Can release the mapping lock as soon as we've pinned it */
1370  LWLockRelease(newPartitionLock);
1371 
1372  *foundPtr = true;
1373 
1374  if (!valid)
1375  {
1376  /*
1377  * We can only get here if (a) someone else is still reading
1378  * in the page, or (b) a previous read attempt failed. We
1379  * have to wait for any active read attempt to finish, and
1380  * then set up our own read attempt if the page is still not
1381  * BM_VALID. StartBufferIO does it all.
1382  */
1383  if (StartBufferIO(buf, true))
1384  {
1385  /*
1386  * If we get here, previous attempts to read the buffer
1387  * must have failed ... but we shall bravely try again.
1388  */
1389  *foundPtr = false;
1390  }
1391  }
1392 
1393  return buf;
1394  }
1395 
1396  /*
1397  * Need to lock the buffer header too in order to change its tag.
1398  */
1399  buf_state = LockBufHdr(buf);
1400 
1401  /*
1402  * Somebody could have pinned or re-dirtied the buffer while we were
1403  * doing the I/O and making the new hashtable entry. If so, we can't
1404  * recycle this buffer; we must undo everything we've done and start
1405  * over with a new victim buffer.
1406  */
1407  oldFlags = buf_state & BUF_FLAG_MASK;
1408  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1409  break;
1410 
1411  UnlockBufHdr(buf, buf_state);
1412  BufTableDelete(&newTag, newHash);
1413  if (oldPartitionLock != NULL &&
1414  oldPartitionLock != newPartitionLock)
1415  LWLockRelease(oldPartitionLock);
1416  LWLockRelease(newPartitionLock);
1417  UnpinBuffer(buf);
1418  }
1419 
1420  /*
1421  * Okay, it's finally safe to rename the buffer.
1422  *
1423  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1424  * paranoia. We also reset the usage_count since any recency of use of
1425  * the old content is no longer relevant. (The usage_count starts out at
1426  * 1 so that the buffer can survive one clock-sweep pass.)
1427  *
1428  * Make sure BM_PERMANENT is set for buffers that must be written at every
1429  * checkpoint. Unlogged buffers only need to be written at shutdown
1430  * checkpoints, except for their "init" forks, which need to be treated
1431  * just like permanent relations.
1432  */
1433  buf->tag = newTag;
1434  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1435  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1436  BUF_USAGECOUNT_MASK);
1437  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1438  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1439  else
1440  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1441 
1442  UnlockBufHdr(buf, buf_state);
1443 
1444  if (oldPartitionLock != NULL)
1445  {
1446  BufTableDelete(&oldTag, oldHash);
1447  if (oldPartitionLock != newPartitionLock)
1448  LWLockRelease(oldPartitionLock);
1449  }
1450 
1451  LWLockRelease(newPartitionLock);
1452 
1453  /*
1454  * Buffer contents are currently invalid. Try to obtain the right to
1455  * start I/O. If StartBufferIO returns false, then someone else managed
1456  * to read it before we did, so there's nothing left for BufferAlloc() to
1457  * do.
1458  */
1459  if (StartBufferIO(buf, true))
1460  *foundPtr = false;
1461  else
1462  *foundPtr = true;
1463 
1464  return buf;
1465 }
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_TAG_VALID
Definition: buf_internals.h:61
#define BM_PERMANENT
Definition: buf_internals.h:67
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:43
static BufferDesc * GetBufferDescriptor(uint32 id)
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
static LWLock * BufMappingPartitionLock(uint32 hashcode)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:44
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:66
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:149
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:91
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:79
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:119
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:1704
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1807
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:63
void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
Definition: bufmgr.c:4907
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:4559
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:217
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:1850
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2823
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
Definition: freelist.c:201
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
Definition: freelist.c:685
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1194
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1802
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1365
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
@ INIT_FORKNUM
Definition: relpath.h:53
Definition: lwlock.h:40
RelFileLocator locator
RelFileNumber relNumber
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:42
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:2841
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock(), BufferGetLSN, BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileLocator::dbOid, FlushBuffer(), GetBufferDescriptor(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), PinBuffer(), PinBuffer_Locked(), RelFileLocator::relNumber, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2763 of file bufmgr.c.

2764 {
2765  BufferDesc *bufHdr;
2766 
2767  Assert(BufferIsPinned(buffer));
2768 
2769  if (BufferIsLocal(buffer))
2770  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2771  else
2772  bufHdr = GetBufferDescriptor(buffer - 1);
2773 
2774  /* pinned, so OK to read tag without spinlock */
2775  return bufHdr->tag.blockNum;
2776 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:450
BufferTag tag
BlockNumber blockNum
Definition: buf_internals.h:96

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_page_prune(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), index_compute_xid_horizon_for_tuples(), lazy_scan_noprune(), lazy_scan_prune(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgWalk(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().
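
A trivial usage sketch (the relation "rel" and "target_block" are hypothetical caller-supplied values); the function simply reads the block number back out of the pinned buffer's tag:

Buffer      buf;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, target_block, RBM_NORMAL, NULL);
Assert(BufferGetBlockNumber(buf) == target_block);
ReleaseBuffer(buf);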

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 3012 of file bufmgr.c.

3013 {
3014  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
3015  char *page = BufferGetPage(buffer);
3016  XLogRecPtr lsn;
3017  uint32 buf_state;
3018 
3019  /*
3020  * If we don't need locking for correctness, fastpath out.
3021  */
3022  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
3023  return PageGetLSN(page);
3024 
3025  /* Make sure we've got a real buffer, and that we hold a pin on it. */
3026  Assert(BufferIsValid(buffer));
3027  Assert(BufferIsPinned(buffer));
3028 
3029  buf_state = LockBufHdr(bufHdr);
3030  lsn = PageGetLSN(page);
3031  UnlockBufHdr(bufHdr, buf_state);
3032 
3033  return lsn;
3034 }
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:280
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
#define XLogHintBitIsNeeded()
Definition: xlog.h:115

References Assert(), PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_killitems(), _bt_readpage(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator *  rlocator,
ForkNumber *  forknum,
BlockNumber *  blknum 
)

Definition at line 2784 of file bufmgr.c.

2786 {
2787  BufferDesc *bufHdr;
2788 
2789  /* Do the same checks as BufferGetBlockNumber. */
2790  Assert(BufferIsPinned(buffer));
2791 
2792  if (BufferIsLocal(buffer))
2793  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2794  else
2795  bufHdr = GetBufferDescriptor(buffer - 1);
2796 
2797  /* pinned, so OK to read tag without spinlock */
2798  *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
2799  *forknum = BufTagGetForkNum(&bufHdr->tag);
2800  *blknum = bufHdr->tag.blockNum;
2801 }

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
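
A short sketch of the calling convention (the buffer is assumed to be pinned already); all three output parameters are filled from the buffer's tag:

RelFileLocator rlocator;
ForkNumber  forknum;
BlockNumber blkno;

BufferGetTag(buffer, &rlocator, &forknum, &blkno);
elog(DEBUG2, "buffer %d holds rel %u fork %d block %u",
     buffer, rlocator.relNumber, forknum, blkno);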

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2982 of file bufmgr.c.

2983 {
2984  BufferDesc *bufHdr;
2985 
2986  /* Local buffers are used only for temp relations. */
2987  if (BufferIsLocal(buffer))
2988  return false;
2989 
2990  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2991  Assert(BufferIsValid(buffer));
2992  Assert(BufferIsPinned(buffer));
2993 
2994  /*
2995  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2996  * need not bother with the buffer header spinlock. Even if someone else
2997  * changes the buffer header state while we're doing this, the state is
2998  * changed atomically, so we'll read the old value or the new value, but
2999  * not random garbage.
3000  */
3001  bufHdr = GetBufferDescriptor(buffer - 1);
3002  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
3003 }
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:236
pg_atomic_uint32 state

References Assert(), BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().
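
Its single caller, SetHintBits() in the heap access method, uses it to decide whether the WAL-LSN interlock applies before setting a hint bit. A condensed, hedged sketch of that check (variable names follow SetHintBits()):

/* "xid" is known committed here. */
XLogRecPtr  commitLSN = TransactionIdGetCommitLSN(xid);

if (BufferIsPermanent(buffer) && XLogNeedsFlush(commitLSN) &&
    BufferGetLSNAtomic(buffer) < commitLSN)
    return;             /* commit record not flushed yet: skip the hint bit */

tuple->t_infomask |= infomask;
MarkBufferDirtyHint(buffer, true);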

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 1948 of file bufmgr.c.

1949 {
1950  uint32 buf_state;
1951  int buf_id;
1952  int num_to_scan;
1953  int num_spaces;
1954  int num_processed;
1955  int num_written;
1956  CkptTsStatus *per_ts_stat = NULL;
1957  Oid last_tsid;
1958  binaryheap *ts_heap;
1959  int i;
1960  int mask = BM_DIRTY;
1961  WritebackContext wb_context;
1962 
1963  /* Make sure we can handle the pin inside SyncOneBuffer */
1964  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1965 
1966  /*
1967  * Unless this is a shutdown checkpoint or we have been explicitly told,
1968  * we write only permanent, dirty buffers. But at shutdown or end of
1969  * recovery, we write all dirty buffers.
1970  */
1971  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
1972  CHECKPOINT_FLUSH_ALL))))
1973  mask |= BM_PERMANENT;
1974 
1975  /*
1976  * Loop over all buffers, and mark the ones that need to be written with
1977  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1978  * can estimate how much work needs to be done.
1979  *
1980  * This allows us to write only those pages that were dirty when the
1981  * checkpoint began, and not those that get dirtied while it proceeds.
1982  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1983  * later in this function, or by normal backends or the bgwriter cleaning
1984  * scan, the flag is cleared. Any buffer dirtied after this point won't
1985  * have the flag set.
1986  *
1987  * Note that if we fail to write some buffer, we may leave buffers with
1988  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1989  * certainly need to be written for the next checkpoint attempt, too.
1990  */
1991  num_to_scan = 0;
1992  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1993  {
1994  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1995 
1996  /*
1997  * Header spinlock is enough to examine BM_DIRTY, see comment in
1998  * SyncOneBuffer.
1999  */
2000  buf_state = LockBufHdr(bufHdr);
2001 
2002  if ((buf_state & mask) == mask)
2003  {
2004  CkptSortItem *item;
2005 
2006  buf_state |= BM_CHECKPOINT_NEEDED;
2007 
2008  item = &CkptBufferIds[num_to_scan++];
2009  item->buf_id = buf_id;
2010  item->tsId = bufHdr->tag.spcOid;
2011  item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
2012  item->forkNum = BufTagGetForkNum(&bufHdr->tag);
2013  item->blockNum = bufHdr->tag.blockNum;
2014  }
2015 
2016  UnlockBufHdr(bufHdr, buf_state);
2017 
2018  /* Check for barrier events in case NBuffers is large. */
2019  if (ProcSignalBarrierPending)
2020  ProcessProcSignalBarrier();
2021  }
2022 
2023  if (num_to_scan == 0)
2024  return; /* nothing to do */
2025 
2026  WritebackContextInit(&wb_context, &checkpoint_flush_after);
2027 
2028  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
2029 
2030  /*
2031  * Sort buffers that need to be written to reduce the likelihood of random
2032  * IO. The sorting is also important for the implementation of balancing
2033  * writes between tablespaces. Without balancing writes we'd potentially
2034  * end up writing to the tablespaces one-by-one; possibly overloading the
2035  * underlying system.
2036  */
2037  sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
2038 
2039  num_spaces = 0;
2040 
2041  /*
2042  * Allocate progress status for each tablespace with buffers that need to
2043  * be flushed. This requires the to-be-flushed array to be sorted.
2044  */
2045  last_tsid = InvalidOid;
2046  for (i = 0; i < num_to_scan; i++)
2047  {
2048  CkptTsStatus *s;
2049  Oid cur_tsid;
2050 
2051  cur_tsid = CkptBufferIds[i].tsId;
2052 
2053  /*
2054  * Grow array of per-tablespace status structs, every time a new
2055  * tablespace is found.
2056  */
2057  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
2058  {
2059  Size sz;
2060 
2061  num_spaces++;
2062 
2063  /*
2064  * Not worth adding grow-by-power-of-2 logic here - even with a
2065  * few hundred tablespaces this should be fine.
2066  */
2067  sz = sizeof(CkptTsStatus) * num_spaces;
2068 
2069  if (per_ts_stat == NULL)
2070  per_ts_stat = (CkptTsStatus *) palloc(sz);
2071  else
2072  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
2073 
2074  s = &per_ts_stat[num_spaces - 1];
2075  memset(s, 0, sizeof(*s));
2076  s->tsId = cur_tsid;
2077 
2078  /*
2079  * The first buffer in this tablespace. As CkptBufferIds is sorted
2080  * by tablespace all (s->num_to_scan) buffers in this tablespace
2081  * will follow afterwards.
2082  */
2083  s->index = i;
2084 
2085  /*
2086  * progress_slice will be determined once we know how many buffers
2087  * are in each tablespace, i.e. after this loop.
2088  */
2089 
2090  last_tsid = cur_tsid;
2091  }
2092  else
2093  {
2094  s = &per_ts_stat[num_spaces - 1];
2095  }
2096 
2097  s->num_to_scan++;
2098 
2099  /* Check for barrier events. */
2100  if (ProcSignalBarrierPending)
2101  ProcessProcSignalBarrier();
2102  }
2103 
2104  Assert(num_spaces > 0);
2105 
2106  /*
2107  * Build a min-heap over the write-progress in the individual tablespaces,
2108  * and compute how large a portion of the total progress a single
2109  * processed buffer is.
2110  */
2111  ts_heap = binaryheap_allocate(num_spaces,
2112  ts_ckpt_progress_comparator,
2113  NULL);
2114 
2115  for (i = 0; i < num_spaces; i++)
2116  {
2117  CkptTsStatus *ts_stat = &per_ts_stat[i];
2118 
2119  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
2120 
2121  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
2122  }
2123 
2124  binaryheap_build(ts_heap);
2125 
2126  /*
2127  * Iterate through to-be-checkpointed buffers and write the ones (still)
2128  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
2129  * tablespaces; otherwise the sorting would lead to only one tablespace
2130  * receiving writes at a time, making inefficient use of the hardware.
2131  */
2132  num_processed = 0;
2133  num_written = 0;
2134  while (!binaryheap_empty(ts_heap))
2135  {
2136  BufferDesc *bufHdr = NULL;
2137  CkptTsStatus *ts_stat = (CkptTsStatus *)
2138  DatumGetPointer(binaryheap_first(ts_heap));
2139 
2140  buf_id = CkptBufferIds[ts_stat->index].buf_id;
2141  Assert(buf_id != -1);
2142 
2143  bufHdr = GetBufferDescriptor(buf_id);
2144 
2145  num_processed++;
2146 
2147  /*
2148  * We don't need to acquire the lock here, because we're only looking
2149  * at a single bit. It's possible that someone else writes the buffer
2150  * and clears the flag right after we check, but that doesn't matter
2151  * since SyncOneBuffer will then do nothing. However, there is a
2152  * further race condition: it's conceivable that between the time we
2153  * examine the bit here and the time SyncOneBuffer acquires the lock,
2154  * someone else not only wrote the buffer but replaced it with another
2155  * page and dirtied it. In that improbable case, SyncOneBuffer will
2156  * write the buffer though we didn't need to. It doesn't seem worth
2157  * guarding against this, though.
2158  */
2159  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
2160  {
2161  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
2162  {
2163  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
2164  PendingCheckpointerStats.buf_written_checkpoints++;
2165  num_written++;
2166  }
2167  }
2168 
2169  /*
2170  * Measure progress independent of actually having to flush the buffer
2171  * - otherwise writing become unbalanced.
2172  */
2173  ts_stat->progress += ts_stat->progress_slice;
2174  ts_stat->num_scanned++;
2175  ts_stat->index++;
2176 
2177  /* Have all the buffers from the tablespace been processed? */
2178  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2179  {
2180  binaryheap_remove_first(ts_heap);
2181  }
2182  else
2183  {
2184  /* update heap with the new progress */
2185  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2186  }
2187 
2188  /*
2189  * Sleep to throttle our I/O rate.
2190  *
2191  * (This will check for barrier events even if it doesn't sleep.)
2192  */
2193  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2194  }
2195 
2196  /* issue all pending flushes */
2197  IssuePendingWritebacks(&wb_context);
2198 
2199  pfree(per_ts_stat);
2200  per_ts_stat = NULL;
2201  binaryheap_free(ts_heap);
2202 
2203  /*
2204  * Update checkpoint statistics. As noted above, this doesn't include
2205  * buffers written by other backends or bgwriter scan.
2206  */
2207  CheckpointStats.ckpt_bufs_written += num_written;
2208 
2209  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2210 }
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:125
void binaryheap_add_unordered(binaryheap *heap, Datum d)
Definition: binaryheap.c:109
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:32
Datum binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:173
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:68
void binaryheap_replace_first(binaryheap *heap, Datum d)
Definition: binaryheap.c:207
Datum binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:158
#define binaryheap_empty(h)
Definition: binaryheap.h:52
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:4872
int checkpoint_flush_after
Definition: bufmgr.c:158
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:4895
struct CkptTsStatus CkptTsStatus
void IssuePendingWritebacks(WritebackContext *context)
Definition: bufmgr.c:4948
double float8
Definition: c.h:566
size_t Size
Definition: c.h:541
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:697
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:37
int i
Definition: isn.c:73
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
void * palloc(Size size)
Definition: mcxt.c:1199
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:670
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:660
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:467
int ckpt_bufs_written
Definition: xlog.h:162
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:109
int index
Definition: bufmgr.c:117
int num_scanned
Definition: bufmgr.c:114
float8 progress
Definition: bufmgr.c:108
int num_to_scan
Definition: bufmgr.c:112
Oid tsId
Definition: bufmgr.c:99
PgStat_Counter buf_written_checkpoints
Definition: pgstat.h:274
Oid spcOid
Definition: buf_internals.h:92
CheckpointStatsData CheckpointStats
Definition: xlog.c:212
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:135
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:138
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:134

References Assert(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buf_written_checkpoints, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr(), and WritebackContextInit().

Referenced by CheckPointBuffers().
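The per-tablespace balancing performed by the loop above is easier to see in isolation. The following standalone sketch uses simplified, invented names (TsProgress and friends are not part of bufmgr.c), and the slice arithmetic (total buffers to write divided by the tablespace's own count) follows the comments in this file, so read it as an assumption rather than a copy of the real code. It shows why adding progress_slice per written buffer and always servicing the least-advanced tablespace interleaves writes across tablespaces instead of draining them one after another.

#include <stdio.h>

/* Simplified stand-in for CkptTsStatus: per-tablespace scan state. */
typedef struct
{
    const char *name;
    int         num_to_scan;    /* buffers this tablespace still has to write */
    double      progress;       /* advances by progress_slice per buffer */
    double      progress_slice; /* total_to_scan / num_to_scan */
} TsProgress;

int
main(void)
{
    TsProgress  ts[] = {{"ts_a", 6, 0, 0}, {"ts_b", 2, 0, 0}};
    int         total = 8;      /* sum of num_to_scan over all tablespaces */

    for (int i = 0; i < 2; i++)
        ts[i].progress_slice = (double) total / ts[i].num_to_scan;

    /*
     * Always write the next buffer of the least-advanced tablespace,
     * mimicking the binary heap ordered by ts_ckpt_progress_comparator().
     */
    for (int written = 0; written < total; written++)
    {
        int         pick;

        pick = (ts[0].num_to_scan > 0 &&
                (ts[1].num_to_scan == 0 ||
                 ts[0].progress <= ts[1].progress)) ? 0 : 1;

        printf("write one buffer of %s\n", ts[pick].name);
        ts[pick].progress += ts[pick].progress_slice;
        ts[pick].num_to_scan--;
    }
    return 0;
}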

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inlinestatic

Definition at line 4807 of file bufmgr.c.

4808 {
4809  int ret;
4810  RelFileLocator rlocatora;
4811  RelFileLocator rlocatorb;
4812 
4813  rlocatora = BufTagGetRelFileLocator(ba);
4814  rlocatorb = BufTagGetRelFileLocator(bb);
4815 
4816  ret = rlocator_comparator(&rlocatora, &rlocatorb);
4817 
4818  if (ret != 0)
4819  return ret;
4820 
4821  if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
4822  return -1;
4823  if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
4824  return 1;
4825 
4826  if (ba->blockNum < bb->blockNum)
4827  return -1;
4828  if (ba->blockNum > bb->blockNum)
4829  return 1;
4830 
4831  return 0;
4832 }
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:4728

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().

◆ BufmgrCommit()

void BufmgrCommit ( void  )

Definition at line 2749 of file bufmgr.c.

2750 {
2751  /* Nothing to do in bufmgr anymore... */
2752 }

Referenced by PrepareTransaction(), and RecordTransactionCommit().

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 2655 of file bufmgr.c.

2656 {
2657 #ifdef USE_ASSERT_CHECKING
2658  int RefCountErrors = 0;
2660  int i;
2661 
2662  /* check the array */
2663  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2664  {
2666 
2667  if (res->buffer != InvalidBuffer)
2668  {
2669  PrintBufferLeakWarning(res->buffer);
2670  RefCountErrors++;
2671  }
2672  }
2673 
2674  /* if necessary search the hash */
2676  {
2677  HASH_SEQ_STATUS hstat;
2678 
2680  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2681  {
2682  PrintBufferLeakWarning(res->buffer);
2683  RefCountErrors++;
2684  }
2685  }
2686 
2687  Assert(RefCountErrors == 0);
2688 #endif
2689 }
#define InvalidBuffer
Definition: buf.h:25
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:90
void PrintBufferLeakWarning(Buffer buffer)
Definition: bufmgr.c:2695
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:199
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:200
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1431
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1421

References Assert(), hash_seq_init(), hash_seq_search(), i, InvalidBuffer, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and res.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 2739 of file bufmgr.c.

2740 {
2741  BufferSync(flags);
2742 }
static void BufferSync(int flags)
Definition: bufmgr.c:1948

References BufferSync().

Referenced by CheckPointGuts().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem a,
const CkptSortItem b 
)
inlinestatic

Definition at line 4841 of file bufmgr.c.

4842 {
4843  /* compare tablespace */
4844  if (a->tsId < b->tsId)
4845  return -1;
4846  else if (a->tsId > b->tsId)
4847  return 1;
4848  /* compare relation */
4849  if (a->relNumber < b->relNumber)
4850  return -1;
4851  else if (a->relNumber > b->relNumber)
4852  return 1;
4853  /* compare fork */
4854  else if (a->forkNum < b->forkNum)
4855  return -1;
4856  else if (a->forkNum > b->forkNum)
4857  return 1;
4858  /* compare block number */
4859  else if (a->blockNum < b->blockNum)
4860  return -1;
4861  else if (a->blockNum > b->blockNum)
4862  return 1;
4863  /* equal page IDs are unlikely, but not impossible */
4864  return 0;
4865 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, and b.
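To make the effect of this key order concrete, here is a standalone sketch that sorts a few simplified checkpoint items with the C library's qsort (the real code instead instantiates sort_checkpoint_bufferids() via lib/sort_template.h). Ordering by tablespace, relation, fork and then block groups the writes per file and keeps block numbers ascending, which is what makes checkpoint I/O largely sequential. The Item struct and item_cmp() are illustrative names, not part of bufmgr.c.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for CkptSortItem: just the sort keys. */
typedef struct
{
    unsigned    tsId;
    unsigned    relNumber;
    int         forkNum;
    unsigned    blockNum;
} Item;

static int
item_cmp(const void *pa, const void *pb)
{
    const Item *a = pa;
    const Item *b = pb;

    /* same key order as ckpt_buforder_comparator() */
    if (a->tsId != b->tsId)
        return (a->tsId < b->tsId) ? -1 : 1;
    if (a->relNumber != b->relNumber)
        return (a->relNumber < b->relNumber) ? -1 : 1;
    if (a->forkNum != b->forkNum)
        return (a->forkNum < b->forkNum) ? -1 : 1;
    if (a->blockNum != b->blockNum)
        return (a->blockNum < b->blockNum) ? -1 : 1;
    return 0;
}

int
main(void)
{
    Item        items[] = {{1, 20, 0, 9}, {1, 10, 0, 3}, {1, 10, 0, 1}};

    qsort(items, 3, sizeof(Item), item_cmp);
    for (int i = 0; i < 3; i++)
        printf("ts=%u rel=%u fork=%d block=%u\n",
               items[i].tsId, items[i].relNumber,
               items[i].forkNum, items[i].blockNum);
    return 0;
}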

◆ ConditionalLockBuffer()

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 4406 of file bufmgr.c.

4407 {
4408  BufferDesc *bufHdr;
4409  uint32 buf_state,
4410  refcount;
4411 
4412  Assert(BufferIsValid(buffer));
4413 
4414  if (BufferIsLocal(buffer))
4415  {
4416  refcount = LocalRefCount[-buffer - 1];
4417  /* There should be exactly one pin */
4418  Assert(refcount > 0);
4419  if (refcount != 1)
4420  return false;
4421  /* Nobody else to wait for */
4422  return true;
4423  }
4424 
4425  /* There should be exactly one local pin */
4426  refcount = GetPrivateRefCount(buffer);
4427  Assert(refcount);
4428  if (refcount != 1)
4429  return false;
4430 
4431  /* Try to acquire lock */
4432  if (!ConditionalLockBuffer(buffer))
4433  return false;
4434 
4435  bufHdr = GetBufferDescriptor(buffer - 1);
4436  buf_state = LockBufHdr(bufHdr);
4437  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
4438 
4439  Assert(refcount > 0);
4440  if (refcount == 1)
4441  {
4442  /* Successfully acquired exclusive lock with pincount 1 */
4443  UnlockBufHdr(bufHdr, buf_state);
4444  return true;
4445  }
4446 
4447  /* Failed, so release the lock */
4448  UnlockBufHdr(bufHdr, buf_state);
4449  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4450  return false;
4451 }
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:4198
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4172
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:105

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
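A typical caller uses this function opportunistically: if the cleanup lock cannot be obtained with pincount 1, the page is simply skipped rather than waited for. The sketch below shows that shape; maybe_cleanup_page() is a hypothetical helper, not a bufmgr.c API, and the real callers listed above (for example heap_page_prune_opt()) do considerably more work.

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* hypothetical helper, sketching the usual caller-side pattern */
static void
maybe_cleanup_page(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);

    if (ConditionalLockBufferForCleanup(buf))
    {
        /* we hold the only pin plus an exclusive lock: cleanup is safe */
        /* ... page-level pruning would go here ... */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }
    /* either way, drop our pin */
    ReleaseBuffer(buf);
}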

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 3799 of file bufmgr.c.

3801 {
3802  RelFileLocatorBackend rlocator;
3803  char relpersistence;
3804 
3805  /* Set the relpersistence. */
3806  relpersistence = permanent ?
3807  RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
3808 
3809  /*
3810  * Create and copy all forks of the relation. During create database we
3811  * have a separate cleanup mechanism which deletes the complete database
3812  * directory. Therefore, each individual relation doesn't need to be
3813  * registered for cleanup.
3814  */
3815  RelationCreateStorage(dst_rlocator, relpersistence, false);
3816 
3817  /* copy main fork. */
3818  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
3819  permanent);
3820 
3821  /* copy those extra forks that exist */
3822  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
3823  forkNum <= MAX_FORKNUM; forkNum++)
3824  {
3825  if (smgrexists(smgropen(src_rlocator, InvalidBackendId), forkNum))
3826  {
3827  smgrcreate(smgropen(dst_rlocator, InvalidBackendId), forkNum, false);
3828 
3829  /*
3830  * WAL log creation if the relation is persistent, or this is the
3831  * init fork of an unlogged relation.
3832  */
3833  if (permanent || forkNum == INIT_FORKNUM)
3834  log_smgrcreate(&dst_rlocator, forkNum);
3835 
3836  /* Copy a fork's data, block by block. */
3837  RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
3838  permanent);
3839  }
3840  }
3841 
3842  /* close source and destination smgr if they exist. */
3843  rlocator.backend = InvalidBackendId;
3844 
3845  rlocator.locator = src_rlocator;
3846  smgrcloserellocator(rlocator);
3847 
3848  rlocator.locator = dst_rlocator;
3849  smgrcloserellocator(rlocator);
3850 }
#define InvalidBackendId
Definition: backendid.h:23
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:3709
ForkNumber
Definition: relpath.h:48
@ MAIN_FORKNUM
Definition: relpath.h:50
#define MAX_FORKNUM
Definition: relpath.h:62
void smgrcloserellocator(RelFileLocatorBackend rlocator)
Definition: smgr.c:346
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:369
SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend)
Definition: smgr.c:146
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:247
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:120
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:185

References RelFileLocatorBackend::backend, INIT_FORKNUM, InvalidBackendId, RelFileLocatorBackend::locator, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcloserellocator(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3413 of file bufmgr.c.

3414 {
3415  int i;
3416 
3417  /*
3418  * We needn't consider local buffers, since by assumption the target
3419  * database isn't our own.
3420  */
3421 
3422  for (i = 0; i < NBuffers; i++)
3423  {
3424  BufferDesc *bufHdr = GetBufferDescriptor(i);
3425  uint32 buf_state;
3426 
3427  /*
3428  * As in DropRelationBuffers, an unlocked precheck should be
3429  * safe and saves some cycles.
3430  */
3431  if (bufHdr->tag.dbOid != dbid)
3432  continue;
3433 
3434  buf_state = LockBufHdr(bufHdr);
3435  if (bufHdr->tag.dbOid == dbid)
3436  InvalidateBuffer(bufHdr); /* releases spinlock */
3437  else
3438  UnlockBufHdr(bufHdr, buf_state);
3439  }
3440 }
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1485
Oid dbOid
Definition: buf_internals.h:93

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().
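The loop above relies on a pattern that recurs throughout this file: read a header field without the spinlock as a cheap precheck, then take the lock and recheck before acting. Below is a minimal standalone illustration of that double-check shape, using a pthread mutex as a stand-in for LockBufHdr()/UnlockBufHdr(); all names here are invented for the example.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* toy stand-in for a buffer header: a tag protected by a lock */
typedef struct
{
    pthread_mutex_t lock;
    int             dbOid;
} ToyBufferHeader;

/*
 * The unlocked read may race with concurrent re-tagging, so it is only a
 * hint; the result is acted on only after the lock is taken and the
 * condition rechecked -- the same shape as the DropDatabaseBuffers() loop.
 */
static bool
drop_if_database(ToyBufferHeader *hdr, int dbid)
{
    if (hdr->dbOid != dbid)         /* unlocked precheck: cheap skip */
        return false;

    pthread_mutex_lock(&hdr->lock);
    if (hdr->dbOid == dbid)         /* recheck under the lock */
    {
        hdr->dbOid = -1;            /* "invalidate" */
        pthread_mutex_unlock(&hdr->lock);
        return true;
    }
    pthread_mutex_unlock(&hdr->lock);
    return false;
}

int
main(void)
{
    ToyBufferHeader hdr = {PTHREAD_MUTEX_INITIALIZER, 42};

    printf("dropped: %d\n", drop_if_database(&hdr, 42));
    return 0;
}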

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 3058 of file bufmgr.c.

3060 {
3061  int i;
3062  int j;
3063  RelFileLocatorBackend rlocator;
3064  BlockNumber nForkBlock[MAX_FORKNUM];
3065  uint64 nBlocksToInvalidate = 0;
3066 
3067  rlocator = smgr_reln->smgr_rlocator;
3068 
3069  /* If it's a local relation, it's localbuf.c's problem. */
3070  if (RelFileLocatorBackendIsTemp(rlocator))
3071  {
3072  if (rlocator.backend == MyBackendId)
3073  {
3074  for (j = 0; j < nforks; j++)
3075  DropRelationLocalBuffers(rlocator.locator, forkNum[j],
3076  firstDelBlock[j]);
3077  }
3078  return;
3079  }
3080 
3081  /*
3082  * To remove all the pages of the specified relation forks from the buffer
3083  * pool, we need to scan the entire buffer pool but we can optimize it by
3084  * finding the buffers from BufMapping table provided we know the exact
3085  * size of each fork of the relation. The exact size is required to ensure
3086  * that we don't leave any buffer for the relation being dropped as
3087  * otherwise the background writer or checkpointer can lead to a PANIC
3088  * error while flushing buffers corresponding to files that don't exist.
3089  *
3090  * To know the exact size, we rely on the size cached for each fork
3091  * during recovery, which limits the optimization to recovery and to
3092  * standbys, but we can easily extend it once we have a shared cache for
3093  * relation sizes.
3094  *
3095  * In recovery, we cache the value returned by the first lseek(SEEK_END)
3096  * and future writes keep the cached value up-to-date. See
3097  * smgrextend. It is possible that the value of the first lseek is smaller
3098  * than the actual number of existing blocks in the file due to buggy
3099  * Linux kernels that might not have accounted for the recent write. But
3100  * that should be fine because there must not be any buffers after that
3101  * file size.
3102  */
3103  for (i = 0; i < nforks; i++)
3104  {
3105  /* Get the number of blocks for a relation's fork */
3106  nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
3107 
3108  if (nForkBlock[i] == InvalidBlockNumber)
3109  {
3110  nBlocksToInvalidate = InvalidBlockNumber;
3111  break;
3112  }
3113 
3114  /* calculate the number of blocks to be invalidated */
3115  nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
3116  }
3117 
3118  /*
3119  * We apply the optimization iff the total number of blocks to invalidate
3120  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3121  */
3122  if (BlockNumberIsValid(nBlocksToInvalidate) &&
3123  nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3124  {
3125  for (j = 0; j < nforks; j++)
3126  FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
3127  nForkBlock[j], firstDelBlock[j]);
3128  return;
3129  }
3130 
3131  for (i = 0; i < NBuffers; i++)
3132  {
3133  BufferDesc *bufHdr = GetBufferDescriptor(i);
3134  uint32 buf_state;
3135 
3136  /*
3137  * We can make this a tad faster by prechecking the buffer tag before
3138  * we attempt to lock the buffer; this saves a lot of lock
3139  * acquisitions in typical cases. It should be safe because the
3140  * caller must have AccessExclusiveLock on the relation, or some other
3141  * reason to be certain that no one is loading new pages of the rel
3142  * into the buffer pool. (Otherwise we might well miss such pages
3143  * entirely.) Therefore, while the tag might be changing while we
3144  * look at it, it can't be changing *to* a value we care about, only
3145  * *away* from such a value. So false negatives are impossible, and
3146  * false positives are safe because we'll recheck after getting the
3147  * buffer lock.
3148  *
3149  * We could check forkNum and blockNum as well as the rlocator, but
3150  * the incremental win from doing so seems small.
3151  */
3152  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
3153  continue;
3154 
3155  buf_state = LockBufHdr(bufHdr);
3156 
3157  for (j = 0; j < nforks; j++)
3158  {
3159  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
3160  BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
3161  bufHdr->tag.blockNum >= firstDelBlock[j])
3162  {
3163  InvalidateBuffer(bufHdr); /* releases spinlock */
3164  break;
3165  }
3166  }
3167  if (j >= nforks)
3168  UnlockBufHdr(bufHdr, buf_state);
3169  }
3170 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:81
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:3352
BackendId MyBackendId
Definition: globals.c:85
int j
Definition: isn.c:74
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:326
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:603

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().
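To put the threshold in perspective (an illustrative calculation, not taken from the source): with shared_buffers set to 8GB there are about 1,048,576 8kB buffers, so BUF_DROP_FULL_SCAN_THRESHOLD = NBuffers / 32 is roughly 32,768 blocks, i.e. 256MB. Relations whose cached fork sizes add up to less than that are handled with targeted FindAndDropRelationBuffers() lookups; anything larger, or any fork whose size is not cached, falls back to the full scan of all NBuffers headers.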

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 3181 of file bufmgr.c.

3182 {
3183  int i;
3184  int n = 0;
3185  SMgrRelation *rels;
3186  BlockNumber (*block)[MAX_FORKNUM + 1];
3187  uint64 nBlocksToInvalidate = 0;
3188  RelFileLocator *locators;
3189  bool cached = true;
3190  bool use_bsearch;
3191 
3192  if (nlocators == 0)
3193  return;
3194 
3195  rels = palloc(sizeof(SMgrRelation) * nlocators); /* non-local relations */
3196 
3197  /* If it's a local relation, it's localbuf.c's problem. */
3198  for (i = 0; i < nlocators; i++)
3199  {
3200  if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
3201  {
3202  if (smgr_reln[i]->smgr_rlocator.backend == MyBackendId)
3203  DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
3204  }
3205  else
3206  rels[n++] = smgr_reln[i];
3207  }
3208 
3209  /*
3210  * If there are no non-local relations, then we're done. Release the
3211  * memory and return.
3212  */
3213  if (n == 0)
3214  {
3215  pfree(rels);
3216  return;
3217  }
3218 
3219  /*
3220  * This is used to remember the number of blocks for all the relations'
3221  * forks.
3222  */
3223  block = (BlockNumber (*)[MAX_FORKNUM + 1])
3224  palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
3225 
3226  /*
3227  * We can avoid scanning the entire buffer pool if we know the exact size
3228  * of each of the given relation forks. See DropRelationBuffers.
3229  */
3230  for (i = 0; i < n && cached; i++)
3231  {
3232  for (int j = 0; j <= MAX_FORKNUM; j++)
3233  {
3234  /* Get the number of blocks for a relation's fork. */
3235  block[i][j] = smgrnblocks_cached(rels[i], j);
3236 
3237  /* We need to only consider the relation forks that exists. */
3238  if (block[i][j] == InvalidBlockNumber)
3239  {
3240  if (!smgrexists(rels[i], j))
3241  continue;
3242  cached = false;
3243  break;
3244  }
3245 
3246  /* calculate the total number of blocks to be invalidated */
3247  nBlocksToInvalidate += block[i][j];
3248  }
3249  }
3250 
3251  /*
3252  * We apply the optimization iff the total number of blocks to invalidate
3253  * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
3254  */
3255  if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
3256  {
3257  for (i = 0; i < n; i++)
3258  {
3259  for (int j = 0; j <= MAX_FORKNUM; j++)
3260  {
3261  /* ignore relation forks that doesn't exist */
3262  if (!BlockNumberIsValid(block[i][j]))
3263  continue;
3264 
3265  /* drop all the buffers for a particular relation fork */
3266  FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
3267  j, block[i][j], 0);
3268  }
3269  }
3270 
3271  pfree(block);
3272  pfree(rels);
3273  return;
3274  }
3275 
3276  pfree(block);
3277  locators = palloc(sizeof(RelFileLocator) * n); /* non-local relations */
3278  for (i = 0; i < n; i++)
3279  locators[i] = rels[i]->smgr_rlocator.locator;
3280 
3281  /*
3282  * For a low number of relations to drop, just use a simple walk-through
3283  * to save the bsearch overhead. The threshold is more of a guess than an
3284  * exactly determined value, as it depends on many factors (CPU and RAM
3285  * speeds, amount of shared buffers, etc.).
3286  */
3287  use_bsearch = n > RELS_BSEARCH_THRESHOLD;
3288 
3289  /* sort the list of rlocators if necessary */
3290  if (use_bsearch)
3291  pg_qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
3292 
3293  for (i = 0; i < NBuffers; i++)
3294  {
3295  RelFileLocator *rlocator = NULL;
3296  BufferDesc *bufHdr = GetBufferDescriptor(i);
3297  uint32 buf_state;
3298 
3299  /*
3300  * As in DropRelationBuffers, an unlocked precheck should be
3301  * safe and saves some cycles.
3302  */
3303 
3304  if (!use_bsearch)
3305  {
3306  int j;
3307 
3308  for (j = 0; j < n; j++)
3309  {
3310  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
3311  {
3312  rlocator = &locators[j];
3313  break;
3314  }
3315  }
3316  }
3317  else
3318  {
3319  RelFileLocator locator;
3320 
3321  locator = BufTagGetRelFileLocator(&bufHdr->tag);
3322  rlocator = bsearch((const void *) &(locator),
3323  locators, n, sizeof(RelFileLocator),
3325  }
3326 
3327  /* buffer doesn't belong to any of the given relfilelocators; skip it */
3328  if (rlocator == NULL)
3329  continue;
3330 
3331  buf_state = LockBufHdr(bufHdr);
3332  if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
3333  InvalidateBuffer(bufHdr); /* releases spinlock */
3334  else
3335  UnlockBufHdr(bufHdr, buf_state);
3336  }
3337 
3338  pfree(locators);
3339  pfree(rels);
3340 }
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:375
void pg_qsort(void *base, size_t nel, size_t elsize, int(*cmp)(const void *, const void *))

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyBackendId, NBuffers, palloc(), pfree(), pg_qsort(), RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().
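The linear-walk-versus-bsearch decision is self-contained enough to sketch separately. The example below is a standalone approximation (the field order inside the comparator is illustrative and need not match rlocator_comparator() exactly) of how each buffer tag is tested against the set of relations being dropped, switching to a binary search once the set grows past RELS_BSEARCH_THRESHOLD.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define RELS_BSEARCH_THRESHOLD 20      /* same value as bufmgr.c */

typedef struct
{
    unsigned    spcOid;
    unsigned    dbOid;
    unsigned    relNumber;
} Locator;

static int
locator_cmp(const void *pa, const void *pb)
{
    const Locator *a = pa;
    const Locator *b = pb;

    if (a->spcOid != b->spcOid)
        return (a->spcOid < b->spcOid) ? -1 : 1;
    if (a->dbOid != b->dbOid)
        return (a->dbOid < b->dbOid) ? -1 : 1;
    if (a->relNumber != b->relNumber)
        return (a->relNumber < b->relNumber) ? -1 : 1;
    return 0;
}

/*
 * Decide whether a buffer's locator is one of the n relations being
 * dropped: a plain walk for small n, bsearch over the pre-sorted array
 * otherwise, mirroring the DropRelationsAllBuffers() scan.
 */
static bool
matches_any(const Locator *key, const Locator *locs, int n)
{
    if (n <= RELS_BSEARCH_THRESHOLD)
    {
        for (int j = 0; j < n; j++)
            if (locator_cmp(key, &locs[j]) == 0)
                return true;
        return false;
    }
    return bsearch(key, locs, n, sizeof(Locator), locator_cmp) != NULL;
}

int
main(void)
{
    Locator     locs[] = {{1663, 5, 16384}, {1663, 5, 16390}};
    Locator     key = {1663, 5, 16390};

    /* with n = 2 the linear path is taken; larger sets must be sorted first */
    printf("%d\n", matches_any(&key, locs, 2));
    return 0;
}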

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 3352 of file bufmgr.c.

3355 {
3356  BlockNumber curBlock;
3357 
3358  for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
3359  {
3360  uint32 bufHash; /* hash value for tag */
3361  BufferTag bufTag; /* identity of requested block */
3362  LWLock *bufPartitionLock; /* buffer partition lock for it */
3363  int buf_id;
3364  BufferDesc *bufHdr;
3365  uint32 buf_state;
3366 
3367  /* create a tag so we can lookup the buffer */
3368  InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
3369 
3370  /* determine its hash code and partition lock ID */
3371  bufHash = BufTableHashCode(&bufTag);
3372  bufPartitionLock = BufMappingPartitionLock(bufHash);
3373 
3374  /* Check that it is in the buffer pool. If not, do nothing. */
3375  LWLockAcquire(bufPartitionLock, LW_SHARED);
3376  buf_id = BufTableLookup(&bufTag, bufHash);
3377  LWLockRelease(bufPartitionLock);
3378 
3379  if (buf_id < 0)
3380  continue;
3381 
3382  bufHdr = GetBufferDescriptor(buf_id);
3383 
3384  /*
3385  * We need to lock the buffer header and recheck if the buffer is
3386  * still associated with the same block because the buffer could be
3387  * evicted by some other backend loading blocks for a different
3388  * relation after we release lock on the BufMapping table.
3389  */
3390  buf_state = LockBufHdr(bufHdr);
3391 
3392  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
3393  BufTagGetForkNum(&bufHdr->tag) == forkNum &&
3394  bufHdr->tag.blockNum >= firstDelBlock)
3395  InvalidateBuffer(bufHdr); /* releases spinlock */
3396  else
3397  UnlockBufHdr(bufHdr, buf_state);
3398  }
3399 }

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln 
)
static

Definition at line 2823 of file bufmgr.c.

2824 {
2825  XLogRecPtr recptr;
2826  ErrorContextCallback errcallback;
2827  instr_time io_start,
2828  io_time;
2829  Block bufBlock;
2830  char *bufToWrite;
2831  uint32 buf_state;
2832 
2833  /*
2834  * Try to start an I/O operation. If StartBufferIO returns false, then
2835  * someone else flushed the buffer before we could, so we need not do
2836  * anything.
2837  */
2838  if (!StartBufferIO(buf, false))
2839  return;
2840 
2841  /* Setup error traceback support for ereport() */
2843  errcallback.arg = (void *) buf;
2844  errcallback.previous = error_context_stack;
2845  error_context_stack = &errcallback;
2846 
2847  /* Find smgr relation for buffer */
2848  if (reln == NULL)
2850 
2851  TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
2852  buf->tag.blockNum,
2854  reln->smgr_rlocator.locator.dbOid,
2856 
2857  buf_state = LockBufHdr(buf);
2858 
2859  /*
2860  * Run PageGetLSN while holding header lock, since we don't have the
2861  * buffer locked exclusively in all cases.
2862  */
2863  recptr = BufferGetLSN(buf);
2864 
2865  /* To check if block content changes while flushing. - vadim 01/17/97 */
2866  buf_state &= ~BM_JUST_DIRTIED;
2867  UnlockBufHdr(buf, buf_state);
2868 
2869  /*
2870  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2871  * rule that log updates must hit disk before any of the data-file changes
2872  * they describe do.
2873  *
2874  * However, this rule does not apply to unlogged relations, which will be
2875  * lost after a crash anyway. Most unlogged relation pages do not bear
2876  * LSNs since we never emit WAL records for them, and therefore flushing
2877  * up through the buffer LSN would be useless, but harmless. However,
2878  * GiST indexes use LSNs internally to track page-splits, and therefore
2879  * unlogged GiST pages bear "fake" LSNs generated by
2880  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2881  * LSN counter could advance past the WAL insertion point; and if it did
2882  * happen, attempting to flush WAL through that location would fail, with
2883  * disastrous system-wide consequences. To make sure that can't happen,
2884  * skip the flush if the buffer isn't permanent.
2885  */
2886  if (buf_state & BM_PERMANENT)
2887  XLogFlush(recptr);
2888 
2889  /*
2890  * Now it's safe to write the buffer to disk. Note that no one else should
2891  * have been able to write it while we were busy with log flushing because
2892  * only one process at a time can set the BM_IO_IN_PROGRESS bit.
2893  */
2894  bufBlock = BufHdrGetBlock(buf);
2895 
2896  /*
2897  * Update page checksum if desired. Since we have only a shared lock on the
2898  * buffer, other processes might be updating hint bits in it, so we must
2899  * copy the page to private storage if we do checksumming.
2900  */
2901  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2902 
2903  if (track_io_timing)
2904  INSTR_TIME_SET_CURRENT(io_start);
2905 
2906  /*
2907  * bufToWrite is either the shared buffer or a copy, as appropriate.
2908  */
2909  smgrwrite(reln,
2910  BufTagGetForkNum(&buf->tag),
2911  buf->tag.blockNum,
2912  bufToWrite,
2913  false);
2914 
2915  if (track_io_timing)
2916  {
2917  INSTR_TIME_SET_CURRENT(io_time);
2918  INSTR_TIME_SUBTRACT(io_time, io_start);
2921  }
2922 
2924 
2925  /*
2926  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2927  * end the BM_IO_IN_PROGRESS state.
2928  */
2929  TerminateBufferIO(buf, true, 0);
2930 
2931  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
2932  buf->tag.blockNum,
2934  reln->smgr_rlocator.locator.dbOid,
2936 
2937  /* Pop the error context stack */
2938  error_context_stack = errcallback.previous;
2939 }
bool track_io_timing
Definition: bufmgr.c:137
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:62
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:4688
void * Block
Definition: bufmgr.h:24
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1510
Pointer Page
Definition: bufpage.h:78
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:89
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:91
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:103
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:138
struct timespec instr_time
Definition: instr_time.h:83
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:470
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:554
instr_time blk_write_time
Definition: instrument.h:37
int64 shared_blks_written
Definition: instrument.h:29
struct ErrorContextCallback * previous
Definition: elog.h:234
void(* callback)(void *arg)
Definition: elog.h:235
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2512

References ErrorContextCallback::arg, BufferUsage::blk_write_time, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdr(), and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().
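The heart of the function is the WAL-before-data rule. Reduced to its essentials it looks like the sketch below; write_page_safely() and its callback parameter are hypothetical, but XLogFlush() and PageGetLSN() are the same calls FlushBuffer() uses.

#include "postgres.h"

#include "access/xlog.h"
#include "storage/bufpage.h"

/* hypothetical helper illustrating the ordering FlushBuffer() enforces */
static void
write_page_safely(Page page, void (*write_page) (Page))
{
    XLogRecPtr  lsn = PageGetLSN(page);

    /* WAL describing this page must reach disk first ... */
    XLogFlush(lsn);
    /* ... only then may the data page itself be written out */
    write_page(page);
}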

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3868 of file bufmgr.c.

3869 {
3870  int i;
3871  BufferDesc *bufHdr;
3872 
3873  /* Make sure we can handle the pin inside the loop */
3875 
3876  for (i = 0; i < NBuffers; i++)
3877  {
3878  uint32 buf_state;
3879 
3880  bufHdr = GetBufferDescriptor(i);
3881 
3882  /*
3883  * As in DropRelationBuffers, an unlocked precheck should be
3884  * safe and saves some cycles.
3885  */
3886  if (bufHdr->tag.dbOid != dbid)
3887  continue;
3888 
3890 
3891  buf_state = LockBufHdr(bufHdr);
3892  if (bufHdr->tag.dbOid == dbid &&
3893  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3894  {
3895  PinBuffer_Locked(bufHdr);
3897  FlushBuffer(bufHdr, NULL);
3899  UnpinBuffer(bufHdr);
3900  }
3901  else
3902  UnlockBufHdr(bufHdr, buf_state);
3903  }
3904 }

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), CurrentResourceOwner, buftag::dbOid, FlushBuffer(), GetBufferDescriptor(), i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 3911 of file bufmgr.c.

3912 {
3913  BufferDesc *bufHdr;
3914 
3915  /* currently not needed, but no fundamental reason not to support */
3916  Assert(!BufferIsLocal(buffer));
3917 
3918  Assert(BufferIsPinned(buffer));
3919 
3920  bufHdr = GetBufferDescriptor(buffer - 1);
3921 
3923 
3924  FlushBuffer(bufHdr, NULL);
3925 }
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1918

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), and LWLockHeldByMe().

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), and XLogReadBufferForRedoExtended().

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 3519 of file bufmgr.c.

3520 {
3521  int i;
3522  BufferDesc *bufHdr;
3523 
3524  if (RelationUsesLocalBuffers(rel))
3525  {
3526  for (i = 0; i < NLocBuffer; i++)
3527  {
3528  uint32 buf_state;
3529 
3530  bufHdr = GetLocalBufferDescriptor(i);
3531  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
3532  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3533  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3534  {
3535  ErrorContextCallback errcallback;
3536  Page localpage;
3537 
3538  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3539 
3540  /* Setup error traceback support for ereport() */
3542  errcallback.arg = (void *) bufHdr;
3543  errcallback.previous = error_context_stack;
3544  error_context_stack = &errcallback;
3545 
3546  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3547 
3549  BufTagGetForkNum(&bufHdr->tag),
3550  bufHdr->tag.blockNum,
3551  localpage,
3552  false);
3553 
3554  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3555  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3556 
3557  /* Pop the error context stack */
3558  error_context_stack = errcallback.previous;
3559  }
3560  }
3561 
3562  return;
3563  }
3564 
3565  /* Make sure we can handle the pin inside the loop */
3567 
3568  for (i = 0; i < NBuffers; i++)
3569  {
3570  uint32 buf_state;
3571 
3572  bufHdr = GetBufferDescriptor(i);
3573 
3574  /*
3575  * As in DropRelationBuffers, an unlocked precheck should be
3576  * safe and saves some cycles.
3577  */
3578  if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
3579  continue;
3580 
3582 
3583  buf_state = LockBufHdr(bufHdr);
3584  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
3585  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3586  {
3587  PinBuffer_Locked(bufHdr);
3589  FlushBuffer(bufHdr, RelationGetSmgr(rel));
3591  UnpinBuffer(bufHdr);
3592  }
3593  else
3594  UnlockBufHdr(bufHdr, buf_state);
3595  }
3596 }
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:272
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:66
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:4708
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1539
int NLocBuffer
Definition: localbuf.c:41
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:569
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:635
RelFileLocator rd_locator
Definition: rel.h:56

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 3608 of file bufmgr.c.

3609 {
3610  int i;
3611  SMgrSortArray *srels;
3612  bool use_bsearch;
3613 
3614  if (nrels == 0)
3615  return;
3616 
3617  /* fill-in array for qsort */
3618  srels = palloc(sizeof(SMgrSortArray) * nrels);
3619 
3620  for (i = 0; i < nrels; i++)
3621  {
3622  Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
3623 
3624  srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
3625  srels[i].srel = smgrs[i];
3626  }
3627 
3628  /*
3629  * Save the bsearch overhead for a low number of relations to sync. See
3630  * DropRelationsAllBuffers for details.
3631  */
3632  use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
3633 
3634  /* sort the list of SMgrRelations if necessary */
3635  if (use_bsearch)
3636  pg_qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
3637 
3638  /* Make sure we can handle the pin inside the loop */
3640 
3641  for (i = 0; i < NBuffers; i++)
3642  {
3643  SMgrSortArray *srelent = NULL;
3644  BufferDesc *bufHdr = GetBufferDescriptor(i);
3645  uint32 buf_state;
3646 
3647  /*
3648  * As in DropRelationBuffers, an unlocked precheck should be
3649  * safe and saves some cycles.
3650  */
3651 
3652  if (!use_bsearch)
3653  {
3654  int j;
3655 
3656  for (j = 0; j < nrels; j++)
3657  {
3658  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
3659  {
3660  srelent = &srels[j];
3661  break;
3662  }
3663  }
3664  }
3665  else
3666  {
3667  RelFileLocator rlocator;
3668 
3669  rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
3670  srelent = bsearch((const void *) &(rlocator),
3671  srels, nrels, sizeof(SMgrSortArray),
3673  }
3674 
3675  /* buffer doesn't belong to any of the given relfilelocators; skip it */
3676  if (srelent == NULL)
3677  continue;
3678 
3680 
3681  buf_state = LockBufHdr(bufHdr);
3682  if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
3683  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3684  {
3685  PinBuffer_Locked(bufHdr);
3687  FlushBuffer(bufHdr, srelent->srel);
3689  UnpinBuffer(bufHdr);
3690  }
3691  else
3692  UnlockBufHdr(bufHdr, buf_state);
3693  }
3694 
3695  pfree(srels);
3696 }
SMgrRelation srel
Definition: bufmgr.c:130
RelFileLocator rlocator
Definition: bufmgr.c:129

References Assert(), BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock(), BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushBuffer(), GetBufferDescriptor(), i, j, RelFileLocatorBackend::locator, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, palloc(), pfree(), pg_qsort(), PinBuffer_Locked(), RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 412 of file bufmgr.c.

413 {
414  Assert(ref->refcount == 0);
415 
416  if (ref >= &PrivateRefCountArray[0] &&
418  {
419  ref->buffer = InvalidBuffer;
420 
421  /*
422  * Mark the just used entry as reserved - in many scenarios that
423  * allows us to avoid ever having to search the array/hash for free
424  * entries.
425  */
426  ReservedRefCountEntry = ref;
427  }
428  else
429  {
430  bool found;
431  Buffer buffer = ref->buffer;
432 
434  (void *) &buffer,
435  HASH_REMOVE,
436  &found);
437  Assert(found);
440  }
441 }
int Buffer
Definition: buf.h:23
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:203
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:953
@ HASH_REMOVE
Definition: hsearch.h:115

References Assert(), PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountEntry.

Referenced by UnpinBuffer().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 389 of file bufmgr.c.

390 {
392 
393  Assert(BufferIsValid(buffer));
394  Assert(!BufferIsLocal(buffer));
395 
396  /*
397  * Not moving the entry - that's ok for the current users, but we might
398  * want to change this one day.
399  */
400  ref = GetPrivateRefCountEntry(buffer, false);
401 
402  if (ref == NULL)
403  return 0;
404  return ref->refcount;
405 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:309

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntry(), and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), PrintBufferLeakWarning(), and ReadRecentBuffer().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 309 of file bufmgr.c.

310 {
312  int i;
313 
314  Assert(BufferIsValid(buffer));
315  Assert(!BufferIsLocal(buffer));
316 
317  /*
318  * First search for references in the array, that'll be sufficient in the
319  * majority of cases.
320  */
321  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
322  {
324 
325  if (res->buffer == buffer)
326  return res;
327  }
328 
329  /*
330  * By here we know that the buffer, if already pinned, isn't residing in
331  * the array.
332  *
333  * Only look up the buffer in the hashtable if we've previously overflowed
334  * into it.
335  */
336  if (PrivateRefCountOverflowed == 0)
337  return NULL;
338 
340  (void *) &buffer,
341  HASH_FIND,
342  NULL);
343 
344  if (res == NULL)
345  return NULL;
346  else if (!do_move)
347  {
348  /* caller doesn't want us to move the hash entry into the array */
349  return res;
350  }
351  else
352  {
353  /* move buffer from hashtable into the free array slot */
354  bool found;
356 
357  /* Ensure there's a free array slot */
359 
360  /* Use up the reserved slot */
361  Assert(ReservedRefCountEntry != NULL);
363  ReservedRefCountEntry = NULL;
364  Assert(free->buffer == InvalidBuffer);
365 
366  /* and fill it */
367  free->buffer = buffer;
368  free->refcount = res->refcount;
369 
370  /* delete from hashtable */
372  (void *) &buffer,
373  HASH_REMOVE,
374  &found);
375  Assert(found);
378 
379  return free;
380  }
381 }
#define free(a)
Definition: header.h:65
@ HASH_FIND
Definition: hsearch.h:113

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, res, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().
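The two-level lookup (small array first, hash table only after overflow) is the whole point of this function. The standalone sketch below captures that shape with invented names; the real code additionally moves hash entries back into the array when do_move is true, which is omitted here.

#include <stdio.h>

#define REFCOUNT_ARRAY_ENTRIES 8    /* same size as bufmgr.c's fast array */
#define InvalidBuffer 0             /* zero-initialized slots are empty */

typedef struct
{
    int         buffer;
    int         refcount;
} Entry;

static Entry fast_array[REFCOUNT_ARRAY_ENTRIES];
static int   overflowed;            /* stand-in for PrivateRefCountOverflowed */

/*
 * Scan the small array first; only consult the (omitted) overflow hash
 * table when at least one entry has actually spilled into it.
 */
static Entry *
lookup(int buffer)
{
    for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
        if (fast_array[i].buffer == buffer)
            return &fast_array[i];

    if (overflowed == 0)
        return NULL;                /* common case: no hash probe needed */

    /* ... hash_search(PrivateRefCountHash, ...) would go here ... */
    return NULL;
}

int
main(void)
{
    Entry      *e;

    fast_array[0].buffer = 7;
    fast_array[0].refcount = 1;
    e = lookup(7);
    printf("refcount of buffer 7: %d\n", e ? e->refcount : 0);
    return 0;
}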

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 4380 of file bufmgr.c.

4381 {
4382  int bufid = GetStartupBufferPinWaitBufId();
4383 
4384  /*
4385  * If we get woken slowly then it's possible that the Startup process was
4386  * already woken by other backends before we got here. It's also possible
4387  * that we get here via multiple interrupts or interrupts at inappropriate
4388  * times, so make sure we do nothing if the bufid is not set.
4389  */
4390  if (bufid < 0)
4391  return false;
4392 
4393  if (GetPrivateRefCount(bufid + 1) > 0)
4394  return true;
4395 
4396  return false;
4397 }
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:645

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3969 of file bufmgr.c.

3970 {
3971  Assert(BufferIsPinned(buffer));
3973  if (BufferIsLocal(buffer))
3974  LocalRefCount[-buffer - 1]++;
3975  else
3976  {
3977  PrivateRefCountEntry *ref;
3978 
3979  ref = GetPrivateRefCountEntry(buffer, true);
3980  Assert(ref != NULL);
3981  ref->refcount++;
3982  }
3984 }
void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:963

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

◆ InitBufferPoolAccess()

void InitBufferPoolAccess ( void  )

Definition at line 2611 of file bufmgr.c.

2612 {
2613  HASHCTL hash_ctl;
2614 
2615  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2616 
2617  hash_ctl.keysize = sizeof(int32);
2618  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2619 
2620  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2621  HASH_ELEM | HASH_BLOBS);
2622 
2623  /*
2624  * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
2625  * the corresponding phase of backend shutdown.
2626  */
2627  Assert(MyProc != NULL);
2629 }
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:2636
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:350
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
PGPROC * MyProc
Definition: proc.c:68
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert(), AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MyProc, on_shmem_exit(), PrivateRefCountArray, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1485 of file bufmgr.c.

1486 {
1487  BufferTag oldTag;
1488  uint32 oldHash; /* hash value for oldTag */
1489  LWLock *oldPartitionLock; /* buffer partition lock for it */
1490  uint32 oldFlags;
1491  uint32 buf_state;
1492 
1493  /* Save the original buffer tag before dropping the spinlock */
1494  oldTag = buf->tag;
1495 
1496  buf_state = pg_atomic_read_u32(&buf->state);
1497  Assert(buf_state & BM_LOCKED);
1498  UnlockBufHdr(buf, buf_state);
1499 
1500  /*
1501  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1502  * worth storing the hashcode in BufferDesc so we need not recompute it
1503  * here? Probably not.
1504  */
1505  oldHash = BufTableHashCode(&oldTag);
1506  oldPartitionLock = BufMappingPartitionLock(oldHash);
1507 
1508 retry:
1509 
1510  /*
1511  * Acquire exclusive mapping lock in preparation for changing the buffer's
1512  * association.
1513  */
1514  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1515 
1516  /* Re-lock the buffer header */
1517  buf_state = LockBufHdr(buf);
1518 
1519  /* If it's changed while we were waiting for lock, do nothing */
1520  if (!BufferTagsEqual(&buf->tag, &oldTag))
1521  {
1522  UnlockBufHdr(buf, buf_state);
1523  LWLockRelease(oldPartitionLock);
1524  return;
1525  }
1526 
1527  /*
1528  * We assume the only reason for it to be pinned is that someone else is
1529  * flushing the page out. Wait for them to finish. (This could be an
1530  * infinite loop if the refcount is messed up... it would be nice to time
1531  * out after a while, but there seems no way to be sure how many loops may
1532  * be needed. Note that if the other guy has pinned the buffer but not
1533  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1534  * be busy-looping here.)
1535  */
1536  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1537  {
1538  UnlockBufHdr(buf, buf_state);
1539  LWLockRelease(oldPartitionLock);
1540  /* safety check: should definitely not be our *own* pin */
1542  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1543  WaitIO(buf);
1544  goto retry;
1545  }
1546 
1547  /*
1548  * Clear out the buffer's tag and flags. We must do this to ensure that
1549  * linear scans of the buffer array don't think the buffer is valid.
1550  */
1551  oldFlags = buf_state & BUF_FLAG_MASK;
1552  ClearBufferTag(&buf->tag);
1553  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1554  UnlockBufHdr(buf, buf_state);
1555 
1556  /*
1557  * Remove the buffer from the lookup hashtable, if it was in there.
1558  */
1559  if (oldFlags & BM_TAG_VALID)
1560  BufTableDelete(&oldTag, oldHash);
1561 
1562  /*
1563  * Done with mapping lock.
1564  */
1565  LWLockRelease(oldPartitionLock);
1566 
1567  /*
1568  * Insert the buffer at the head of the list of free buffers.
1569  */
1571 }
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
#define BM_LOCKED
Definition: buf_internals.h:58
static void ClearBufferTag(BufferTag *tag)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:4516
#define ERROR
Definition: elog.h:35
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:363

References Assert(), BM_LOCKED, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog(), ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), StrategyFreeBuffer(), UnlockBufHdr(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 4462 of file bufmgr.c.

4463 {
4464  BufferDesc *bufHdr;
4465  uint32 buf_state;
4466 
4467  Assert(BufferIsValid(buffer));
4468 
4469  if (BufferIsLocal(buffer))
4470  {
4471  /* There should be exactly one pin */
4472  if (LocalRefCount[-buffer - 1] != 1)
4473  return false;
4474  /* Nobody else to wait for */
4475  return true;
4476  }
4477 
4478  /* There should be exactly one local pin */
4479  if (GetPrivateRefCount(buffer) != 1)
4480  return false;
4481 
4482  bufHdr = GetBufferDescriptor(buffer - 1);
4483 
4484  /* caller must hold exclusive lock on buffer */
4486  LW_EXCLUSIVE));
4487 
4488  buf_state = LockBufHdr(bufHdr);
4489 
4490  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4491  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4492  {
4493  /* pincount is OK. */
4494  UnlockBufHdr(bufHdr, buf_state);
4495  return true;
4496  }
4497 
4498  UnlockBufHdr(bufHdr, buf_state);
4499  return false;
4500 }
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1962

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4948 of file bufmgr.c.

4949 {
4950  int i;
4951 
4952  if (context->nr_pending == 0)
4953  return;
4954 
4955  /*
4956  * Executing the writes in-order can make them a lot faster, and allows us
4957  * to merge writeback requests to consecutive blocks into larger writebacks.
4958  */
4959  sort_pending_writebacks(context->pending_writebacks, context->nr_pending);
4960 
4961  /*
4962  * Coalesce neighbouring writes, but nothing else. For that we iterate
4963  * through the now-sorted array of pending flushes, and look forward to
4964  * find all neighbouring (or identical) writes.
4965  */
4966  for (i = 0; i < context->nr_pending; i++)
4967  {
4970  SMgrRelation reln;
4971  int ahead;
4972  BufferTag tag;
4973  RelFileLocator currlocator;
4974  Size nblocks = 1;
4975 
4976  cur = &context->pending_writebacks[i];
4977  tag = cur->tag;
4978  currlocator = BufTagGetRelFileLocator(&tag);
4979 
4980  /*
4981  * Peek ahead, into following writeback requests, to see if they can
4982  * be combined with the current one.
4983  */
4984  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4985  {
4986 
4987  next = &context->pending_writebacks[i + ahead + 1];
4988 
4989  /* different file, stop */
4990  if (!RelFileLocatorEquals(currlocator,
4991  BufTagGetRelFileLocator(&next->tag)) ||
4992  BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
4993  break;
4994 
4995  /* ok, block queued twice, skip */
4996  if (cur->tag.blockNum == next->tag.blockNum)
4997  continue;
4998 
4999  /* only merge consecutive writes */
5000  if (cur->tag.blockNum + 1 != next->tag.blockNum)
5001  break;
5002 
5003  nblocks++;
5004  cur = next;
5005  }
5006 
5007  i += ahead;
5008 
5009  /* and finally tell the kernel to write the data to storage */
5010  reln = smgropen(currlocator, InvalidBackendId);
5011  smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
5012  }
5013 
5014  context->nr_pending = 0;
5015 }
static int32 next
Definition: blutils.c:219
struct cursor * cur
Definition: ecpg.c:28
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:567
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, RelFileLocatorEquals, smgropen(), and smgrwriteback().

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
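The coalescing loop stands on its own well enough to demonstrate. The standalone sketch below applies the same merging rules (stop on a different file, skip a block queued twice, stop on a non-consecutive block) to a small pre-sorted request list; Req and its field names are invented for the example.

#include <stdio.h>

/* already sorted by (file, block), as after sort_pending_writebacks() */
typedef struct
{
    int         file;
    unsigned    block;
} Req;

int
main(void)
{
    Req         reqs[] = {{1, 10}, {1, 11}, {1, 11}, {1, 12}, {2, 40}};
    int         n = 5;

    /*
     * Walk the sorted requests and fold runs of identical or consecutive
     * blocks in the same file into one (start, nblocks) writeback, the
     * same merging IssuePendingWritebacks() performs.
     */
    for (int i = 0; i < n; i++)
    {
        int         file = reqs[i].file;
        unsigned    start = reqs[i].block;
        unsigned    last = start;
        int         ahead;

        for (ahead = 0; i + ahead + 1 < n; ahead++)
        {
            Req        *next = &reqs[i + ahead + 1];

            if (next->file != file)
                break;              /* different file, stop */
            if (next->block == last)
                continue;           /* block queued twice, skip */
            if (next->block != last + 1)
                break;              /* only merge consecutive writes */
            last = next->block;
        }
        i += ahead;

        printf("writeback file %d: start %u, nblocks %u\n",
               file, start, last - start + 1);
    }
    return 0;
}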

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4708 of file bufmgr.c.

4709 {
4710  BufferDesc *bufHdr = (BufferDesc *) arg;
4711 
4712  if (bufHdr != NULL)
4713  {
4714  char *path = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
4715  MyBackendId,
4716  BufTagGetForkNum(&bufHdr->tag));
4717 
4718  errcontext("writing block %u of relation %s",
4719  bufHdr->tag.blockNum, path);
4720  pfree(path);
4721  }
4722 }
#define errcontext
Definition: elog.h:192
void * arg
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:85

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyBackendId, pfree(), relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 4172 of file bufmgr.c.

4173 {
4174  BufferDesc *buf;
4175 
4176  Assert(BufferIsPinned(buffer));
4177  if (BufferIsLocal(buffer))
4178  return; /* local buffers need no lock */
4179 
4180  buf = GetBufferDescriptor(buffer - 1);
4181 
4182  if (mode == BUFFER_LOCK_UNLOCK)
4184  else if (mode == BUFFER_LOCK_SHARE)
4186  else if (mode == BUFFER_LOCK_EXCLUSIVE)
4188  else
4189  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
4190 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:106
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:107
static PgChecksumMode mode
Definition: pg_checksums.c:65

References Assert(), buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog(), ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), entryLoadMoreItems(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().
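
Usage sketch (not code from bufmgr.c): callers pair LockBuffer() with a pin taken by ReadBuffer(); the helper below is a hypothetical example of that pattern.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* hypothetical helper: report whether a block is completely empty */
static bool
block_is_empty(Relation rel, BlockNumber blkno)
{
    Buffer      buf;
    bool        empty;

    buf = ReadBuffer(rel, blkno);               /* pins the buffer */
    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* content lock for reading */
    empty = PageIsEmpty(BufferGetPage(buf));
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);        /* drop the content lock ... */
    ReleaseBuffer(buf);                         /* ... then drop the pin */
    return empty;
}

UnlockReleaseBuffer(buf) combines the last two calls.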

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 4229 of file bufmgr.c.

4230 {
4231  BufferDesc *bufHdr;
4232  char *new_status = NULL;
4233  TimestampTz waitStart = 0;
4234  bool logged_recovery_conflict = false;
4235 
4236  Assert(BufferIsPinned(buffer));
4237  Assert(PinCountWaitBuf == NULL);
4238 
4239  if (BufferIsLocal(buffer))
4240  {
4241  /* There should be exactly one pin */
4242  if (LocalRefCount[-buffer - 1] != 1)
4243  elog(ERROR, "incorrect local pin count: %d",
4244  LocalRefCount[-buffer - 1]);
4245  /* Nobody else to wait for */
4246  return;
4247  }
4248 
4249  /* There should be exactly one local pin */
4250  if (GetPrivateRefCount(buffer) != 1)
4251  elog(ERROR, "incorrect local pin count: %d",
4252  GetPrivateRefCount(buffer));
4253 
4254  bufHdr = GetBufferDescriptor(buffer - 1);
4255 
4256  for (;;)
4257  {
4258  uint32 buf_state;
4259 
4260  /* Try to acquire lock */
4261  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4262  buf_state = LockBufHdr(bufHdr);
4263 
4264  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4265  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
4266  {
4267  /* Successfully acquired exclusive lock with pincount 1 */
4268  UnlockBufHdr(bufHdr, buf_state);
4269 
4270  /*
4271  * Emit the log message if recovery conflict on buffer pin was
4272  * resolved but the startup process waited longer than
4273  * deadlock_timeout for it.
4274  */
4275  if (logged_recovery_conflict)
4276  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4277  waitStart, GetCurrentTimestamp(),
4278  NULL, false);
4279 
4280  /* Report change to non-waiting status */
4281  if (new_status)
4282  {
4283  set_ps_display(new_status);
4284  pfree(new_status);
4285  }
4286  return;
4287  }
4288  /* Failed, so mark myself as waiting for pincount 1 */
4289  if (buf_state & BM_PIN_COUNT_WAITER)
4290  {
4291  UnlockBufHdr(bufHdr, buf_state);
4292  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4293  elog(ERROR, "multiple backends attempting to wait for pincount 1");
4294  }
4295  bufHdr->wait_backend_pgprocno = MyProc->pgprocno;
4296  PinCountWaitBuf = bufHdr;
4297  buf_state |= BM_PIN_COUNT_WAITER;
4298  UnlockBufHdr(bufHdr, buf_state);
4299  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4300 
4301  /* Wait to be signaled by UnpinBuffer() */
4302  if (InHotStandby)
4303  {
4304  /* Report change to waiting status */
4305  if (update_process_title && new_status == NULL)
4306  {
4307  const char *old_status;
4308  int len;
4309 
4310  old_status = get_ps_display(&len);
4311  new_status = (char *) palloc(len + 8 + 1);
4312  memcpy(new_status, old_status, len);
4313  strcpy(new_status + len, " waiting");
4314  set_ps_display(new_status);
4315  new_status[len] = '\0'; /* truncate off " waiting" */
4316  }
4317 
4318  /*
4319  * Emit the log message if the startup process is waiting longer
4320  * than deadlock_timeout for recovery conflict on buffer pin.
4321  *
4322  * Skip this if first time through because the startup process has
4323  * not started waiting yet in this case. So, the wait start
4324  * timestamp is set after this logic.
4325  */
4326  if (waitStart != 0 && !logged_recovery_conflict)
4327  {
4328  TimestampTz now = GetCurrentTimestamp();
4329 
4330  if (TimestampDifferenceExceeds(waitStart, now,
4331  DeadlockTimeout))
4332  {
4333  LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4334  waitStart, now, NULL, true);
4335  logged_recovery_conflict = true;
4336  }
4337  }
4338 
4339  /*
4340  * Set the wait start timestamp if logging is enabled and first
4341  * time through.
4342  */
4343  if (log_recovery_conflict_waits && waitStart == 0)
4344  waitStart = GetCurrentTimestamp();
4345 
4346  /* Publish the bufid that Startup process waits on */
4347  SetStartupBufferPinWaitBufId(buffer - 1);
4348  /* Set alarm and then wait to be signaled by UnpinBuffer() */
4349  ResolveRecoveryConflictWithBufferPin();
4350  /* Reset the published bufid */
4351  SetStartupBufferPinWaitBufId(-1);
4352  }
4353  else
4354  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
4355 
4356  /*
4357  * Remove flag marking us as waiter. Normally this will not be set
4358  * anymore, but ProcWaitForSignal() can return for other signals as
4359  * well. We take care to only reset the flag if we're the waiter, as
4360  * theoretically another backend could have started waiting. That's
4361  * impossible with the current usages due to table level locking, but
4362  * better be safe.
4363  */
4364  buf_state = LockBufHdr(bufHdr);
4365  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
4366  bufHdr->wait_backend_pgprocno == MyProc->pgprocno)
4367  buf_state &= ~BM_PIN_COUNT_WAITER;
4368  UnlockBufHdr(bufHdr, buf_state);
4369 
4370  PinCountWaitBuf = NULL;
4371  /* Loop back and try again */
4372  }
4373 }
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1719
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1573
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1537
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:65
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:167
int64 TimestampTz
Definition: timestamp.h:39
const void size_t len
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:44
const char * get_ps_display(int *displen)
Definition: ps_status.c:414
bool update_process_title
Definition: ps_status.c:35
void set_ps_display(const char *activity)
Definition: ps_status.c:342
int DeadlockTimeout
Definition: proc.c:60
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:633
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1878
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:787
bool log_recovery_conflict_waits
Definition: standby.c:43
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
int wait_backend_pgprocno
int pgprocno
Definition: proc.h:191
#define PG_WAIT_BUFFER_PIN
Definition: wait_event.h:20
#define InHotStandby
Definition: xlogutils.h:57

References Assert(), BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, DeadlockTimeout, elog(), ERROR, get_ps_display(), GetBufferDescriptor(), GetCurrentTimestamp(), GetPrivateRefCount(), InHotStandby, len, LocalRefCount, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProc, now(), palloc(), pfree(), PG_WAIT_BUFFER_PIN, PGPROC::pgprocno, PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), update_process_title, and BufferDesc::wait_backend_pgprocno.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().
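
Usage sketch (an assumption-laden example, not code from bufmgr.c): a vacuum-like caller reads a block with its own strategy and then waits for a cleanup lock before removing tuples. The helper name and parameters are hypothetical.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* hypothetical caller: prune one block under a cleanup lock */
static void
prune_block(Relation rel, BlockNumber blkno, BufferAccessStrategy vac_strategy)
{
    Buffer      buf;

    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy);
    LockBufferForCleanup(buf);      /* exclusive lock plus sole-pin guarantee */

    /* ... safe here to remove tuples / defragment the page ... */

    UnlockReleaseBuffer(buf);
}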

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4755 of file bufmgr.c.

4756 {
4757  SpinDelayStatus delayStatus;
4758  uint32 old_buf_state;
4759 
4760  init_local_spin_delay(&delayStatus);
4761 
4762  while (true)
4763  {
4764  /* set BM_LOCKED flag */
4765  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4766  /* if it wasn't set before we're OK */
4767  if (!(old_buf_state & BM_LOCKED))
4768  break;
4769  perform_spin_delay(&delayStatus);
4770  }
4771  finish_spin_delay(&delayStatus);
4772  return old_buf_state | BM_LOCKED;
4773 }
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:367
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:186
#define init_local_spin_delay(status)
Definition: s_lock.h:863

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), ReadRecentBuffer(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().
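
Usage sketch (hypothetical helper, not a bufmgr.c routine): callers bracket a short inspection or update of the packed state word between LockBufHdr() and UnlockBufHdr().

#include "postgres.h"
#include "storage/buf_internals.h"

/* hypothetical helper: test a flag in the buffer header under the spinlock */
static bool
buffer_header_is_dirty(BufferDesc *bufHdr)
{
    uint32      buf_state;
    bool        dirty;

    buf_state = LockBufHdr(bufHdr);     /* spins until BM_LOCKED is acquired */
    dirty = (buf_state & BM_DIRTY) != 0;
    UnlockBufHdr(bufHdr, buf_state);    /* writes the state back with BM_LOCKED cleared */
    return dirty;
}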

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 1583 of file bufmgr.c.

1584 {
1585  BufferDesc *bufHdr;
1586  uint32 buf_state;
1587  uint32 old_buf_state;
1588 
1589  if (!BufferIsValid(buffer))
1590  elog(ERROR, "bad buffer ID: %d", buffer);
1591 
1592  if (BufferIsLocal(buffer))
1593  {
1594  MarkLocalBufferDirty(buffer);
1595  return;
1596  }
1597 
1598  bufHdr = GetBufferDescriptor(buffer - 1);
1599 
1600  Assert(BufferIsPinned(buffer));
1601  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1602  LW_EXCLUSIVE));
1603 
1604  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1605  for (;;)
1606  {
1607  if (old_buf_state & BM_LOCKED)
1608  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1609 
1610  buf_state = old_buf_state;
1611 
1612  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1613  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1614 
1615  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1616  buf_state))
1617  break;
1618  }
1619 
1620  /*
1621  * If the buffer was not dirty already, do vacuum accounting.
1622  */
1623  if (!(old_buf_state & BM_DIRTY))
1624  {
1625  VacuumPageDirty++;
1626  pgBufferUsage.shared_blks_dirtied++;
1627  if (VacuumCostActive)
1628  VacuumCostBalance += VacuumCostPageDirty;
1629  }
1630 }
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:306
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:4783
bool VacuumCostActive
Definition: globals.c:153
int64 VacuumPageDirty
Definition: globals.c:150
int VacuumCostBalance
Definition: globals.c:152
int VacuumCostPageDirty
Definition: globals.c:144
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:286
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, BufferIsValid(), elog(), ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_freeze_execute_prepared(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune(), heap_xlog_update(), heap_xlog_vacuum(), heap_xlog_visible(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_page(), log_newpage_range(), moveLeafs(), nextval_internal(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), writeListPage(), and XLogReadBufferForRedoExtended().
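
The callers above follow the standard write pattern: modify the page and call MarkBufferDirty() inside a critical section, then emit WAL and stamp the page LSN before leaving it. Below is a hedged sketch of that pattern; the relation, the already-held exclusive content lock, and the RM_EXAMPLE_ID / XLOG_EXAMPLE_CHANGE record are placeholders for a module's own resource manager, not part of bufmgr.c.

#include "postgres.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* hypothetical: apply a prepared change to a pinned, exclusively locked buffer */
static void
apply_example_change(Relation rel, Buffer buf)
{
    Page        page = BufferGetPage(buf);

    START_CRIT_SECTION();

    /* ... modify "page" here ... */

    MarkBufferDirty(buf);

    if (RelationNeedsWAL(rel))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        /* RM_EXAMPLE_ID / XLOG_EXAMPLE_CHANGE stand in for a real rmgr record */
        recptr = XLogInsert(RM_EXAMPLE_ID, XLOG_EXAMPLE_CHANGE);
        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}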

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 4001 of file bufmgr.c.

4002 {
4003  BufferDesc *bufHdr;
4004  Page page = BufferGetPage(buffer);
4005 
4006  if (!BufferIsValid(buffer))
4007  elog(ERROR, "bad buffer ID: %d", buffer);
4008 
4009  if (BufferIsLocal(buffer))
4010  {
4011  MarkLocalBufferDirty(buffer);
4012  return;
4013  }
4014 
4015  bufHdr = GetBufferDescriptor(buffer - 1);
4016 
4017  Assert(GetPrivateRefCount(buffer) > 0);
4018  /* here, either share or exclusive lock is OK */
4019  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
4020 
4021  /*
4022  * This routine might get called many times on the same page, if we are
4023  * making the first scan after commit of an xact that added/deleted many
4024  * tuples. So, be as quick as we can if the buffer is already dirty. We
4025  * do this by not acquiring spinlock if it looks like the status bits are
4026  * already set. Since we make this test unlocked, there's a chance we
4027  * might fail to notice that the flags have just been cleared, and failed
4028  * to reset them, due to memory-ordering issues. But since this function
4029  * is only intended to be used in cases where failing to write out the
4030  * data would be harmless anyway, it doesn't really matter.
4031  */
4032  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
4033  (BM_DIRTY | BM_JUST_DIRTIED))
4034  {
4035  XLogRecPtr lsn = InvalidXLogRecPtr;
4036  bool dirtied = false;
4037  bool delayChkptFlags = false;
4038  uint32 buf_state;
4039 
4040  /*
4041  * If we need to protect hint bit updates from torn writes, WAL-log a
4042  * full page image of the page. This full page image is only necessary
4043  * if the hint bit update is the first change to the page since the
4044  * last checkpoint.
4045  *
4046  * We don't check full_page_writes here because that logic is included
4047  * when we call XLogInsert() since the value changes dynamically.
4048  */
4049  if (XLogHintBitIsNeeded() &&
4050  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
4051  {
4052  /*
4053  * If we must not write WAL, due to a relfilelocator-specific
4054  * condition or being in recovery, don't dirty the page. We can
4055  * set the hint, just not dirty the page as a result so the hint
4056  * is lost when we evict the page or shutdown.
4057  *
4058  * See src/backend/storage/page/README for longer discussion.
4059  */
4060  if (RecoveryInProgress() ||
4061  RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
4062  return;
4063 
4064  /*
4065  * If the block is already dirty because we either made a change
4066  * or set a hint already, then we don't need to write a full page
4067  * image. Note that aggressive cleaning of blocks dirtied by hint
4068  * bit setting would increase the call rate. Bulk setting of hint
4069  * bits would reduce the call rate...
4070  *
4071  * We must issue the WAL record before we mark the buffer dirty.
4072  * Otherwise we might write the page before we write the WAL. That
4073  * causes a race condition, since a checkpoint might occur between
4074  * writing the WAL record and marking the buffer dirty. We solve
4075  * that with a kluge, but one that is already in use during
4076  * transaction commit to prevent race conditions. Basically, we
4077  * simply prevent the checkpoint WAL record from being written
4078  * until we have marked the buffer dirty. We don't start the
4079  * checkpoint flush until we have marked dirty, so our checkpoint
4080  * must flush the change to disk successfully or the checkpoint
4081  * never gets written, so crash recovery will fix.
4082  *
4083  * It's possible we may enter here without an xid, so it is
4084  * essential that CreateCheckPoint waits for virtual transactions
4085  * rather than full transactionids.
4086  */
4087  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
4088  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
4089  delayChkptFlags = true;
4090  lsn = XLogSaveBufferForHint(buffer, buffer_std);
4091  }
4092 
4093  buf_state = LockBufHdr(bufHdr);
4094 
4095  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
4096 
4097  if (!(buf_state & BM_DIRTY))
4098  {
4099  dirtied = true; /* Means "will be dirtied by this action" */
4100 
4101  /*
4102  * Set the page LSN if we wrote a backup block. We aren't supposed
4103  * to set this when only holding a share lock but as long as we
4104  * serialise it somehow we're OK. We choose to set LSN while
4105  * holding the buffer header lock, which causes any reader of an
4106  * LSN who holds only a share lock to also obtain a buffer header
4107  * lock before using PageGetLSN(), which is enforced in
4108  * BufferGetLSNAtomic().
4109  *
4110  * If checksums are enabled, you might think we should reset the
4111  * checksum here. That will happen when the page is written
4112  * sometime later in this checkpoint cycle.
4113  */
4114  if (!XLogRecPtrIsInvalid(lsn))
4115  PageSetLSN(page, lsn);
4116  }
4117 
4118  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
4119  UnlockBufHdr(bufHdr, buf_state);
4120 
4121  if (delayChkptFlags)
4122  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
4123 
4124  if (dirtied)
4125  {
4126  VacuumPageDirty++;
4127  pgBufferUsage.shared_blks_dirtied++;
4128  if (VacuumCostActive)
4129  VacuumCostBalance += VacuumCostPageDirty;
4130  }
4131  }
4132 }
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define DELAY_CHKPT_START
Definition: proc.h:119
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:550
int delayChkptFlags
Definition: proc.h:231
bool RecoveryInProgress(void)
Definition: xlog.c:5912
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1019

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferGetPage(), BufferIsLocal, BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog(), ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
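
Usage sketch, in the spirit of the SetHintBits() caller listed above (simplified and hypothetical; the real function also checks transaction status and buffer validity before setting the bit).

#include "postgres.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

/* hypothetical, simplified hint setter: losing this update would be harmless */
static void
set_tuple_hint(HeapTupleHeader tuple, Buffer buffer, uint16 infomask)
{
    tuple->t_infomask |= infomask;          /* e.g. HEAP_XMIN_COMMITTED */
    MarkBufferDirtyHint(buffer, true);      /* true: page has a standard layout */
}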

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 283 of file bufmgr.c.

284 {
286 
287  /* only allowed to be called when a reservation has been made */
288  Assert(ReservedRefCountEntry != NULL);
289 
290  /* use up the reserved entry */
292  ReservedRefCountEntry = NULL;
293 
294  /* and fill it */
295  res->buffer = buffer;
296  res->refcount = 0;
297 
298  return res;
299 }

References Assert(), PrivateRefCountEntry::buffer, res, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1704 of file bufmgr.c.

1705 {
1706  Buffer b = BufferDescriptorGetBuffer(buf);
1707  bool result;
1708  PrivateRefCountEntry *ref;
1709 
1710  ref = GetPrivateRefCountEntry(b, true);
1711 
1712  if (ref == NULL)
1713  {
1714  uint32 buf_state;
1715  uint32 old_buf_state;
1716 
1717  ReservePrivateRefCountEntry();
1718  ref = NewPrivateRefCountEntry(b);
1719 
1720  old_buf_state = pg_atomic_read_u32(&buf->state);
1721  for (;;)
1722  {
1723  if (old_buf_state & BM_LOCKED)
1724  old_buf_state = WaitBufHdrUnlocked(buf);
1725 
1726  buf_state = old_buf_state;
1727 
1728  /* increase refcount */
1729  buf_state += BUF_REFCOUNT_ONE;
1730 
1731  if (strategy == NULL)
1732  {
1733  /* Default case: increase usagecount unless already max. */
1734  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1735  buf_state += BUF_USAGECOUNT_ONE;
1736  }
1737  else
1738  {
1739  /*
1740  * Ring buffers shouldn't evict others from pool. Thus we
1741  * don't make usagecount more than 1.
1742  */
1743  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1744  buf_state += BUF_USAGECOUNT_ONE;
1745  }
1746 
1747  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1748  buf_state))
1749  {
1750  result = (buf_state & BM_VALID) != 0;
1751 
1752  /*
1753  * Assume that we acquired a buffer pin for the purposes of
1754  * Valgrind buffer client checks (even in !result case) to
1755  * keep things simple. Buffers that are unsafe to access are
1756  * not generally guaranteed to be marked undefined or
1757  * non-accessible in any case.
1758  */
1759  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1760  break;
1761  }
1762  }
1763  }
1764  else
1765  {
1766  /*
1767  * If we previously pinned the buffer, it must surely be valid.
1768  *
1769  * Note: We deliberately avoid a Valgrind client request here.
1770  * Individual access methods can optionally superimpose buffer page
1771  * client requests on top of our client requests to enforce that
1772  * buffers are only accessed while locked (and pinned). It's possible
1773  * that the buffer page is legitimately non-accessible here. We
1774  * cannot meddle with that.
1775  */
1776  result = true;
1777  }
1778 
1779  ref->refcount++;
1780  Assert(ref->refcount > 0);
1781  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1782  return result;
1783 }
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:76
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:41
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:50
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:283
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26

References Assert(), b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), VALGRIND_MAKE_MEM_DEFINED, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), and ReadRecentBuffer().
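
The fast path above is a lock-free compare-and-swap loop on the packed state word. The following is a standalone sketch of that technique using C11 atomics; the bit layout and constant names are illustrative and do not match buf_internals.h.

#include <stdatomic.h>
#include <stdint.h>

#define REFCOUNT_ONE    1u              /* refcount lives in the low bits */
#define LOCKED_FLAG     (1u << 31)      /* stand-in for BM_LOCKED */

/* bump the refcount unless the header spinlock flag is currently set */
static void
pin_state_word(_Atomic uint32_t *state)
{
    uint32_t    old_state = atomic_load(state);

    for (;;)
    {
        uint32_t    new_state;

        if (old_state & LOCKED_FLAG)
        {
            /* the backend would spin-wait here (see WaitBufHdrUnlocked()) */
            old_state = atomic_load(state);
            continue;
        }
        new_state = old_state + REFCOUNT_ONE;
        /* on failure old_state is refreshed with the current value and we retry */
        if (atomic_compare_exchange_weak(state, &old_state, new_state))
            break;
    }
}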

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1807 of file bufmgr.c.

1808 {
1809  Buffer b;
1810  PrivateRefCountEntry *ref;
1811  uint32 buf_state;
1812 
1813  /*
1814  * As explained, We don't expect any preexisting pins. That allows us to
1815  * manipulate the PrivateRefCount after releasing the spinlock
1816  */
1817  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1818 
1819  /*
1820  * Buffer can't have a preexisting pin, so mark its page as defined to
1821  * Valgrind (this is similar to the PinBuffer() case where the backend
1822  * doesn't already have a buffer pin)
1823  */
1824  VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
1825 
1826  /*
1827  * Since we hold the buffer spinlock, we can update the buffer state and
1828  * release the lock in one operation.
1829  */
1830  buf_state = pg_atomic_read_u32(&buf->state);
1831  Assert(buf_state & BM_LOCKED);
1832  buf_state += BUF_REFCOUNT_ONE;
1833  UnlockBufHdr(buf, buf_state);
1834 
1835  b = BufferDescriptorGetBuffer(buf);
1836 
1837  ref = NewPrivateRefCountEntry(b);
1838  ref->refcount++;
1839 
1840  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1841 }

References Assert(), b, BM_LOCKED, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufHdrGetBlock, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), UnlockBufHdr(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), ReadRecentBuffer(), and SyncOneBuffer().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 592 of file bufmgr.c.

593 {
594  Assert(RelationIsValid(reln));
595  Assert(BlockNumberIsValid(blockNum));
596 
597  if (RelationUsesLocalBuffers(reln))
598  {
599  /* see comments in ReadBufferExtended */
600  if (RELATION_IS_OTHER_TEMP(reln))
601  ereport(ERROR,
602  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
603  errmsg("cannot access temporary tables of other sessions")));
604 
605  /* pass it off to localbuf.c */
606  return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
607  }
608  else
609  {
610  /* pass it to the shared buffer version */
611  return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
612  }
613 }
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:505
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:64
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:656
#define RelationIsValid(relation)
Definition: rel.h:474

References Assert(), BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by acquire_sample_rows(), BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().
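
Usage sketch (hypothetical helper; the distance constant is illustrative, not a backend GUC): a sequential pass that issues prefetch requests a fixed distance ahead of the block it is about to read.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

#define PREFETCH_DISTANCE 32            /* illustrative only */

/* hypothetical sequential pass that prefetches ahead of the current block */
static void
scan_with_prefetch(Relation rel)
{
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;

        if (blkno + PREFETCH_DISTANCE < nblocks)
            (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno + PREFETCH_DISTANCE);

        buf = ReadBuffer(rel, blkno);
        /* ... process BufferGetPage(buf) under a content lock ... */
        ReleaseBuffer(buf);
    }
}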

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 505 of file bufmgr.c.

508 {
509  PrefetchBufferResult result = {InvalidBuffer, false};
510  BufferTag newTag; /* identity of requested block */
511  uint32 newHash; /* hash value for newTag */
512  LWLock *newPartitionLock; /* buffer partition lock for it */
513  int buf_id;
514 
515  Assert(BlockNumberIsValid(blockNum));
516 
517  /* create a tag so we can lookup the buffer */
518  InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
519  forkNum, blockNum);
520 
521  /* determine its hash code and partition lock ID */
522  newHash = BufTableHashCode(&newTag);
523  newPartitionLock = BufMappingPartitionLock(newHash);
524 
525  /* see if the block is in the buffer pool already */
526  LWLockAcquire(newPartitionLock, LW_SHARED);
527  buf_id = BufTableLookup(&newTag, newHash);
528  LWLockRelease(newPartitionLock);
529 
530  /* If not in buffers, initiate prefetch */
531  if (buf_id < 0)
532  {
533 #ifdef USE_PREFETCH
534  /*
535  * Try to initiate an asynchronous read. This returns false in
536  * recovery if the relation file doesn't exist.
537  */
538  if (smgrprefetch(smgr_reln, forkNum, blockNum))
539  result.initiated_io = true;
540 #endif /* USE_PREFETCH */
541  }
542  else
543  {
544  /*
545  * Report the buffer it was in at that time. The caller may be able
546  * to avoid a buffer table lookup, but it's not pinned and it must be
547  * rechecked!
548  */
549  result.recent_buffer = buf_id + 1;
550  }
551 
552  /*
553  * If the block *is* in buffers, we do nothing. This is not really ideal:
554  * the block might be just about to be evicted, which would be stupid
555  * since we know we are going to need it soon. But the only easy answer
556  * is to bump the usage_count, which does not seem like a great solution:
557  * when the caller does ultimately touch the block, usage_count would get
558  * bumped again, resulting in too much favoritism for blocks that are
559  * involved in a prefetch sequence. A real fix would involve some
560  * additional per-buffer state, and it's not clear that there's enough of
561  * a problem to justify that.
562  */
563 
564  return result;
565 }
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: smgr.c:518
Buffer recent_buffer
Definition: bufmgr.h:54

References Assert(), BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ PrintBufferLeakWarning()

void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2695 of file bufmgr.c.

2696 {
2697  BufferDesc *buf;
2698  int32 loccount;
2699  char *path;
2700  BackendId backend;
2701  uint32 buf_state;
2702 
2703  Assert(BufferIsValid(buffer));
2704  if (BufferIsLocal(buffer))
2705  {
2706  buf = GetLocalBufferDescriptor(-buffer - 1);
2707  loccount = LocalRefCount[-buffer - 1];
2708  backend = MyBackendId;
2709  }
2710  else
2711  {
2712  buf = GetBufferDescriptor(buffer - 1);
2713  loccount = GetPrivateRefCount(buffer);
2714  backend = InvalidBackendId;
2715  }
2716 
2717  /* theoretically we should lock the bufhdr here */
2718  path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
2719  BufTagGetForkNum(&buf->tag));
2720  buf_state = pg_atomic_read_u32(&buf->state);
2721  elog(WARNING,
2722  "buffer refcount leak: [%03d] "
2723  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2724  buffer, path,
2725  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2726  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2727  pfree(path);
2728 }
int BackendId
Definition: backendid.h:21

References Assert(), buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), elog(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 712 of file bufmgr.c.

713 {
714  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
715 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:759
@ RBM_NORMAL
Definition: bufmgr.h:39

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

◆ ReadBuffer_common()

static Buffer ReadBuffer_common ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 819 of file bufmgr.c.

822 {
823  BufferDesc *bufHdr;
824  Block bufBlock;
825  bool found;
826  bool isExtend;
827  bool isLocalBuf = SmgrIsTemp(smgr);
828 
829  *hit = false;
830 
831  /* Make sure we will have room to remember the buffer pin */
833 
834  isExtend = (blockNum == P_NEW);
835 
836  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
840  smgr->smgr_rlocator.backend,
841  isExtend);
842 
843  /* Substitute proper block number if caller asked for P_NEW */
844  if (isExtend)
845  {
846  blockNum = smgrnblocks(smgr, forkNum);
847  /* Fail if relation is already at maximum possible length */
848  if (blockNum == P_NEW)
849  ereport(ERROR,
850  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
851  errmsg("cannot extend relation %s beyond %u blocks",
852  relpath(smgr->smgr_rlocator, forkNum),
853  P_NEW)));
854  }
855 
856  if (isLocalBuf)
857  {
858  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
859  if (found)
861  else if (isExtend)
863  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
866  }
867  else
868  {
869  /*
870  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
871  * not currently in memory.
872  */
873  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
874  strategy, &found);
875  if (found)
877  else if (isExtend)
879  else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
882  }
883 
884  /* At this point we do NOT hold any locks. */
885 
886  /* if it was already in the buffer pool, we're done */
887  if (found)
888  {
889  if (!isExtend)
890  {
891  /* Just need to update stats before we exit */
892  *hit = true;
893  VacuumPageHit++;
894 
895  if (VacuumCostActive)
897 
898  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
902  smgr->smgr_rlocator.backend,
903  isExtend,
904  found);
905 
906  /*
907  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
908  * locked on return.
909  */
910  if (!isLocalBuf)
911  {
912  if (mode == RBM_ZERO_AND_LOCK)
914  LW_EXCLUSIVE);
915  else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
917  }
918 
919  return BufferDescriptorGetBuffer(bufHdr);
920  }
921 
922  /*
923  * We get here only in the corner case where we are trying to extend
924  * the relation but we found a pre-existing buffer marked BM_VALID.
925  * This can happen because mdread doesn't complain about reads beyond
926  * EOF (when zero_damaged_pages is ON) and so a previous attempt to
927  * read a block beyond EOF could have left a "valid" zero-filled
928  * buffer. Unfortunately, we have also seen this case occurring
929  * because of buggy Linux kernels that sometimes return an
930  * lseek(SEEK_END) result that doesn't account for a recent write. In
931  * that situation, the pre-existing buffer would contain valid data
932  * that we don't want to overwrite. Since the legitimate case should
933  * always have left a zero-filled buffer, complain if not PageIsNew.
934  */
935  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
936  if (!PageIsNew((Page) bufBlock))
937  ereport(ERROR,
938  (errmsg("unexpected data beyond EOF in block %u of relation %s",
939  blockNum, relpath(smgr->smgr_rlocator, forkNum)),
940  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
941 
942  /*
943  * We *must* do smgrextend before succeeding, else the page will not
944  * be reserved by the kernel, and the next P_NEW call will decide to
945  * return the same page. Clear the BM_VALID bit, do the StartBufferIO
946  * call that BufferAlloc didn't, and proceed.
947  */
948  if (isLocalBuf)
949  {
950  /* Only need to adjust flags */
951  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
952 
953  Assert(buf_state & BM_VALID);
954  buf_state &= ~BM_VALID;
955  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
956  }
957  else
958  {
959  /*
960  * Loop to handle the very small possibility that someone re-sets
961  * BM_VALID between our clearing it and StartBufferIO inspecting
962  * it.
963  */
964  do
965  {
966  uint32 buf_state = LockBufHdr(bufHdr);
967 
968  Assert(buf_state & BM_VALID);
969  buf_state &= ~BM_VALID;
970  UnlockBufHdr(bufHdr, buf_state);
971  } while (!StartBufferIO(bufHdr, true));
972  }
973  }
974 
975  /*
976  * if we have gotten to this point, we have allocated a buffer for the
977  * page but its contents are not yet valid. IO_IN_PROGRESS is set for it,
978  * if it's a shared buffer.
979  *
980  * Note: if smgrextend fails, we will end up with a buffer that is
981  * allocated but not marked BM_VALID. P_NEW will still select the same
982  * block number (because the relation didn't get any longer on disk) and
983  * so future attempts to extend the relation will find the same buffer (if
984  * it's not been recycled) but come right back here to try smgrextend
985  * again.
986  */
987  Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
988 
989  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
990 
991  if (isExtend)
992  {
993  /* new buffers are zero-filled */
994  MemSet((char *) bufBlock, 0, BLCKSZ);
995  /* don't set checksum for all-zero page */
996  smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
997 
998  /*
999  * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
1000  * although we're essentially performing a write. At least on linux
1001  * doing so defeats the 'delayed allocation' mechanism, leading to
1002  * increased file fragmentation.
1003  */
1004  }
1005  else
1006  {
1007  /*
1008  * Read in the page, unless the caller intends to overwrite it and
1009  * just wants us to allocate a buffer.
1010  */
1012  MemSet((char *) bufBlock, 0, BLCKSZ);
1013  else
1014  {
1015  instr_time io_start,
1016  io_time;
1017 
1018  if (track_io_timing)
1019  INSTR_TIME_SET_CURRENT(io_start);
1020 
1021  smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
1022 
1023  if (track_io_timing)
1024  {
1025  INSTR_TIME_SET_CURRENT(io_time);
1026  INSTR_TIME_SUBTRACT(io_time, io_start);
1029  }
1030 
1031  /* check for garbage data */
1032  if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
1034  {
1036  {
1037  ereport(WARNING,
1039  errmsg("invalid page in block %u of relation %s; zeroing out page",
1040  blockNum,
1041  relpath(smgr->smgr_rlocator, forkNum))));
1042  MemSet((char *) bufBlock, 0, BLCKSZ);
1043  }
1044  else
1045  ereport(ERROR,
1047  errmsg("invalid page in block %u of relation %s",
1048  blockNum,
1049  relpath(smgr->smgr_rlocator, forkNum))));
1050  }
1051  }
1052  }
1053 
1054  /*
1055  * In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
1056  * the page as valid, to make sure that no other backend sees the zeroed
1057  * page before the caller has had a chance to initialize it.
1058  *
1059  * Since no-one else can be looking at the page contents yet, there is no
1060  * difference between an exclusive lock and a cleanup-strength lock. (Note
1061  * that we cannot use LockBuffer() or LockBufferForCleanup() here, because
1062  * they assert that the buffer is already valid.)
1063  */
1065  !isLocalBuf)
1066  {
1068  }
1069 
1070  if (isLocalBuf)
1071  {
1072  /* Only need to adjust flags */
1073  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1074 
1075  buf_state |= BM_VALID;
1076  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1077  }
1078  else
1079  {
1080  /* Set BM_VALID, terminate IO, and wake up any waiters */
1081  TerminateBufferIO(bufHdr, false, BM_VALID);
1082  }
1083 
1084  VacuumPageMiss++;
1085  if (VacuumCostActive)
1087 
1088  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1090  smgr->smgr_rlocator.locator.dbOid,
1092  smgr->smgr_rlocator.backend,
1093  isExtend,
1094  found);
1095 
1096  return BufferDescriptorGetBuffer(bufHdr);
1097 }
bool zero_damaged_pages
Definition: bufmgr.c:134
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:1119
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4229
#define P_NEW
Definition: bufmgr.h:100
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:44
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:42
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:40
@ RBM_NORMAL_NO_LOG
Definition: bufmgr.h:45
bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
Definition: bufpage.c:88
#define PIV_LOG_WARNING
Definition: bufpage.h:465
static bool PageIsNew(Page page)
Definition: bufpage.h:230
#define PIV_REPORT_STAT
Definition: bufpage.h:466
#define MemSet(start, val, len)
Definition: c.h:953
int errhint(const char *fmt,...)
Definition: elog.c:1153
int64 VacuumPageHit
Definition: globals.c:148
int VacuumCostPageMiss
Definition: globals.c:143
int64 VacuumPageMiss
Definition: globals.c:149
int VacuumCostPageHit
Definition: globals.c:142
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:109
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:468
#define relpath(rlocator, forknum)
Definition: relpath.h:94
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:579
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:493
void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
Definition: smgr.c:532
#define SmgrIsTemp(smgr)
Definition: smgr.h:77
int64 local_blks_hit
Definition: instrument.h:30
int64 local_blks_written
Definition: instrument.h:33
int64 shared_blks_read
Definition: instrument.h:27
instr_time blk_read_time
Definition: instrument.h:36
int64 local_blks_read
Definition: instrument.h:31
int64 shared_blks_hit
Definition: instrument.h:26

References Assert(), RelFileLocatorBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufHdrGetBlock, CurrentResourceOwner, RelFileLocator::dbOid, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, BufferUsage::local_blks_written, LocalBufferAlloc(), LocalBufHdrGetBlock, RelFileLocatorBackend::locator, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, mode, P_NEW, PageIsNew(), PageIsVerifiedExtended(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, PIV_LOG_WARNING, PIV_REPORT_STAT, RBM_NORMAL, RBM_NORMAL_NO_LOG, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileLocator::relNumber, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, BufferUsage::shared_blks_written, SMgrRelationData::smgr_rlocator, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileLocator::spcOid, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr(), VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 759 of file bufmgr.c.

761 {
762  bool hit;
763  Buffer buf;
764 
765  /*
766  * Reject attempts to read non-local temporary relations; we would be
767  * likely to get wrong data since we have no visibility into the owning
768  * session's local buffers.
769  */
770  if (RELATION_IS_OTHER_TEMP(reln))
771  ereport(ERROR,
772  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
773  errmsg("cannot access temporary tables of other sessions")));
774 
775  /*
776  * Read the buffer, and update pgstat counters to reflect a cache hit or
777  * miss.
778  */
779  pgstat_count_buffer_read(reln);
780  buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
781  forkNum, blockNum, mode, strategy, &hit);
782  if (hit)
783  pgstat_count_buffer_hit(reln);
784  return buf;
785 }
static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
Definition: bufmgr.c:819
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:550
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:555
Form_pg_class rd_rel
Definition: rel.h:110

References buf, ereport, errcode(), errmsg(), ERROR, mode, pgstat_count_buffer_hit, pgstat_count_buffer_read, RelationData::rd_rel, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), autoprewarm_database_main(), blbulkdelete(), blgetbitmap(), blvacuumcleanup(), brin_vacuum_scan(), brinbuildempty(), bt_recheck_sibling_links(), btvacuumpage(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fill_seq_fork_with_data(), fsm_readbuf(), get_raw_page_internal(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbuildempty(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_analyze_next_block(), heapgetpage(), lazy_scan_heap(), lazy_vacuum_heap_rel(), log_newpage_range(), palloc_btree_page(), pg_prewarm(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), verify_heapam(), and vm_readbuf().
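
For bulk scans, callers usually pass a strategy from GetAccessStrategy() so the pass recycles a small ring of buffers instead of flooding shared_buffers. A hedged sketch of such a caller (the helper name and loop are assumptions, not bufmgr.c code):

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* hypothetical full-table pass using a bulk-read ring */
static void
bulk_scan(Relation rel)
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;

        buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy);
        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... examine BufferGetPage(buf) ... */
        UnlockReleaseBuffer(buf);
    }
    FreeAccessStrategy(strategy);
}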

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 799 of file bufmgr.c.

802 {
803  bool hit;
804 
805  SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
806 
807  return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
808  RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
809  mode, strategy, &hit);
810 }

References InvalidBackendId, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 623 of file bufmgr.c.

625 {
626  BufferDesc *bufHdr;
627  BufferTag tag;
628  uint32 buf_state;
629  bool have_private_ref;
630 
631  Assert(BufferIsValid(recent_buffer));
632 
633  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
634  ReservePrivateRefCountEntry();
635  InitBufferTag(&tag, &rlocator, forkNum, blockNum);
636 
637  if (BufferIsLocal(recent_buffer))
638  {
639  int b = -recent_buffer - 1;
640 
641  bufHdr = GetLocalBufferDescriptor(b);
642  buf_state = pg_atomic_read_u32(&bufHdr->state);
643 
644  /* Is it still valid and holding the right tag? */
645  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
646  {
647  /*
648  * Bump buffer's ref and usage counts. This is equivalent of
649  * PinBuffer for a shared buffer.
650  */
651  if (LocalRefCount[b] == 0)
652  {
653  if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
654  {
655  buf_state += BUF_USAGECOUNT_ONE;
656  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
657  }
658  }
659  LocalRefCount[b]++;
661 
663 
664  return true;
665  }
666  }
667  else
668  {
669  bufHdr = GetBufferDescriptor(recent_buffer - 1);
670  have_private_ref = GetPrivateRefCount(recent_buffer) > 0;
671 
672  /*
673  * Do we already have this buffer pinned with a private reference? If
674  * so, it must be valid and it is safe to check the tag without
675  * locking. If not, we have to lock the header first and then check.
676  */
677  if (have_private_ref)
678  buf_state = pg_atomic_read_u32(&bufHdr->state);
679  else
680  buf_state = LockBufHdr(bufHdr);
681 
682  if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
683  {
684  /*
685  * It's now safe to pin the buffer. We can't pin first and ask
686  * questions later, because it might confuse code paths like
687  * InvalidateBuffer() if we pinned a random non-matching buffer.
688  */
689  if (have_private_ref)
690  PinBuffer(bufHdr, NULL); /* bump pin count */
691  else
692  PinBuffer_Locked(bufHdr); /* pin for first time */
693 
694  pgBufferUsage.shared_blks_hit++;
695 
696  return true;
697  }
698 
699  /* If we locked the header above, now unlock. */
700  if (!have_private_ref)
701  UnlockBufHdr(bufHdr, buf_state);
702  }
703 
704  return false;
705 }

References Assert(), b, BM_MAX_USAGE_COUNT, BM_VALID, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), InitBufferTag(), BufferUsage::local_blks_hit, LocalRefCount, LockBufHdr(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, PinBuffer(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), ResourceOwnerRememberBuffer(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by XLogReadBufferExtended().
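
Usage sketch, loosely modeled on the XLogReadBufferExtended() caller: remember a buffer number (for example PrefetchSharedBuffer()'s recent_buffer), try it first, and fall back to a normal read when the buffer no longer holds that block. The wrapper name is an assumption.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/relfilelocator.h"

/* hypothetical wrapper: use a remembered buffer number as a hint */
static Buffer
read_block_with_hint(RelFileLocator rlocator, ForkNumber forknum,
                     BlockNumber blkno, Buffer recent, bool permanent)
{
    if (BufferIsValid(recent) &&
        ReadRecentBuffer(rlocator, forknum, blkno, recent))
        return recent;          /* still cached: pinned without a mapping-table lookup */

    /* hint was stale (or never set): do a regular lookup/read */
    return ReadBufferWithoutRelcache(rlocator, forknum, blkno,
                                     RBM_NORMAL, NULL, permanent);
}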

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 3709 of file bufmgr.c.

3712 {
3713  Buffer srcBuf;
3714  Buffer dstBuf;
3715  Page srcPage;
3716  Page dstPage;
3717  bool use_wal;
3718  BlockNumber nblocks;
3719  BlockNumber blkno;
3720  PGAlignedBlock buf;
3721  BufferAccessStrategy bstrategy_src;
3722  BufferAccessStrategy bstrategy_dst;
3723 
3724  /*
3725  * In general, we want to write WAL whenever wal_level > 'minimal', but we
3726  * can skip it when copying any fork of an unlogged relation other than
3727  * the init fork.
3728  */
3729  use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
3730 
3731  /* Get number of blocks in the source relation. */
3732  nblocks = smgrnblocks(smgropen(srclocator, InvalidBackendId),
3733  forkNum);
3734 
3735  /* Nothing to copy; just return. */
3736  if (nblocks == 0)
3737  return;
3738 
3739  /*
3740  * Bulk extend the destination relation of the same size as the source
3741  * relation before starting to copy block by block.
3742  */
3743  memset(buf.data, 0, BLCKSZ);
3744  smgrextend(smgropen(dstlocator, InvalidBackendId), forkNum, nblocks - 1,
3745  buf.data, true);
3746 
3747  /* This is a bulk operation, so use buffer access strategies. */
3748  bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
3749  bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
3750 
3751  /* Iterate over each block of the source relation file. */
3752  for (blkno = 0; blkno < nblocks; blkno++)
3753  {
3754  CHECK_FOR_INTERRUPTS();
3755 
3756  /* Read block from source relation. */
3757  srcBuf = ReadBufferWithoutRelcache(srclocator, forkNum, blkno,
3758  RBM_NORMAL, bstrategy_src,
3759  permanent);
3760  LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
3761  srcPage = BufferGetPage(srcBuf);
3762 
3763  /* Use P_NEW to extend the destination relation. */
3764  dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum, blkno,
3765  RBM_NORMAL, bstrategy_dst,
3766  permanent);
3767  LockBuffer(dstBuf, BUFFER_LOCK_EXCLUSIVE);
3768  dstPage = BufferGetPage(dstBuf);
3769 
3770  START_CRIT_SECTION();
3771 
3772  /* Copy page data from the source to the destination. */
3773  memcpy(dstPage, srcPage, BLCKSZ);
3774  MarkBufferDirty(dstBuf);
3775 
3776  /* WAL-log the copied page. */
3777  if (use_wal)
3778  log_newpage_buffer(dstBuf, true);
3779 
3780  END_CRIT_SECTION();
3781 
3782  UnlockReleaseBuffer(dstBuf);
3783  UnlockReleaseBuffer(srcBuf);
3784  }
3785 }
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3954
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1583
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition: bufmgr.c:799
@ BAS_BULKREAD
Definition: bufmgr.h:30
@ BAS_BULKWRITE
Definition: bufmgr.h:32
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
#define XLogIsNeeded()
Definition: xlog.h:104
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1191

References BAS_BULKREAD, BAS_BULKWRITE, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, END_CRIT_SECTION, GetAccessStrategy(), INIT_FORKNUM, InvalidBackendId, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_NORMAL, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().
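
A hedged sketch of the per-page copy pattern the loop above applies; copy_one_block() is a hypothetical helper and assumes both buffers are already pinned and locked as in the function body (source shared, destination exclusive), with use_wal decided the same way.

#include "postgres.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"

/* Hypothetical helper: caller holds the pins and content locks. */
static void
copy_one_block(Buffer srcBuf, Buffer dstBuf, bool use_wal)
{
    Page        srcPage = BufferGetPage(srcBuf);
    Page        dstPage = BufferGetPage(dstBuf);

    /* Dirtying the destination page and logging its image must be atomic. */
    START_CRIT_SECTION();

    memcpy(dstPage, srcPage, BLCKSZ);
    MarkBufferDirty(dstBuf);

    if (use_wal)
        log_newpage_buffer(dstBuf, true);   /* true: page has a standard header */

    END_CRIT_SECTION();
}

The critical section mirrors the original: an error partway through must not leave a modified, dirty page in shared buffers without a WAL record describing it.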

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 2950 of file bufmgr.c.

2951 {
2952  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
2953  {
2954  /*
2955  * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
2956  * tableam returns the size in bytes - but for the purpose of this
2957  * routine, we want the number of blocks. Therefore divide, rounding
2958  * up.
2959  */
2960  uint64 szbytes;
2961 
2962  szbytes = table_relation_size(relation, forkNum);
2963 
2964  return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
2965  }
2966  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
2967  {
2968  return smgrnblocks(RelationGetSmgr(relation), forkNum);
2969  }
2970  else
2971  Assert(false);
2972 
2973  return 0; /* keep compiler quiet */
2974 }
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1841

References Assert(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
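
A small illustration of the rounding-up conversion performed above, plus a typical call; bytes_to_blocks() and main_fork_size() are hypothetical helpers, not part of bufmgr.c.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper mirroring the round-up division used above. */
static inline BlockNumber
bytes_to_blocks(uint64 szbytes)
{
    /* With BLCKSZ = 8192: 0 -> 0, 1 -> 1, 8192 -> 1, 8193 -> 2. */
    return (BlockNumber) ((szbytes + (BLCKSZ - 1)) / BLCKSZ);
}

/* Typical call: number of blocks in the main fork of an open relation. */
static BlockNumber
main_fork_size(Relation rel)
{
    return RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);
}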

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 1646 of file bufmgr.c.

1649 {
1650  ForkNumber forkNum = MAIN_FORKNUM;
1651  BufferDesc *bufHdr;
1652 
1653  if (BufferIsValid(buffer))
1654  {
1655  Assert(BufferIsPinned(buffer));
1656  if (BufferIsLocal(buffer))
1657  {
1658  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1659  if (bufHdr->tag.blockNum == blockNum &&
1660  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
1661  BufTagGetForkNum(&bufHdr->tag) == forkNum)
1662  return buffer;
1663  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
1664  LocalRefCount[-buffer - 1]--;
1665  }
1666  else
1667  {
1668  bufHdr = GetBufferDescriptor(buffer - 1);
1669  /* we have pin, so it's ok to examine tag without spinlock */
1670  if (bufHdr->tag.blockNum == blockNum &&
1671  BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
1672  BufTagGetForkNum(&bufHdr->tag) == forkNum)
1673  return buffer;
1674  UnpinBuffer(bufHdr);
1675  }
1676  }
1677 
1678  return ReadBuffer(relation, blockNum);
1679 }
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:712
void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:972

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), LocalRefCount, MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), ResourceOwnerForgetBuffer(), BufferDesc::tag, and UnpinBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
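
A hedged usage sketch of the pin-swapping idiom this function enables; visit_blocks() is a hypothetical walker, not code from the callers listed above.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Hypothetical walker: reads blocks start..end of rel while holding at most
 * one pin.  When the next block is the one already pinned,
 * ReleaseAndReadBuffer() simply returns the same buffer.
 */
static void
visit_blocks(Relation rel, BlockNumber start, BlockNumber end)
{
    Buffer      buf = InvalidBuffer;
    BlockNumber blkno;

    for (blkno = start; blkno <= end; blkno++)
    {
        buf = ReleaseAndReadBuffer(buf, rel, blkno);
        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... examine BufferGetPage(buf) here ... */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }

    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
}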

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 3931 of file bufmgr.c.

3932 {
3933  if (!BufferIsValid(buffer))
3934  elog(ERROR, "bad buffer ID: %d", buffer);
3935 
3936  if (BufferIsLocal(buffer))
3937  {
3938  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
3939 
3940  Assert(LocalRefCount[-buffer - 1] > 0);
3941  LocalRefCount[-buffer - 1]--;
3942  return;
3943  }
3944 
3945  UnpinBuffer(GetBufferDescriptor(buffer - 1));
3946 }

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), CurrentResourceOwner, elog(), ERROR, GetBufferDescriptor(), LocalRefCount, ResourceOwnerForgetBuffer(), and UnpinBuffer().

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_getbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndBitmapHeapScan(), ExecEndIndexOnlyScan(), ExecReScanBitmapHeapScan(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgetpage(), heapgettup(), heapgettup_pagemode(), lazy_scan_heap(), lazy_vacuum_heap_rel(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), ReadBufferBI(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), ResourceOwnerReleaseInternal(), revmap_get_buffer(), revmap_physical_extend(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().
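
A minimal sketch of the usual pin/lock/unlock/unpin pairing around ReleaseBuffer(); peek_block() is a hypothetical helper.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* Hypothetical read-only peek at one block of rel. */
static void
peek_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* takes a pin */
    Page        page;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    page = BufferGetPage(buf);
    (void) page;                /* read-only inspection would go here */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    ReleaseBuffer(buf);         /* drops the pin taken by ReadBuffer() */
}

The last two calls could also be combined into UnlockReleaseBuffer(buf), which unlocks and unpins in one step.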

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 217 of file bufmgr.c.

218 {
219  /* Already reserved (or freed), nothing to do */
220  if (ReservedRefCountEntry != NULL)
221  return;
222 
223  /*
224  * First search for a free entry in the array; that'll be sufficient in the
225  * majority of cases.
226  */
227  {
228  int i;
229 
230  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
231  {
232  PrivateRefCountEntry *res;
233 
234  res = &PrivateRefCountArray[i];
235 
236  if (res->buffer == InvalidBuffer)
237  {
238  ReservedRefCountEntry = res;
239  return;
240  }
241  }
242  }
243 
244  /*
245  * No luck. All array entries are full. Move one array entry into the hash
246  * table.
247  */
248  {
249  /*
250  * Move entry from the current clock position in the array into the
251  * hashtable. Use that slot.
252  */
253  PrivateRefCountEntry *hashent;
254  bool found;
255 
256  /* select victim slot */