PostgreSQL Source Code (git master)
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define DROP_RELS_BSEARCH_THRESHOLD   20
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *flush_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const void *p1, const void *p2)
 
static int ckpt_buforder_comparator (const void *pa, const void *pb)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
bool ComputeIoConcurrency (int io_concurrency, double *target)
 
void PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void InitBufferPoolBackend (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock)
 
void DropRelFileNodesAllBuffers (RelFileNodeBackend *rnodes, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
int target_prefetch_pages = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

#define BUF_REUSABLE   0x02

Definition at line 66 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

#define BUF_WRITTEN   0x01

Definition at line 65 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 58 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)

Definition at line 419 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), FlushOneBuffer(), IncrBufferRefCount(), MarkBufferDirty(), and ReleaseAndReadBuffer().
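
For illustration, a minimal sketch of the pattern this macro supports (it is private to bufmgr.c, so only code in this file can use it; the helper name below is hypothetical). Mirroring BufferGetBlockNumber(): assert the pin, then read the buffer tag without taking the header spinlock:

 static BlockNumber
 sketch_read_blockno(Buffer buffer)
 {
     BufferDesc *bufHdr;

     /* caller must already hold a pin on the buffer */
     Assert(BufferIsPinned(buffer));

     if (BufferIsLocal(buffer))
         bufHdr = GetLocalBufferDescriptor(-buffer - 1);
     else
         bufHdr = GetBufferDescriptor(buffer - 1);

     /* pinned, so OK to read tag without spinlock */
     return bufHdr->tag.blockNum;
 }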

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 57 of file bufmgr.c.

Referenced by FlushBuffer(), and ReadBuffer_common().

#define DROP_RELS_BSEARCH_THRESHOLD   20

Definition at line 68 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers().

#define LocalBufHdrGetBlock (   bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 61 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().
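
The "+ 2" encodes the local-buffer id convention (see localbuf.c): the i'th local buffer descriptor carries buf_id = -i - 2, so buf_id + 1 is the public (negative) Buffer number in both the shared and local cases. A worked sketch of the arithmetic, assuming BLCKSZ = 8192:

 /* Shared descriptor with buf_id = 10:
  *   BufHdrGetBlock(hdr)      -> BufferBlocks + 10 * 8192   (Buffer number 11)
  * First local descriptor (i = 0) with buf_id = -2:
  *   LocalBufHdrGetBlock(hdr) -> LocalBufferBlockPointers[-(-2 + 2)]
  *                             = LocalBufferBlockPointers[0] (Buffer number -1)
  */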

#define REFCOUNT_ARRAY_ENTRIES   8

Typedef Documentation

Function Documentation

void AbortBufferIO ( void  )

Definition at line 3974 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, BufferDescriptorGetIOLock, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

3975 {
3976  BufferDesc *buf = InProgressBuf;
3977 
3978  if (buf)
3979  {
3980  uint32 buf_state;
3981 
3982  /*
3983  * Since LWLockReleaseAll has already been called, we're not holding
3984  * the buffer's io_in_progress_lock. We have to re-acquire it so that
3985  * we can use TerminateBufferIO. Anyone who's executing WaitIO on the
3986  * buffer will be in a busy spin until we succeed in doing this.
3987  */
3988  LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
3989 
3990  buf_state = LockBufHdr(buf);
3991  Assert(buf_state & BM_IO_IN_PROGRESS);
3992  if (IsForInput)
3993  {
3994  Assert(!(buf_state & BM_DIRTY));
3995 
3996  /* We'd better not think buffer is valid yet */
3997  Assert(!(buf_state & BM_VALID));
3998  UnlockBufHdr(buf, buf_state);
3999  }
4000  else
4001  {
4002  Assert(buf_state & BM_DIRTY);
4003  UnlockBufHdr(buf, buf_state);
4004  /* Issue notice if this is not the first failure... */
4005  if (buf_state & BM_IO_ERROR)
4006  {
4007  /* Buffer is pinned, so we can read tag without spinlock */
4008  char *path;
4009 
4010  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
4011  ereport(WARNING,
4012  (errcode(ERRCODE_IO_ERROR),
4013  errmsg("could not write block %u of %s",
4014  buf->tag.blockNum, path),
4015  errdetail("Multiple failures --- write error might be permanent.")));
4016  pfree(path);
4017  }
4018  }
4019  TerminateBufferIO(buf, false, BM_IO_ERROR);
4020  }
4021 }
void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2415 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2416 {
2417  CheckForBufferLeaks();
2418 
2419  AtEOXact_LocalBuffers(isCommit);
2420 
2421  Assert(PrivateRefCountOverflowed == 0);
2422 }
static void AtProcExit_Buffers (int code, Datum arg)

Definition at line 2471 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolBackend().

2472 {
2473  AbortBufferIO();
2474  UnlockBuffers();
2475 
2476  CheckForBufferLeaks();
2477 
2478  /* localbuf.c needs a chance too */
2479  AtProcExit_LocalBuffers();
2480 }
bool BgBufferSync (WritebackContext *wb_context)

Definition at line 2045 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2046 {
2047  /* info obtained from freelist.c */
2048  int strategy_buf_id;
2049  uint32 strategy_passes;
2050  uint32 recent_alloc;
2051 
2052  /*
2053  * Information saved between calls so we can determine the strategy
2054  * point's advance rate and avoid scanning already-cleaned buffers.
2055  */
2056  static bool saved_info_valid = false;
2057  static int prev_strategy_buf_id;
2058  static uint32 prev_strategy_passes;
2059  static int next_to_clean;
2060  static uint32 next_passes;
2061 
2062  /* Moving averages of allocation rate and clean-buffer density */
2063  static float smoothed_alloc = 0;
2064  static float smoothed_density = 10.0;
2065 
2066  /* Potentially these could be tunables, but for now, not */
2067  float smoothing_samples = 16;
2068  float scan_whole_pool_milliseconds = 120000.0;
2069 
2070  /* Used to compute how far we scan ahead */
2071  long strategy_delta;
2072  int bufs_to_lap;
2073  int bufs_ahead;
2074  float scans_per_alloc;
2075  int reusable_buffers_est;
2076  int upcoming_alloc_est;
2077  int min_scan_buffers;
2078 
2079  /* Variables for the scanning loop proper */
2080  int num_to_scan;
2081  int num_written;
2082  int reusable_buffers;
2083 
2084  /* Variables for final smoothed_density update */
2085  long new_strategy_delta;
2086  uint32 new_recent_alloc;
2087 
2088  /*
2089  * Find out where the freelist clock sweep currently is, and how many
2090  * buffer allocations have happened since our last call.
2091  */
2092  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2093 
2094  /* Report buffer alloc counts to pgstat */
2095  BgWriterStats.m_buf_alloc += recent_alloc;
2096 
2097  /*
2098  * If we're not running the LRU scan, just stop after doing the stats
2099  * stuff. We mark the saved state invalid so that we can recover sanely
2100  * if LRU scan is turned back on later.
2101  */
2102  if (bgwriter_lru_maxpages <= 0)
2103  {
2104  saved_info_valid = false;
2105  return true;
2106  }
2107 
2108  /*
2109  * Compute strategy_delta = how many buffers have been scanned by the
2110  * clock sweep since last time. If first time through, assume none. Then
2111  * see if we are still ahead of the clock sweep, and if so, how many
2112  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2113  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2114  * behavior when the passes counts wrap around.
2115  */
2116  if (saved_info_valid)
2117  {
2118  int32 passes_delta = strategy_passes - prev_strategy_passes;
2119 
2120  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2121  strategy_delta += (long) passes_delta *NBuffers;
2122 
2123  Assert(strategy_delta >= 0);
2124 
2125  if ((int32) (next_passes - strategy_passes) > 0)
2126  {
2127  /* we're one pass ahead of the strategy point */
2128  bufs_to_lap = strategy_buf_id - next_to_clean;
2129 #ifdef BGW_DEBUG
2130  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2131  next_passes, next_to_clean,
2132  strategy_passes, strategy_buf_id,
2133  strategy_delta, bufs_to_lap);
2134 #endif
2135  }
2136  else if (next_passes == strategy_passes &&
2137  next_to_clean >= strategy_buf_id)
2138  {
2139  /* on same pass, but ahead or at least not behind */
2140  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2141 #ifdef BGW_DEBUG
2142  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2143  next_passes, next_to_clean,
2144  strategy_passes, strategy_buf_id,
2145  strategy_delta, bufs_to_lap);
2146 #endif
2147  }
2148  else
2149  {
2150  /*
2151  * We're behind, so skip forward to the strategy point and start
2152  * cleaning from there.
2153  */
2154 #ifdef BGW_DEBUG
2155  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2156  next_passes, next_to_clean,
2157  strategy_passes, strategy_buf_id,
2158  strategy_delta);
2159 #endif
2160  next_to_clean = strategy_buf_id;
2161  next_passes = strategy_passes;
2162  bufs_to_lap = NBuffers;
2163  }
2164  }
2165  else
2166  {
2167  /*
2168  * Initializing at startup or after LRU scanning had been off. Always
2169  * start at the strategy point.
2170  */
2171 #ifdef BGW_DEBUG
2172  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2173  strategy_passes, strategy_buf_id);
2174 #endif
2175  strategy_delta = 0;
2176  next_to_clean = strategy_buf_id;
2177  next_passes = strategy_passes;
2178  bufs_to_lap = NBuffers;
2179  }
2180 
2181  /* Update saved info for next time */
2182  prev_strategy_buf_id = strategy_buf_id;
2183  prev_strategy_passes = strategy_passes;
2184  saved_info_valid = true;
2185 
2186  /*
2187  * Compute how many buffers had to be scanned for each new allocation, ie,
2188  * 1/density of reusable buffers, and track a moving average of that.
2189  *
2190  * If the strategy point didn't move, we don't update the density estimate
2191  */
2192  if (strategy_delta > 0 && recent_alloc > 0)
2193  {
2194  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2195  smoothed_density += (scans_per_alloc - smoothed_density) /
2196  smoothing_samples;
2197  }
2198 
2199  /*
2200  * Estimate how many reusable buffers there are between the current
2201  * strategy point and where we've scanned ahead to, based on the smoothed
2202  * density estimate.
2203  */
2204  bufs_ahead = NBuffers - bufs_to_lap;
2205  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2206 
2207  /*
2208  * Track a moving average of recent buffer allocations. Here, rather than
2209  * a true average we want a fast-attack, slow-decline behavior: we
2210  * immediately follow any increase.
2211  */
2212  if (smoothed_alloc <= (float) recent_alloc)
2213  smoothed_alloc = recent_alloc;
2214  else
2215  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2216  smoothing_samples;
2217 
2218  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2219  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2220 
2221  /*
2222  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2223  * eventually underflow to zero, and the underflows produce annoying
2224  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2225  * zero, there's no point in tracking smaller and smaller values of
2226  * smoothed_alloc, so just reset it to exactly zero to avoid this
2227  * syndrome. It will pop back up as soon as recent_alloc increases.
2228  */
2229  if (upcoming_alloc_est == 0)
2230  smoothed_alloc = 0;
2231 
2232  /*
2233  * Even in cases where there's been little or no buffer allocation
2234  * activity, we want to make a small amount of progress through the buffer
2235  * cache so that as many reusable buffers as possible are clean after an
2236  * idle period.
2237  *
2238  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2239  * the BGW will be called during the scan_whole_pool time; slice the
2240  * buffer pool into that many sections.
2241  */
2242  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2243 
2244  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2245  {
2246 #ifdef BGW_DEBUG
2247  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2248  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2249 #endif
2250  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2251  }
2252 
2253  /*
2254  * Now write out dirty reusable buffers, working forward from the
2255  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2256  * enough buffers to match our estimate of the next cycle's allocation
2257  * requirements, or hit the bgwriter_lru_maxpages limit.
2258  */
2259 
2260  /* Make sure we can handle the pin inside SyncOneBuffer */
2261  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2262 
2263  num_to_scan = bufs_to_lap;
2264  num_written = 0;
2265  reusable_buffers = reusable_buffers_est;
2266 
2267  /* Execute the LRU scan */
2268  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2269  {
2270  int sync_state = SyncOneBuffer(next_to_clean, true,
2271  wb_context);
2272 
2273  if (++next_to_clean >= NBuffers)
2274  {
2275  next_to_clean = 0;
2276  next_passes++;
2277  }
2278  num_to_scan--;
2279 
2280  if (sync_state & BUF_WRITTEN)
2281  {
2282  reusable_buffers++;
2283  if (++num_written >= bgwriter_lru_maxpages)
2284  {
2285  BgWriterStats.m_maxwritten_clean++;
2286  break;
2287  }
2288  }
2289  else if (sync_state & BUF_REUSABLE)
2290  reusable_buffers++;
2291  }
2292 
2293  BgWriterStats.m_buf_written_clean += num_written;
2294 
2295 #ifdef BGW_DEBUG
2296  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2297  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2298  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2299  bufs_to_lap - num_to_scan,
2300  num_written,
2301  reusable_buffers - reusable_buffers_est);
2302 #endif
2303 
2304  /*
2305  * Consider the above scan as being like a new allocation scan.
2306  * Characterize its density and update the smoothed one based on it. This
2307  * effectively halves the moving average period in cases where both the
2308  * strategy and the background writer are doing some useful scanning,
2309  * which is helpful because a long memory isn't as desirable on the
2310  * density estimates.
2311  */
2312  new_strategy_delta = bufs_to_lap - num_to_scan;
2313  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2314  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2315  {
2316  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2317  smoothed_density += (scans_per_alloc - smoothed_density) /
2318  smoothing_samples;
2319 
2320 #ifdef BGW_DEBUG
2321  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2322  new_recent_alloc, new_strategy_delta,
2323  scans_per_alloc, smoothed_density);
2324 #endif
2325  }
2326 
2327  /* Return true if OK to hibernate */
2328  return (bufs_to_lap == 0 && recent_alloc == 0);
2329 }
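
The pacing above boils down to two exponential moving averages plus a fast-attack allocation estimate. Below is a self-contained sketch of just that arithmetic; the input values are made up for illustration, while smoothing_samples = 16 and the bgwriter_lru_multiplier default of 2.0 match the source:

 #include <stdio.h>

 int
 main(void)
 {
     float       smoothed_alloc = 0;
     float       smoothed_density = 10.0;
     const float smoothing_samples = 16;
     const double lru_multiplier = 2.0;

     unsigned    recent_alloc = 100;   /* buffers allocated since last call */
     long        strategy_delta = 500; /* buffers passed by the clock sweep */

     /* buffers scanned per allocation = 1/density, folded into the average */
     float scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
     smoothed_density += (scans_per_alloc - smoothed_density) / smoothing_samples;

     /* fast-attack, slow-decline: follow any increase immediately */
     if (smoothed_alloc <= (float) recent_alloc)
         smoothed_alloc = recent_alloc;
     else
         smoothed_alloc += ((float) recent_alloc - smoothed_alloc) / smoothing_samples;

     printf("density=%.2f upcoming_alloc_est=%d\n",
            smoothed_density, (int) (smoothed_alloc * lru_multiplier));
     return 0;
 }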
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)

Definition at line 987 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FALSE, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, INIT_FORKNUM, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, NULL, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, RELPERSISTENCE_PERMANENT, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, TRUE, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

991 {
992  BufferTag newTag; /* identity of requested block */
993  uint32 newHash; /* hash value for newTag */
994  LWLock *newPartitionLock; /* buffer partition lock for it */
995  BufferTag oldTag; /* previous identity of selected buffer */
996  uint32 oldHash; /* hash value for oldTag */
997  LWLock *oldPartitionLock; /* buffer partition lock for it */
998  uint32 oldFlags;
999  int buf_id;
1000  BufferDesc *buf;
1001  bool valid;
1002  uint32 buf_state;
1003 
1004  /* create a tag so we can lookup the buffer */
1005  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1006 
1007  /* determine its hash code and partition lock ID */
1008  newHash = BufTableHashCode(&newTag);
1009  newPartitionLock = BufMappingPartitionLock(newHash);
1010 
1011  /* see if the block is in the buffer pool already */
1012  LWLockAcquire(newPartitionLock, LW_SHARED);
1013  buf_id = BufTableLookup(&newTag, newHash);
1014  if (buf_id >= 0)
1015  {
1016  /*
1017  * Found it. Now, pin the buffer so no one can steal it from the
1018  * buffer pool, and check to see if the correct data has been loaded
1019  * into the buffer.
1020  */
1021  buf = GetBufferDescriptor(buf_id);
1022 
1023  valid = PinBuffer(buf, strategy);
1024 
1025  /* Can release the mapping lock as soon as we've pinned it */
1026  LWLockRelease(newPartitionLock);
1027 
1028  *foundPtr = TRUE;
1029 
1030  if (!valid)
1031  {
1032  /*
1033  * We can only get here if (a) someone else is still reading in
1034  * the page, or (b) a previous read attempt failed. We have to
1035  * wait for any active read attempt to finish, and then set up our
1036  * own read attempt if the page is still not BM_VALID.
1037  * StartBufferIO does it all.
1038  */
1039  if (StartBufferIO(buf, true))
1040  {
1041  /*
1042  * If we get here, previous attempts to read the buffer must
1043  * have failed ... but we shall bravely try again.
1044  */
1045  *foundPtr = FALSE;
1046  }
1047  }
1048 
1049  return buf;
1050  }
1051 
1052  /*
1053  * Didn't find it in the buffer pool. We'll have to initialize a new
1054  * buffer. Remember to unlock the mapping lock while doing the work.
1055  */
1056  LWLockRelease(newPartitionLock);
1057 
1058  /* Loop here in case we have to try another victim buffer */
1059  for (;;)
1060  {
1061  /*
1062  * Ensure, while the spinlock's not yet held, that there's a free
1063  * refcount entry.
1064  */
1065  ReservePrivateRefCountEntry();
1066 
1067  /*
1068  * Select a victim buffer. The buffer is returned with its header
1069  * spinlock still held!
1070  */
1071  buf = StrategyGetBuffer(strategy, &buf_state);
1072 
1073  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1074 
1075  /* Must copy buffer flags while we still hold the spinlock */
1076  oldFlags = buf_state & BUF_FLAG_MASK;
1077 
1078  /* Pin the buffer and then release the buffer spinlock */
1079  PinBuffer_Locked(buf);
1080 
1081  /*
1082  * If the buffer was dirty, try to write it out. There is a race
1083  * condition here, in that someone might dirty it after we released it
1084  * above, or even while we are writing it out (since our share-lock
1085  * won't prevent hint-bit updates). We will recheck the dirty bit
1086  * after re-locking the buffer header.
1087  */
1088  if (oldFlags & BM_DIRTY)
1089  {
1090  /*
1091  * We need a share-lock on the buffer contents to write it out
1092  * (else we might write invalid data, eg because someone else is
1093  * compacting the page contents while we write). We must use a
1094  * conditional lock acquisition here to avoid deadlock. Even
1095  * though the buffer was not pinned (and therefore surely not
1096  * locked) when StrategyGetBuffer returned it, someone else could
1097  * have pinned and exclusive-locked it by the time we get here. If
1098  * we try to get the lock unconditionally, we'd block waiting for
1099  * them; if they later block waiting for us, deadlock ensues.
1100  * (This has been observed to happen when two backends are both
1101  * trying to split btree index pages, and the second one just
1102  * happens to be trying to split the page the first one got from
1103  * StrategyGetBuffer.)
1104  */
1105  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1106  LW_SHARED))
1107  {
1108  /*
1109  * If using a nondefault strategy, and writing the buffer
1110  * would require a WAL flush, let the strategy decide whether
1111  * to go ahead and write/reuse the buffer or to choose another
1112  * victim. We need lock to inspect the page LSN, so this
1113  * can't be done inside StrategyGetBuffer.
1114  */
1115  if (strategy != NULL)
1116  {
1117  XLogRecPtr lsn;
1118 
1119  /* Read the LSN while holding buffer header lock */
1120  buf_state = LockBufHdr(buf);
1121  lsn = BufferGetLSN(buf);
1122  UnlockBufHdr(buf, buf_state);
1123 
1124  if (XLogNeedsFlush(lsn) &&
1125  StrategyRejectBuffer(strategy, buf))
1126  {
1127  /* Drop lock/pin and loop around for another buffer */
1128  LWLockRelease(BufferDescriptorGetContentLock(buf));
1129  UnpinBuffer(buf, true);
1130  continue;
1131  }
1132  }
1133 
1134  /* OK, do the I/O */
1135  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1136  smgr->smgr_rnode.node.spcNode,
1137  smgr->smgr_rnode.node.dbNode,
1138  smgr->smgr_rnode.node.relNode);
1139 
1140  FlushBuffer(buf, NULL);
1141  LWLockRelease(BufferDescriptorGetContentLock(buf));
1142 
1143  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1144  &buf->tag);
1145 
1146  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1147  smgr->smgr_rnode.node.spcNode,
1148  smgr->smgr_rnode.node.dbNode,
1149  smgr->smgr_rnode.node.relNode);
1150  }
1151  else
1152  {
1153  /*
1154  * Someone else has locked the buffer, so give it up and loop
1155  * back to get another one.
1156  */
1157  UnpinBuffer(buf, true);
1158  continue;
1159  }
1160  }
1161 
1162  /*
1163  * To change the association of a valid buffer, we'll need to have
1164  * exclusive lock on both the old and new mapping partitions.
1165  */
1166  if (oldFlags & BM_TAG_VALID)
1167  {
1168  /*
1169  * Need to compute the old tag's hashcode and partition lock ID.
1170  * XXX is it worth storing the hashcode in BufferDesc so we need
1171  * not recompute it here? Probably not.
1172  */
1173  oldTag = buf->tag;
1174  oldHash = BufTableHashCode(&oldTag);
1175  oldPartitionLock = BufMappingPartitionLock(oldHash);
1176 
1177  /*
1178  * Must lock the lower-numbered partition first to avoid
1179  * deadlocks.
1180  */
1181  if (oldPartitionLock < newPartitionLock)
1182  {
1183  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1184  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1185  }
1186  else if (oldPartitionLock > newPartitionLock)
1187  {
1188  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1189  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1190  }
1191  else
1192  {
1193  /* only one partition, only one lock */
1194  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1195  }
1196  }
1197  else
1198  {
1199  /* if it wasn't valid, we need only the new partition */
1200  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1201  /* remember we have no old-partition lock or tag */
1202  oldPartitionLock = NULL;
1203  /* this just keeps the compiler quiet about uninit variables */
1204  oldHash = 0;
1205  }
1206 
1207  /*
1208  * Try to make a hashtable entry for the buffer under its new tag.
1209  * This could fail because while we were writing someone else
1210  * allocated another buffer for the same block we want to read in.
1211  * Note that we have not yet removed the hashtable entry for the old
1212  * tag.
1213  */
1214  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1215 
1216  if (buf_id >= 0)
1217  {
1218  /*
1219  * Got a collision. Someone has already done what we were about to
1220  * do. We'll just handle this as if it were found in the buffer
1221  * pool in the first place. First, give up the buffer we were
1222  * planning to use.
1223  */
1224  UnpinBuffer(buf, true);
1225 
1226  /* Can give up that buffer's mapping partition lock now */
1227  if (oldPartitionLock != NULL &&
1228  oldPartitionLock != newPartitionLock)
1229  LWLockRelease(oldPartitionLock);
1230 
1231  /* remaining code should match code at top of routine */
1232 
1233  buf = GetBufferDescriptor(buf_id);
1234 
1235  valid = PinBuffer(buf, strategy);
1236 
1237  /* Can release the mapping lock as soon as we've pinned it */
1238  LWLockRelease(newPartitionLock);
1239 
1240  *foundPtr = TRUE;
1241 
1242  if (!valid)
1243  {
1244  /*
1245  * We can only get here if (a) someone else is still reading
1246  * in the page, or (b) a previous read attempt failed. We
1247  * have to wait for any active read attempt to finish, and
1248  * then set up our own read attempt if the page is still not
1249  * BM_VALID. StartBufferIO does it all.
1250  */
1251  if (StartBufferIO(buf, true))
1252  {
1253  /*
1254  * If we get here, previous attempts to read the buffer
1255  * must have failed ... but we shall bravely try again.
1256  */
1257  *foundPtr = FALSE;
1258  }
1259  }
1260 
1261  return buf;
1262  }
1263 
1264  /*
1265  * Need to lock the buffer header too in order to change its tag.
1266  */
1267  buf_state = LockBufHdr(buf);
1268 
1269  /*
1270  * Somebody could have pinned or re-dirtied the buffer while we were
1271  * doing the I/O and making the new hashtable entry. If so, we can't
1272  * recycle this buffer; we must undo everything we've done and start
1273  * over with a new victim buffer.
1274  */
1275  oldFlags = buf_state & BUF_FLAG_MASK;
1276  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1277  break;
1278 
1279  UnlockBufHdr(buf, buf_state);
1280  BufTableDelete(&newTag, newHash);
1281  if (oldPartitionLock != NULL &&
1282  oldPartitionLock != newPartitionLock)
1283  LWLockRelease(oldPartitionLock);
1284  LWLockRelease(newPartitionLock);
1285  UnpinBuffer(buf, true);
1286  }
1287 
1288  /*
1289  * Okay, it's finally safe to rename the buffer.
1290  *
1291  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1292  * paranoia. We also reset the usage_count since any recency of use of
1293  * the old content is no longer relevant. (The usage_count starts out at
1294  * 1 so that the buffer can survive one clock-sweep pass.)
1295  *
1296  * Make sure BM_PERMANENT is set for buffers that must be written at every
1297  * checkpoint. Unlogged buffers only need to be written at shutdown
1298  * checkpoints, except for their "init" forks, which need to be treated
1299  * just like permanent relations.
1300  */
1301  buf->tag = newTag;
1302  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1303  BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT |
1304  BUF_USAGECOUNT_MASK);
1305  if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1306  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1307  else
1308  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1309 
1310  UnlockBufHdr(buf, buf_state);
1311 
1312  if (oldPartitionLock != NULL)
1313  {
1314  BufTableDelete(&oldTag, oldHash);
1315  if (oldPartitionLock != newPartitionLock)
1316  LWLockRelease(oldPartitionLock);
1317  }
1318 
1319  LWLockRelease(newPartitionLock);
1320 
1321  /*
1322  * Buffer contents are currently invalid. Try to get the io_in_progress
1323  * lock. If StartBufferIO returns false, then someone else managed to
1324  * read it before we did, so there's nothing left for BufferAlloc() to do.
1325  */
1326  if (StartBufferIO(buf, true))
1327  *foundPtr = FALSE;
1328  else
1329  *foundPtr = TRUE;
1330 
1331  return buf;
1332 }
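
One detail worth calling out in the retagging path above: when both the old and the new mapping partitions must be locked, they are always acquired in a fixed order (lower-numbered lock first). Since every backend follows the same global order, no cycle of waiters can form. The excerpt below restates that rule in isolation; oldPartitionLock and newPartitionLock stand for BufMappingPartitionLock() results:

 /* Fixed acquisition order prevents deadlock between two backends that
  * need the same pair of partitions in opposite roles. */
 if (oldPartitionLock < newPartitionLock)
 {
     LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
     LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
 }
 else if (oldPartitionLock > newPartitionLock)
 {
     LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
     LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
 }
 else
     LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);   /* same partition */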
BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2605 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_checkpage(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_step(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), CheckForSerializableConflictIn(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoCreateIndex(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistformdownlink(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRedoCreateIndex(), gistRelocateBuildBuffersOnSplit(), hash_xlog_add_ovfl_page(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), index_getnext(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgRedoCreateIndex(), spgSplitNodeAction(), spgWalk(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2606 {
2607  BufferDesc *bufHdr;
2608 
2609  Assert(BufferIsPinned(buffer));
2610 
2611  if (BufferIsLocal(buffer))
2612  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2613  else
2614  bufHdr = GetBufferDescriptor(buffer - 1);
2615 
2616  /* pinned, so OK to read tag without spinlock */
2617  return bufHdr->tag.blockNum;
2618 }
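
Typical caller pattern, as a hedged sketch (assumes a heap Relation 'rel' opened elsewhere; block 0 chosen arbitrarily; error handling omitted):

 Buffer  buf = ReadBuffer(rel, 0);

 LockBuffer(buf, BUFFER_LOCK_SHARE);
 elog(DEBUG1, "pinned block %u", BufferGetBlockNumber(buf));
 UnlockReleaseBuffer(buf);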
XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 2832 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

2833 {
2834  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
2835  char *page = BufferGetPage(buffer);
2836  XLogRecPtr lsn;
2837  uint32 buf_state;
2838 
2839  /*
2840  * If we don't need locking for correctness, fastpath out.
2841  */
2842  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
2843  return PageGetLSN(page);
2844 
2845  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2846  Assert(BufferIsValid(buffer));
2847  Assert(BufferIsPinned(buffer));
2848 
2849  buf_state = LockBufHdr(bufHdr);
2850  lsn = PageGetLSN(page);
2851  UnlockBufHdr(bufHdr, buf_state);
2852 
2853  return lsn;
2854 }
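
A condensed sketch of the SetHintBits() caller pattern (not verbatim; commitLSN is the committing transaction's commit-record LSN): on a permanent buffer whose covering WAL is not yet flushed, the hint bit is skipped rather than forcing a WAL flush:

 if (BufferIsPermanent(buffer) &&
     XLogNeedsFlush(commitLSN) &&
     BufferGetLSNAtomic(buffer) < commitLSN)
     return;                     /* not safe to set the hint yet */

 tuple->t_infomask |= infomask;
 MarkBufferDirtyHint(buffer, true);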
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)

Definition at line 2626 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2628 {
2629  BufferDesc *bufHdr;
2630 
2631  /* Do the same checks as BufferGetBlockNumber. */
2632  Assert(BufferIsPinned(buffer));
2633 
2634  if (BufferIsLocal(buffer))
2635  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2636  else
2637  bufHdr = GetBufferDescriptor(buffer - 1);
2638 
2639  /* pinned, so OK to read tag without spinlock */
2640  *rnode = bufHdr->tag.rnode;
2641  *forknum = bufHdr->tag.forkNum;
2642  *blknum = bufHdr->tag.blockNum;
2643 }
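
A small usage sketch: recover the physical identity of a pinned buffer for a log message, building the path with relpathperm() the same way this file's write-error callbacks do ('buf' is assumed to be a valid pinned Buffer):

 RelFileNode rnode;
 ForkNumber  forknum;
 BlockNumber blkno;
 char       *path;

 BufferGetTag(buf, &rnode, &forknum, &blkno);
 path = relpathperm(rnode, forknum);
 elog(DEBUG1, "buffer holds block %u of %s", blkno, path);
 pfree(path);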
bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2802 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2803 {
2804  BufferDesc *bufHdr;
2805 
2806  /* Local buffers are used only for temp relations. */
2807  if (BufferIsLocal(buffer))
2808  return false;
2809 
2810  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2811  Assert(BufferIsValid(buffer));
2812  Assert(BufferIsPinned(buffer));
2813 
2814  /*
2815  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2816  * need not bother with the buffer header spinlock. Even if someone else
2817  * changes the buffer header state while we're doing this, the state is
2818  * changed atomically, so we'll read the old value or the new value, but
2819  * not random garbage.
2820  */
2821  bufHdr = GetBufferDescriptor(buffer - 1);
2822  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
2823 }
static void BufferSync (int flags)

Definition at line 1778 of file bufmgr.c.

References Assert, BgWriterStats, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), ckpt_buforder_comparator(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgBgWriter::m_buf_written_checkpoints, NBuffers, NULL, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), pfree(), pg_atomic_read_u32(), PointerGetDatum, CkptTsStatus::progress, CkptTsStatus::progress_slice, qsort, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1779 {
1780  uint32 buf_state;
1781  int buf_id;
1782  int num_to_scan;
1783  int num_spaces;
1784  int num_processed;
1785  int num_written;
1786  CkptTsStatus *per_ts_stat = NULL;
1787  Oid last_tsid;
1788  binaryheap *ts_heap;
1789  int i;
1790  int mask = BM_DIRTY;
1791  WritebackContext wb_context;
1792 
1793  /* Make sure we can handle the pin inside SyncOneBuffer */
1794  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
1795 
1796  /*
1797  * Unless this is a shutdown checkpoint or we have been explicitly told,
1798  * we write only permanent, dirty buffers. But at shutdown or end of
1799  * recovery, we write all dirty buffers.
1800  */
1801  if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
1802  CHECKPOINT_FLUSH_ALL))))
1803  mask |= BM_PERMANENT;
1804 
1805  /*
1806  * Loop over all buffers, and mark the ones that need to be written with
1807  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1808  * can estimate how much work needs to be done.
1809  *
1810  * This allows us to write only those pages that were dirty when the
1811  * checkpoint began, and not those that get dirtied while it proceeds.
1812  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1813  * later in this function, or by normal backends or the bgwriter cleaning
1814  * scan, the flag is cleared. Any buffer dirtied after this point won't
1815  * have the flag set.
1816  *
1817  * Note that if we fail to write some buffer, we may leave buffers with
1818  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1819  * certainly need to be written for the next checkpoint attempt, too.
1820  */
1821  num_to_scan = 0;
1822  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1823  {
1824  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1825 
1826  /*
1827  * Header spinlock is enough to examine BM_DIRTY, see comment in
1828  * SyncOneBuffer.
1829  */
1830  buf_state = LockBufHdr(bufHdr);
1831 
1832  if ((buf_state & mask) == mask)
1833  {
1834  CkptSortItem *item;
1835 
1836  buf_state |= BM_CHECKPOINT_NEEDED;
1837 
1838  item = &CkptBufferIds[num_to_scan++];
1839  item->buf_id = buf_id;
1840  item->tsId = bufHdr->tag.rnode.spcNode;
1841  item->relNode = bufHdr->tag.rnode.relNode;
1842  item->forkNum = bufHdr->tag.forkNum;
1843  item->blockNum = bufHdr->tag.blockNum;
1844  }
1845 
1846  UnlockBufHdr(bufHdr, buf_state);
1847  }
1848 
1849  if (num_to_scan == 0)
1850  return; /* nothing to do */
1851 
1852  WritebackContextInit(&wb_context, &checkpoint_flush_after);
1853 
1854  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
1855 
1856  /*
1857  * Sort buffers that need to be written to reduce the likelihood of random
1858  * IO. The sorting is also important for the implementation of balancing
1859  * writes between tablespaces. Without balancing writes we'd potentially
1860  * end up writing to the tablespaces one-by-one; possibly overloading the
1861  * underlying system.
1862  */
1863  qsort(CkptBufferIds, num_to_scan, sizeof(CkptSortItem),
1864  ckpt_buforder_comparator);
1865 
1866  num_spaces = 0;
1867 
1868  /*
1869  * Allocate progress status for each tablespace with buffers that need to
1870  * be flushed. This requires the to-be-flushed array to be sorted.
1871  */
1872  last_tsid = InvalidOid;
1873  for (i = 0; i < num_to_scan; i++)
1874  {
1875  CkptTsStatus *s;
1876  Oid cur_tsid;
1877 
1878  cur_tsid = CkptBufferIds[i].tsId;
1879 
1880  /*
1881  * Grow array of per-tablespace status structs, every time a new
1882  * tablespace is found.
1883  */
1884  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
1885  {
1886  Size sz;
1887 
1888  num_spaces++;
1889 
1890  /*
1891  * Not worth adding grow-by-power-of-2 logic here - even with a
1892  * few hundred tablespaces this should be fine.
1893  */
1894  sz = sizeof(CkptTsStatus) * num_spaces;
1895 
1896  if (per_ts_stat == NULL)
1897  per_ts_stat = (CkptTsStatus *) palloc(sz);
1898  else
1899  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
1900 
1901  s = &per_ts_stat[num_spaces - 1];
1902  memset(s, 0, sizeof(*s));
1903  s->tsId = cur_tsid;
1904 
1905  /*
1906  * The first buffer in this tablespace. As CkptBufferIds is sorted
1907  * by tablespace all (s->num_to_scan) buffers in this tablespace
1908  * will follow afterwards.
1909  */
1910  s->index = i;
1911 
1912  /*
1913  * progress_slice will be determined once we know how many buffers
1914  * are in each tablespace, i.e. after this loop.
1915  */
1916 
1917  last_tsid = cur_tsid;
1918  }
1919  else
1920  {
1921  s = &per_ts_stat[num_spaces - 1];
1922  }
1923 
1924  s->num_to_scan++;
1925  }
1926 
1927  Assert(num_spaces > 0);
1928 
1929  /*
1930  * Build a min-heap over the write-progress in the individual tablespaces,
1931  * and compute how large a portion of the total progress a single
1932  * processed buffer is.
1933  */
1934  ts_heap = binaryheap_allocate(num_spaces,
1935  ts_ckpt_progress_comparator,
1936  NULL);
1937 
1938  for (i = 0; i < num_spaces; i++)
1939  {
1940  CkptTsStatus *ts_stat = &per_ts_stat[i];
1941 
1942  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
1943 
1944  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
1945  }
1946 
1947  binaryheap_build(ts_heap);
1948 
1949  /*
1950  * Iterate through to-be-checkpointed buffers and write the ones (still)
1951  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
1952  * tablespaces; otherwise the sorting would lead to only one tablespace
1953  * receiving writes at a time, making inefficient use of the hardware.
1954  */
1955  num_processed = 0;
1956  num_written = 0;
1957  while (!binaryheap_empty(ts_heap))
1958  {
1959  BufferDesc *bufHdr = NULL;
1960  CkptTsStatus *ts_stat = (CkptTsStatus *)
1961  DatumGetPointer(binaryheap_first(ts_heap));
1962 
1963  buf_id = CkptBufferIds[ts_stat->index].buf_id;
1964  Assert(buf_id != -1);
1965 
1966  bufHdr = GetBufferDescriptor(buf_id);
1967 
1968  num_processed++;
1969 
1970  /*
1971  * We don't need to acquire the lock here, because we're only looking
1972  * at a single bit. It's possible that someone else writes the buffer
1973  * and clears the flag right after we check, but that doesn't matter
1974  * since SyncOneBuffer will then do nothing. However, there is a
1975  * further race condition: it's conceivable that between the time we
1976  * examine the bit here and the time SyncOneBuffer acquires the lock,
1977  * someone else not only wrote the buffer but replaced it with another
1978  * page and dirtied it. In that improbable case, SyncOneBuffer will
1979  * write the buffer though we didn't need to. It doesn't seem worth
1980  * guarding against this, though.
1981  */
1982  if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
1983  {
1984  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
1985  {
1986  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
1987  BgWriterStats.m_buf_written_checkpoints++;
1988  num_written++;
1989  }
1990  }
1991 
1992  /*
1993  * Measure progress independent of actually having to flush the buffer
1994  * - otherwise writing becomes unbalanced.
1995  */
1996  ts_stat->progress += ts_stat->progress_slice;
1997  ts_stat->num_scanned++;
1998  ts_stat->index++;
1999 
2000  /* Have all the buffers from the tablespace been processed? */
2001  if (ts_stat->num_scanned == ts_stat->num_to_scan)
2002  {
2003  binaryheap_remove_first(ts_heap);
2004  }
2005  else
2006  {
2007  /* update heap with the new progress */
2008  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
2009  }
2010 
2011  /*
2012  * Sleep to throttle our I/O rate.
2013  */
2014  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
2015  }
2016 
2017  /* issue all pending flushes */
2018  IssuePendingWritebacks(&wb_context);
2019 
2020  pfree(per_ts_stat);
2021  per_ts_stat = NULL;
2022  binaryheap_free(ts_heap);
2023 
2024  /*
2025  * Update checkpoint statistics. As noted above, this doesn't include
2026  * buffers written by other backends or bgwriter scan.
2027  */
2028  CheckpointStats.ckpt_bufs_written += num_written;
2029 
2030  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2031 }
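
The balancing above rests on progress_slice = num_to_scan / num_to_scan(ts): each tablespace's progress advances by its own slice per processed buffer, so every tablespace reaches the common total at the same time, and the min-heap always hands back the most-behind tablespace. A worked example with assumed counts:

 /* 300 buffers to write: 200 in tablespace A, 100 in tablespace B.
  *   slice(A) = 300 / 200 = 1.5        slice(B) = 300 / 100 = 3.0
  * After one buffer each, progress is A = 1.5 vs B = 3.0, so the heap
  * yields A for the next two writes. Writes interleave roughly 2:1 and
  * both tablespaces hit progress 300 together, instead of A being
  * drained first. */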
static int buffertag_comparator(const void *p1, const void *p2)

Definition at line 4144 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

Referenced by IssuePendingWritebacks().

4145 {
4146  const BufferTag *ba = (const BufferTag *) a;
4147  const BufferTag *bb = (const BufferTag *) b;
4148  int ret;
4149 
4150  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4151 
4152  if (ret != 0)
4153  return ret;
4154 
4155  if (ba->forkNum < bb->forkNum)
4156  return -1;
4157  if (ba->forkNum > bb->forkNum)
4158  return 1;
4159 
4160  if (ba->blockNum < bb->blockNum)
4161  return -1;
4162  if (ba->blockNum > bb->blockNum)
4163  return 1;
4164 
4165  return 0;
4166 }
void BufmgrCommit ( void  )

Definition at line 2591 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2592 {
2593  /* Nothing to do in bufmgr anymore... */
2594 }
static void CheckForBufferLeaks(void)

Definition at line 2490 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, NULL, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2491 {
2492 #ifdef USE_ASSERT_CHECKING
2493  int RefCountErrors = 0;
2494  PrivateRefCountEntry *res;
2495  int i;
2496 
2497  /* check the array */
2498  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2499  {
2500  res = &PrivateRefCountArray[i];
2501 
2502  if (res->buffer != InvalidBuffer)
2503  {
2504  PrintBufferLeakWarning(res->buffer);
2505  RefCountErrors++;
2506  }
2507  }
2508 
2509  /* if necessary search the hash */
2510  if (PrivateRefCountOverflowed)
2511  {
2512  HASH_SEQ_STATUS hstat;
2513 
2514  hash_seq_init(&hstat, PrivateRefCountHash);
2515  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2516  {
2517  PrintBufferLeakWarning(res->buffer);
2518  RefCountErrors++;
2519  }
2520 
2521  }
2522 
2523  Assert(RefCountErrors == 0);
2524 #endif
2525 }
void CheckPointBuffers ( int  flags)

Definition at line 2574 of file bufmgr.c.

References BufferSync(), CheckpointStats, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and smgrsync().

Referenced by CheckPointGuts().

2575 {
2576  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
2577  CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
2578  BufferSync(flags);
2579  CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
2580  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
2581  smgrsync();
2582  CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
2583  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
2584 }
static int ckpt_buforder_comparator(const void *pa, const void *pb)

Definition at line 4175 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

Referenced by BufferSync().

4176 {
4177  const CkptSortItem *a = (CkptSortItem *) pa;
4178  const CkptSortItem *b = (CkptSortItem *) pb;
4179 
4180  /* compare tablespace */
4181  if (a->tsId < b->tsId)
4182  return -1;
4183  else if (a->tsId > b->tsId)
4184  return 1;
4185  /* compare relation */
4186  if (a->relNode < b->relNode)
4187  return -1;
4188  else if (a->relNode > b->relNode)
4189  return 1;
4190  /* compare fork */
4191  else if (a->forkNum < b->forkNum)
4192  return -1;
4193  else if (a->forkNum > b->forkNum)
4194  return 1;
4195  /* compare block number */
4196  else if (a->blockNum < b->blockNum)
4197  return -1;
4198  else /* should not be the same block ... */
4199  return 1;
4200 }
bool ComputeIoConcurrency(int io_concurrency, double *target)

Definition at line 467 of file bufmgr.c.

References i, Max, MAX_IO_CONCURRENCY, and Min.

Referenced by check_effective_io_concurrency(), and ExecInitBitmapHeapScan().

468 {
469  double new_prefetch_pages = 0.0;
470  int i;
471 
472  /*
473  * Make sure the io_concurrency value is within valid range; it may have
474  * been forced with a manual pg_tablespace update.
475  */
476  io_concurrency = Min(Max(io_concurrency, 0), MAX_IO_CONCURRENCY);
477 
478  /*----------
479  * The user-visible GUC parameter is the number of drives (spindles),
480  * which we need to translate to a number-of-pages-to-prefetch target.
481  * The target value is stashed in *extra and then assigned to the actual
482  * variable by assign_effective_io_concurrency.
483  *
484  * The expected number of prefetch pages needed to keep N drives busy is:
485  *
486  * drives | I/O requests
487  * -------+----------------
488  * 1 | 1
489  * 2 | 2/1 + 2/2 = 3
490  * 3 | 3/1 + 3/2 + 3/3 = 5 1/2
491  * 4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
492  * n | n * H(n)
493  *
494  * This is called the "coupon collector problem" and H(n) is called the
495  * harmonic series. This could be approximated by n * ln(n), but for
496  * reasonable numbers of drives we might as well just compute the series.
497  *
498  * Alternatively we could set the target to the number of pages necessary
499  * so that the expected number of active spindles is some arbitrary
500  * percentage of the total. This sounds the same but is actually slightly
501  * different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
502  * that desired fraction.
503  *
504  * Experimental results show that both of these formulas aren't aggressive
505  * enough, but we don't really have any better proposals.
506  *
507  * Note that if io_concurrency = 0 (disabled), we must set target = 0.
508  *----------
509  */
510 
511  for (i = 1; i <= io_concurrency; i++)
512  new_prefetch_pages += (double) io_concurrency / (double) i;
513 
514  *target = new_prefetch_pages;
515 
516  /* This range check shouldn't fail, but let's be paranoid */
517  return (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX);
518 }
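As a worked check of the table in the comment (hypothetical setting, not a default): for io_concurrency = 4 the loop sums 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3, so the prefetch target comes out at about 8.333 pages.

#include <stdio.h>

int
main(void)
{
	int			io_concurrency = 4;	/* hypothetical effective_io_concurrency */
	double		target = 0.0;
	int			i;

	/* same summation as ComputeIoConcurrency: n/1 + n/2 + ... + n/n = n*H(n) */
	for (i = 1; i <= io_concurrency; i++)
		target += (double) io_concurrency / (double) i;

	printf("prefetch target for %d drives: %.3f pages\n",
		   io_concurrency, target);	/* prints 8.333 */
	return 0;
}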
bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 3572 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_getbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

3573 {
3574  BufferDesc *buf;
3575 
3576  Assert(BufferIsValid(buffer));
3577  if (BufferIsLocal(buffer))
3578  return true; /* act as though we got it */
3579 
3580  buf = GetBufferDescriptor(buffer - 1);
3581 
3582  return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
3583  LW_EXCLUSIVE);
3584 }
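A hedged sketch of the usual try-lock pattern (hypothetical caller; the index AMs listed under "Referenced by" follow this shape): take the exclusive content lock only if it is immediately free, and otherwise move on rather than block.

	/* Sketch only: rel and blkno are assumed to be provided by the caller. */
	Buffer		buf = ReadBuffer(rel, blkno);

	if (ConditionalLockBuffer(buf))
	{
		/* got the exclusive content lock without waiting; safe to modify */
		/* ... mutate the page and MarkBufferDirty(buf) ... */
		UnlockReleaseBuffer(buf);
	}
	else
		ReleaseBuffer(buf);		/* contended: try some other page instead */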
bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 3718 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), lazy_scan_heap(), and lazy_vacuum_heap().

3719 {
3720  BufferDesc *bufHdr;
3721  uint32 buf_state,
3722  refcount;
3723 
3724  Assert(BufferIsValid(buffer));
3725 
3726  if (BufferIsLocal(buffer))
3727  {
3728  refcount = LocalRefCount[-buffer - 1];
3729  /* There should be exactly one pin */
3730  Assert(refcount > 0);
3731  if (refcount != 1)
3732  return false;
3733  /* Nobody else to wait for */
3734  return true;
3735  }
3736 
3737  /* There should be exactly one local pin */
3738  refcount = GetPrivateRefCount(buffer);
3739  Assert(refcount);
3740  if (refcount != 1)
3741  return false;
3742 
3743  /* Try to acquire lock */
3744  if (!ConditionalLockBuffer(buffer))
3745  return false;
3746 
3747  bufHdr = GetBufferDescriptor(buffer - 1);
3748  buf_state = LockBufHdr(bufHdr);
3749  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
3750 
3751  Assert(refcount > 0);
3752  if (refcount == 1)
3753  {
3754  /* Successfully acquired exclusive lock with pincount 1 */
3755  UnlockBufHdr(bufHdr, buf_state);
3756  return true;
3757  }
3758 
3759  /* Failed, so release the lock */
3760  UnlockBufHdr(bufHdr, buf_state);
3761  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3762  return false;
3763 }
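A hedged sketch of the vacuum-style call pattern (compare lazy_scan_heap under "Referenced by"): if the cleanup lock cannot be had immediately, skip the page rather than stall the whole scan.

	/* Sketch only: rel, blkno and strategy are assumed caller-provided. */
	Buffer		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
										 RBM_NORMAL, strategy);

	if (!ConditionalLockBufferForCleanup(buf))
	{
		ReleaseBuffer(buf);
		return;					/* revisit this block on a later pass */
	}
	/* sole pin + exclusive lock held: page may be defragmented safely */
	/* ... prune/vacuum the page ... */
	UnlockReleaseBuffer(buf);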
void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3043 of file bufmgr.c.

References RelFileNode::dbNode, GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), NBuffers, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3044 {
3045  int i;
3046 
3047  /*
3048  * We needn't consider local buffers, since by assumption the target
3049  * database isn't our own.
3050  */
3051 
3052  for (i = 0; i < NBuffers; i++)
3053  {
3054  BufferDesc *bufHdr = GetBufferDescriptor(i);
3055  uint32 buf_state;
3056 
3057  /*
3058  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3059  * and saves some cycles.
3060  */
3061  if (bufHdr->tag.rnode.dbNode != dbid)
3062  continue;
3063 
3064  buf_state = LockBufHdr(bufHdr);
3065  if (bufHdr->tag.rnode.dbNode == dbid)
3066  InvalidateBuffer(bufHdr); /* releases spinlock */
3067  else
3068  UnlockBufHdr(bufHdr, buf_state);
3069  }
3070 }
void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock)

Definition at line 2883 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, DropRelFileNodeLocalBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkfork(), and smgrtruncate().

2885 {
2886  int i;
2887 
2888  /* If it's a local relation, it's localbuf.c's problem. */
2889  if (RelFileNodeBackendIsTemp(rnode))
2890  {
2891  if (rnode.backend == MyBackendId)
2892  DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock);
2893  return;
2894  }
2895 
2896  for (i = 0; i < NBuffers; i++)
2897  {
2898  BufferDesc *bufHdr = GetBufferDescriptor(i);
2899  uint32 buf_state;
2900 
2901  /*
2902  * We can make this a tad faster by prechecking the buffer tag before
2903  * we attempt to lock the buffer; this saves a lot of lock
2904  * acquisitions in typical cases. It should be safe because the
2905  * caller must have AccessExclusiveLock on the relation, or some other
2906  * reason to be certain that no one is loading new pages of the rel
2907  * into the buffer pool. (Otherwise we might well miss such pages
2908  * entirely.) Therefore, while the tag might be changing while we
2909  * look at it, it can't be changing *to* a value we care about, only
2910  * *away* from such a value. So false negatives are impossible, and
2911  * false positives are safe because we'll recheck after getting the
2912  * buffer lock.
2913  *
2914  * We could check forkNum and blockNum as well as the rnode, but the
2915  * incremental win from doing so seems small.
2916  */
2917  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
2918  continue;
2919 
2920  buf_state = LockBufHdr(bufHdr);
2921  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
2922  bufHdr->tag.forkNum == forkNum &&
2923  bufHdr->tag.blockNum >= firstDelBlock)
2924  InvalidateBuffer(bufHdr); /* releases spinlock */
2925  else
2926  UnlockBufHdr(bufHdr, buf_state);
2927  }
2928 }
void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)

Definition at line 2940 of file bufmgr.c.

References DROP_RELS_BSEARCH_THRESHOLD, DropRelFileNodeAllLocalBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), MyBackendId, NBuffers, RelFileNodeBackend::node, NULL, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, rnode_comparator(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlink(), and smgrdounlinkall().

2941 {
2942  int i,
2943  n = 0;
2944  RelFileNode *nodes;
2945  bool use_bsearch;
2946 
2947  if (nnodes == 0)
2948  return;
2949 
2950  nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
2951 
2952  /* If it's a local relation, it's localbuf.c's problem. */
2953  for (i = 0; i < nnodes; i++)
2954  {
2955  if (RelFileNodeBackendIsTemp(rnodes[i]))
2956  {
2957  if (rnodes[i].backend == MyBackendId)
2958  DropRelFileNodeAllLocalBuffers(rnodes[i].node);
2959  }
2960  else
2961  nodes[n++] = rnodes[i].node;
2962  }
2963 
2964  /*
2965  * If there are no non-local relations, then we're done. Release the
2966  * memory and return.
2967  */
2968  if (n == 0)
2969  {
2970  pfree(nodes);
2971  return;
2972  }
2973 
2974  /*
2975  * For a small number of relations to drop, just use a simple walk-through
2976  * to save the bsearch overhead. The threshold is more a guess than an
2977  * exactly determined value, as it depends on many factors (CPU and RAM
2978  * speeds, amount of shared buffers etc.).
2979  */
2980  use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
2981 
2982  /* sort the list of rnodes if necessary */
2983  if (use_bsearch)
2984  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
2985 
2986  for (i = 0; i < NBuffers; i++)
2987  {
2988  RelFileNode *rnode = NULL;
2989  BufferDesc *bufHdr = GetBufferDescriptor(i);
2990  uint32 buf_state;
2991 
2992  /*
2993  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
2994  * and saves some cycles.
2995  */
2996 
2997  if (!use_bsearch)
2998  {
2999  int j;
3000 
3001  for (j = 0; j < n; j++)
3002  {
3003  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
3004  {
3005  rnode = &nodes[j];
3006  break;
3007  }
3008  }
3009  }
3010  else
3011  {
3012  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
3013  nodes, n, sizeof(RelFileNode),
3014  rnode_comparator);
3015  }
3016 
3017  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3018  if (rnode == NULL)
3019  continue;
3020 
3021  buf_state = LockBufHdr(bufHdr);
3022  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3023  InvalidateBuffer(bufHdr); /* releases spinlock */
3024  else
3025  UnlockBufHdr(bufHdr, buf_state);
3026  }
3027 
3028  pfree(nodes);
3029 }
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)

Definition at line 2665 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, NULL, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), and SyncOneBuffer().

2666 {
2667  XLogRecPtr recptr;
2668  ErrorContextCallback errcallback;
2669  instr_time io_start,
2670  io_time;
2671  Block bufBlock;
2672  char *bufToWrite;
2673  uint32 buf_state;
2674 
2675  /*
2676  * Acquire the buffer's io_in_progress lock. If StartBufferIO returns
2677  * false, then someone else flushed the buffer before we could, so we need
2678  * not do anything.
2679  */
2680  if (!StartBufferIO(buf, false))
2681  return;
2682 
2683  /* Setup error traceback support for ereport() */
2684  errcallback.callback = shared_buffer_write_error_callback;
2685  errcallback.arg = (void *) buf;
2686  errcallback.previous = error_context_stack;
2687  error_context_stack = &errcallback;
2688 
2689  /* Find smgr relation for buffer */
2690  if (reln == NULL)
2691  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2692 
2693  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2694  buf->tag.blockNum,
2695  reln->smgr_rnode.node.spcNode,
2696  reln->smgr_rnode.node.dbNode,
2697  reln->smgr_rnode.node.relNode);
2698 
2699  buf_state = LockBufHdr(buf);
2700 
2701  /*
2702  * Run PageGetLSN while holding header lock, since we don't have the
2703  * buffer locked exclusively in all cases.
2704  */
2705  recptr = BufferGetLSN(buf);
2706 
2707  /* To check if block content changes while flushing. - vadim 01/17/97 */
2708  buf_state &= ~BM_JUST_DIRTIED;
2709  UnlockBufHdr(buf, buf_state);
2710 
2711  /*
2712  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2713  * rule that log updates must hit disk before any of the data-file changes
2714  * they describe do.
2715  *
2716  * However, this rule does not apply to unlogged relations, which will be
2717  * lost after a crash anyway. Most unlogged relation pages do not bear
2718  * LSNs since we never emit WAL records for them, and therefore flushing
2719  * up through the buffer LSN would be useless, but harmless. However,
2720  * GiST indexes use LSNs internally to track page-splits, and therefore
2721  * unlogged GiST pages bear "fake" LSNs generated by
2722  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2723  * LSN counter could advance past the WAL insertion point; and if it did
2724  * happen, attempting to flush WAL through that location would fail, with
2725  * disastrous system-wide consequences. To make sure that can't happen,
2726  * skip the flush if the buffer isn't permanent.
2727  */
2728  if (buf_state & BM_PERMANENT)
2729  XLogFlush(recptr);
2730 
2731  /*
2732  * Now it's safe to write buffer to disk. Note that no one else should
2733  * have been able to write it while we were busy with log flushing because
2734  * we have the io_in_progress lock.
2735  */
2736  bufBlock = BufHdrGetBlock(buf);
2737 
2738  /*
2739  * Update page checksum if desired. Since we have only a shared lock on the
2740  * buffer, other processes might be updating hint bits in it, so we must
2741  * copy the page to private storage if we do checksumming.
2742  */
2743  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2744 
2745  if (track_io_timing)
2746  INSTR_TIME_SET_CURRENT(io_start);
2747 
2748  /*
2749  * bufToWrite is either the shared buffer or a copy, as appropriate.
2750  */
2751  smgrwrite(reln,
2752  buf->tag.forkNum,
2753  buf->tag.blockNum,
2754  bufToWrite,
2755  false);
2756 
2757  if (track_io_timing)
2758  {
2759  INSTR_TIME_SET_CURRENT(io_time);
2760  INSTR_TIME_SUBTRACT(io_time, io_start);
2761  INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
2762  pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
2763  }
2764 
2765  pgBufferUsage.shared_blks_written++;
2766 
2767  /*
2768  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2769  * end the io_in_progress state.
2770  */
2771  TerminateBufferIO(buf, true, 0);
2772 
2773  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2774  buf->tag.blockNum,
2775  reln->smgr_rnode.node.spcNode,
2776  reln->smgr_rnode.node.dbNode,
2777  reln->smgr_rnode.node.relNode);
2778 
2779  /* Pop the error context stack */
2780  error_context_stack = errcallback.previous;
2781 }
void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3246 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NULL, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3247 {
3248  int i;
3249  BufferDesc *bufHdr;
3250 
3251  /* Make sure we can handle the pin inside the loop */
3252  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3253 
3254  for (i = 0; i < NBuffers; i++)
3255  {
3256  uint32 buf_state;
3257 
3258  bufHdr = GetBufferDescriptor(i);
3259 
3260  /*
3261  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3262  * and saves some cycles.
3263  */
3264  if (bufHdr->tag.rnode.dbNode != dbid)
3265  continue;
3266 
3267  ReservePrivateRefCountEntry();
3268 
3269  buf_state = LockBufHdr(bufHdr);
3270  if (bufHdr->tag.rnode.dbNode == dbid &&
3271  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3272  {
3273  PinBuffer_Locked(bufHdr);
3274  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3275  FlushBuffer(bufHdr, NULL);
3276  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3277  UnpinBuffer(bufHdr, true);
3278  }
3279  else
3280  UnlockBufHdr(bufHdr, buf_state);
3281  }
3282 }
void FlushOneBuffer ( Buffer  buffer)

Definition at line 3289 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, LWLockHeldByMe(), and NULL.

Referenced by XLogReadBufferForRedoExtended().

3290 {
3291  BufferDesc *bufHdr;
3292 
3293  /* currently not needed, but no fundamental reason not to support */
3294  Assert(!BufferIsLocal(buffer));
3295 
3296  Assert(BufferIsPinned(buffer));
3297 
3298  bufHdr = GetBufferDescriptor(buffer - 1);
3299 
3300  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3301 
3302  FlushBuffer(bufHdr, NULL);
3303 }
void FlushRelationBuffers ( Relation  rel)

Definition at line 3148 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by ATExecSetTableSpace(), and heap_sync().

3149 {
3150  int i;
3151  BufferDesc *bufHdr;
3152 
3153  /* Open rel at the smgr level if not already done */
3154  RelationOpenSmgr(rel);
3155 
3156  if (RelationUsesLocalBuffers(rel))
3157  {
3158  for (i = 0; i < NLocBuffer; i++)
3159  {
3160  uint32 buf_state;
3161 
3162  bufHdr = GetLocalBufferDescriptor(i);
3163  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3164  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3165  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3166  {
3167  ErrorContextCallback errcallback;
3168  Page localpage;
3169 
3170  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3171 
3172  /* Setup error traceback support for ereport() */
3173  errcallback.callback = local_buffer_write_error_callback;
3174  errcallback.arg = (void *) bufHdr;
3175  errcallback.previous = error_context_stack;
3176  error_context_stack = &errcallback;
3177 
3178  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3179 
3180  smgrwrite(rel->rd_smgr,
3181  bufHdr->tag.forkNum,
3182  bufHdr->tag.blockNum,
3183  localpage,
3184  false);
3185 
3186  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3187  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3188 
3189  /* Pop the error context stack */
3190  error_context_stack = errcallback.previous;
3191  }
3192  }
3193 
3194  return;
3195  }
3196 
3197  /* Make sure we can handle the pin inside the loop */
3198  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3199 
3200  for (i = 0; i < NBuffers; i++)
3201  {
3202  uint32 buf_state;
3203 
3204  bufHdr = GetBufferDescriptor(i);
3205 
3206  /*
3207  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3208  * and saves some cycles.
3209  */
3210  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3211  continue;
3212 
3213  ReservePrivateRefCountEntry();
3214 
3215  buf_state = LockBufHdr(bufHdr);
3216  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3217  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3218  {
3219  PinBuffer_Locked(bufHdr);
3220  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
3221  FlushBuffer(bufHdr, rel->rd_smgr);
3222  LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
3223  UnpinBuffer(bufHdr, true);
3224  }
3225  else
3226  UnlockBufHdr(bufHdr, buf_state);
3227  }
3228 }
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)

Definition at line 381 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

382 {
383  Assert(ref->refcount == 0);
384 
385  if (ref >= &PrivateRefCountArray[0] &&
386  ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
387  {
388  ref->buffer = InvalidBuffer;
389 
390  /*
391  * Mark the just used entry as reserved - in many scenarios that
392  * allows us to avoid ever having to search the array/hash for free
393  * entries.
394  */
395  ReservedRefCountEntry = ref;
396  }
397  else
398  {
399  bool found;
400  Buffer buffer = ref->buffer;
401 
402  hash_search(PrivateRefCountHash,
403  (void *) &buffer,
404  HASH_REMOVE,
405  &found);
406  Assert(found);
407  Assert(PrivateRefCountOverflowed > 0);
408  PrivateRefCountOverflowed--;
409  }
410 }
static inline int32 GetPrivateRefCount(Buffer buffer)

Definition at line 358 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), NULL, and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), and PrintBufferLeakWarning().

359 {
360  PrivateRefCountEntry *ref;
361 
362  Assert(BufferIsValid(buffer));
363  Assert(!BufferIsLocal(buffer));
364 
365  /*
366  * Not moving the entry - that's ok for the current users, but we might
367  * want to change this one day.
368  */
369  ref = GetPrivateRefCountEntry(buffer, false);
370 
371  if (ref == NULL)
372  return 0;
373  return ref->refcount;
374 }
static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move)

Definition at line 278 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, NULL, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

279 {
280  PrivateRefCountEntry *res;
281  int i;
282 
283  Assert(BufferIsValid(buffer));
284  Assert(!BufferIsLocal(buffer));
285 
286  /*
287  * First search for references in the array; that'll be sufficient in the
288  * majority of cases.
289  */
290  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
291  {
292  res = &PrivateRefCountArray[i];
293 
294  if (res->buffer == buffer)
295  return res;
296  }
297 
298  /*
299  * By here we know that the buffer, if already pinned, isn't residing in
300  * the array.
301  *
302  * Only look up the buffer in the hashtable if we've previously overflowed
303  * into it.
304  */
305  if (PrivateRefCountOverflowed == 0)
306  return NULL;
307 
308  res = hash_search(PrivateRefCountHash,
309  (void *) &buffer,
310  HASH_FIND,
311  NULL);
312 
313  if (res == NULL)
314  return NULL;
315  else if (!do_move)
316  {
317  /* caller doesn't want us to move the hash entry into the array */
318  return res;
319  }
320  else
321  {
322  /* move buffer from hashtable into the free array slot */
323  bool found;
324  PrivateRefCountEntry *free;
325 
326  /* Ensure there's a free array slot */
327  ReservePrivateRefCountEntry();
328 
329  /* Use up the reserved slot */
330  Assert(ReservedRefCountEntry != NULL);
331  free = ReservedRefCountEntry;
332  ReservedRefCountEntry = NULL;
333  Assert(free->buffer == InvalidBuffer);
334 
335  /* and fill it */
336  free->buffer = buffer;
337  free->refcount = res->refcount;
338 
339  /* delete from hashtable */
340  hash_search(PrivateRefCountHash,
341  (void *) &buffer,
342  HASH_REMOVE,
343  &found);
344  Assert(found);
345  Assert(PrivateRefCountOverflowed > 0);
346  PrivateRefCountOverflowed--;
347 
348  return free;
349  }
350 }
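The structure maintained here is a general fast-path pattern: probe a small fixed array first, and touch the overflow hash only when the overflow counter proves it can be non-empty. A minimal generic sketch of the idea (hypothetical names, including a hypothetical overflow_hash_lookup(); this is not the bufmgr.c implementation):

#define NENTRIES 8				/* mirrors REFCOUNT_ARRAY_ENTRIES */

typedef struct
{
	int			key;			/* 0 means the slot is unused */
	int			count;
} Entry;

static Entry entries[NENTRIES];
static int	noverflowed;		/* number of entries spilled to the hash */

static Entry *overflow_hash_lookup(int key);	/* hypothetical hash probe */

static Entry *
lookup(int key)
{
	int			i;

	/* a cheap linear probe of the fixed array handles most lookups */
	for (i = 0; i < NENTRIES; i++)
		if (entries[i].key == key)
			return &entries[i];

	/* nothing ever spilled: skip the hash probe entirely */
	if (noverflowed == 0)
		return NULL;

	return overflow_hash_lookup(key);
}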
bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 3692 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

3693 {
3694  int bufid = GetStartupBufferPinWaitBufId();
3695 
3696  /*
3697  * If we get woken slowly then it's possible that the Startup process was
3698  * already woken by other backends before we got here. It's also possible
3699  * that we got here via multiple interrupts or interrupts at inappropriate
3700  * times, so make sure we do nothing if the bufid is not set.
3701  */
3702  if (bufid < 0)
3703  return false;
3704 
3705  if (GetPrivateRefCount(bufid + 1) > 0)
3706  return true;
3707 
3708  return false;
3709 }
void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3347 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, NULL, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ExecStoreTuple(), ReadBufferBI(), scanPostingTree(), and startScanEntry().

3348 {
3349  Assert(BufferIsPinned(buffer));
3350  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3351  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
3352  if (BufferIsLocal(buffer))
3353  LocalRefCount[-buffer - 1]++;
3354  else
3355  {
3356  PrivateRefCountEntry *ref;
3357 
3358  ref = GetPrivateRefCountEntry(buffer, true);
3359  Assert(ref != NULL);
3360  ref->refcount++;
3361  }
3362 }
void InitBufferPoolAccess ( void  )

Definition at line 2437 of file bufmgr.c.

References HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MemSet, and PrivateRefCountArray.

Referenced by BaseInit().

2438 {
2439  HASHCTL hash_ctl;
2440 
2441  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2442 
2443  MemSet(&hash_ctl, 0, sizeof(hash_ctl));
2444  hash_ctl.keysize = sizeof(int32);
2445  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2446 
2447  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2448  HASH_ELEM | HASH_BLOBS);
2449 }
void InitBufferPoolBackend ( void  )

Definition at line 2461 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().

2462 {
2463  on_shmem_exit(AtProcExit_Buffers, 0);
2464 }
static void InvalidateBuffer(BufferDesc *buf)

Definition at line 1352 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

1353 {
1354  BufferTag oldTag;
1355  uint32 oldHash; /* hash value for oldTag */
1356  LWLock *oldPartitionLock; /* buffer partition lock for it */
1357  uint32 oldFlags;
1358  uint32 buf_state;
1359 
1360  /* Save the original buffer tag before dropping the spinlock */
1361  oldTag = buf->tag;
1362 
1363  buf_state = pg_atomic_read_u32(&buf->state);
1364  Assert(buf_state & BM_LOCKED);
1365  UnlockBufHdr(buf, buf_state);
1366 
1367  /*
1368  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1369  * worth storing the hashcode in BufferDesc so we need not recompute it
1370  * here? Probably not.
1371  */
1372  oldHash = BufTableHashCode(&oldTag);
1373  oldPartitionLock = BufMappingPartitionLock(oldHash);
1374 
1375 retry:
1376 
1377  /*
1378  * Acquire exclusive mapping lock in preparation for changing the buffer's
1379  * association.
1380  */
1381  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1382 
1383  /* Re-lock the buffer header */
1384  buf_state = LockBufHdr(buf);
1385 
1386  /* If it's changed while we were waiting for lock, do nothing */
1387  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1388  {
1389  UnlockBufHdr(buf, buf_state);
1390  LWLockRelease(oldPartitionLock);
1391  return;
1392  }
1393 
1394  /*
1395  * We assume the only reason for it to be pinned is that someone else is
1396  * flushing the page out. Wait for them to finish. (This could be an
1397  * infinite loop if the refcount is messed up... it would be nice to time
1398  * out after a while, but there seems no way to be sure how many loops may
1399  * be needed. Note that if the other guy has pinned the buffer but not
1400  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1401  * be busy-looping here.)
1402  */
1403  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1404  {
1405  UnlockBufHdr(buf, buf_state);
1406  LWLockRelease(oldPartitionLock);
1407  /* safety check: should definitely not be our *own* pin */
1408  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1409  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1410  WaitIO(buf);
1411  goto retry;
1412  }
1413 
1414  /*
1415  * Clear out the buffer's tag and flags. We must do this to ensure that
1416  * linear scans of the buffer array don't think the buffer is valid.
1417  */
1418  oldFlags = buf_state & BUF_FLAG_MASK;
1419  CLEAR_BUFFERTAG(buf->tag);
1420  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1421  UnlockBufHdr(buf, buf_state);
1422 
1423  /*
1424  * Remove the buffer from the lookup hashtable, if it was in there.
1425  */
1426  if (oldFlags & BM_TAG_VALID)
1427  BufTableDelete(&oldTag, oldHash);
1428 
1429  /*
1430  * Done with mapping lock.
1431  */
1432  LWLockRelease(oldPartitionLock);
1433 
1434  /*
1435  * Insert the buffer at the head of the list of free buffers.
1436  */
1437  StrategyFreeBuffer(buf);
1438 }
bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 3774 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), hash_xlog_split_allocate_page(), and hashbucketcleanup().

3775 {
3776  BufferDesc *bufHdr;
3777  uint32 buf_state;
3778 
3779  Assert(BufferIsValid(buffer));
3780 
3781  if (BufferIsLocal(buffer))
3782  {
3783  /* There should be exactly one pin */
3784  if (LocalRefCount[-buffer - 1] != 1)
3785  return false;
3786  /* Nobody else to wait for */
3787  return true;
3788  }
3789 
3790  /* There should be exactly one local pin */
3791  if (GetPrivateRefCount(buffer) != 1)
3792  return false;
3793 
3794  bufHdr = GetBufferDescriptor(buffer - 1);
3795 
3796  /* caller must hold exclusive lock on buffer */
3797  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
3798  LW_EXCLUSIVE));
3799 
3800  buf_state = LockBufHdr(bufHdr);
3801 
3802  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3803  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3804  {
3805  /* pincount is OK. */
3806  UnlockBufHdr(bufHdr, buf_state);
3807  return true;
3808  }
3809 
3810  UnlockBufHdr(bufHdr, buf_state);
3811  return false;
3812 }
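A hedged sketch of the intended call pattern (compare hashbucketcleanup under "Referenced by"): the caller takes the exclusive content lock first and performs pin-sensitive cleanup only when this check passes.

	/* Sketch only: buf is assumed already pinned by the caller. */
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	if (IsBufferCleanupOK(buf))
	{
		/* sole pin: safe to do work that concurrent pins would forbid, */
		/* e.g. physically removing dead line pointers from the page    */
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);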
void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4276 of file bufmgr.c.

References buftag::blockNum, buffertag_comparator(), cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, qsort, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4277 {
4278  int i;
4279 
4280  if (context->nr_pending == 0)
4281  return;
4282 
4283  /*
4284  * Executing the writes in-order can make them a lot faster, and allows us
4285  * to merge writeback requests for consecutive blocks into larger writebacks.
4286  */
4287  qsort(&context->pending_writebacks, context->nr_pending,
4288  sizeof(PendingWriteback), buffertag_comparator);
4289 
4290  /*
4291  * Coalesce neighbouring writes, but nothing else. For that we iterate
4292  * through the now-sorted array of pending flushes, and look forward to
4293  * find all neighbouring (or identical) writes.
4294  */
4295  for (i = 0; i < context->nr_pending; i++)
4296  {
4297  PendingWriteback *cur;
4298  PendingWriteback *next;
4299  SMgrRelation reln;
4300  int ahead;
4301  BufferTag tag;
4302  Size nblocks = 1;
4303 
4304  cur = &context->pending_writebacks[i];
4305  tag = cur->tag;
4306 
4307  /*
4308  * Peek ahead, into following writeback requests, to see if they can
4309  * be combined with the current one.
4310  */
4311  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4312  {
4313  next = &context->pending_writebacks[i + ahead + 1];
4314 
4315  /* different file, stop */
4316  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4317  cur->tag.forkNum != next->tag.forkNum)
4318  break;
4319 
4320  /* ok, block queued twice, skip */
4321  if (cur->tag.blockNum == next->tag.blockNum)
4322  continue;
4323 
4324  /* only merge consecutive writes */
4325  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4326  break;
4327 
4328  nblocks++;
4329  cur = next;
4330  }
4331 
4332  i += ahead;
4333 
4334  /* and finally tell the kernel to write the data to storage */
4335  reln = smgropen(tag.rnode, InvalidBackendId);
4336  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4337  }
4338 
4339  context->nr_pending = 0;
4340 }
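The skip-duplicates / merge-consecutive rules are easy to verify in isolation. This standalone sketch (hypothetical block numbers; a single relation and fork are assumed) applies the same rules to a sorted queue and emits two requests, (start=10, nblocks=3) and (start=20, nblocks=2):

#include <stdio.h>

int
main(void)
{
	/* sorted, possibly duplicated block numbers queued for writeback */
	int			blocks[] = {10, 11, 12, 20, 20, 21};
	int			npending = 6;
	int			i;

	for (i = 0; i < npending; i++)
	{
		int			start = blocks[i];
		int			cur = blocks[i];
		int			nblocks = 1;
		int			ahead;

		/* peek ahead, as IssuePendingWritebacks does */
		for (ahead = 0; i + ahead + 1 < npending; ahead++)
		{
			int			next = blocks[i + ahead + 1];

			if (cur == next)
				continue;		/* block queued twice: skip it */
			if (cur + 1 != next)
				break;			/* not consecutive: stop merging */
			nblocks++;
			cur = next;
		}
		i += ahead;

		/* stands in for smgrwriteback(reln, forkNum, start, nblocks) */
		printf("writeback start=%d nblocks=%d\n", start, nblocks);
	}
	return 0;
}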
static void local_buffer_write_error_callback(void *arg)

Definition at line 4046 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, NULL, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4047 {
4048  BufferDesc *bufHdr = (BufferDesc *) arg;
4049 
4050  if (bufHdr != NULL)
4051  {
4052  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4053  bufHdr->tag.forkNum);
4054 
4055  errcontext("writing block %u of relation %s",
4056  bufHdr->tag.blockNum, path);
4057  pfree(path);
4058  }
4059 }
void LockBuffer(Buffer buffer, int mode)

Definition at line 3546 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_doinsert(), _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_first(), _bt_getbuf(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_readnextpage(), _bt_relandgetbuf(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_init(), _hash_readnext(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), acquire_sample_rows(), bitgetpage(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items(), bt_page_stats(), btree_xlog_delete_get_latestRemovedXid(), btvacuumpage(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), copy_heap_data(), count_nondeletable_pages(), entryLoadMoreItems(), ExecCheckHeapTupleVisible(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetTupleForTrigger(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuumcleanup(), hash_xlog_vacuum_get_latestRemovedXid(), hashbucketcleanup(), hashbulkdelete(), hashgettuple(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_hot_search(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), index_fetch_heap(), IndexBuildHeapRangeScan(), initBloomState(), lazy_scan_heap(), LockBufferForCleanup(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), revmap_physical_extend(), RI_FKey_check(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), systable_recheck_tuple(), tablesample_getnext(), UnlockReleaseBuffer(), validate_index_heapscan(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

3547 {
3548  BufferDesc *buf;
3549 
3550  Assert(BufferIsValid(buffer));
3551  if (BufferIsLocal(buffer))
3552  return; /* local buffers need no lock */
3553 
3554  buf = GetBufferDescriptor(buffer - 1);
3555 
3556  if (mode == BUFFER_LOCK_UNLOCK)
3557  LWLockRelease(BufferDescriptorGetContentLock(buf));
3558  else if (mode == BUFFER_LOCK_SHARE)
3559  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
3560  else if (mode == BUFFER_LOCK_EXCLUSIVE)
3561  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
3562  else
3563  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
3564 }
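
Illustrative sketch (not part of bufmgr.c): the usual caller pattern around LockBuffer(). The hypothetical helper read_one_page() pins the page first, holds the content lock only while examining it, then drops both.

/*
 * Sketch only: typical LockBuffer() usage; read_one_page() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static void
read_one_page(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* acquires a pin */
    Page        page;

    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* content lock */
    page = BufferGetPage(buf);
    /* ... examine the page here; it cannot change under us ... */
    (void) page;
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);        /* lock released, pin kept */
    ReleaseBuffer(buf);                         /* pin dropped */
}
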
void LockBufferForCleanup ( Buffer  buffer)

Definition at line 3603 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), MyProcPid, NULL, PG_WAIT_BUFFER_PIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), SetStartupBufferPinWaitBufId(), UnlockBufHdr, and BufferDesc::wait_backend_pid.

Referenced by btree_xlog_vacuum(), btvacuumpage(), btvacuumscan(), ginVacuumPostingTreeLeaves(), hashbulkdelete(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

3604 {
3605  BufferDesc *bufHdr;
3606 
3607  Assert(BufferIsValid(buffer));
3608  Assert(PinCountWaitBuf == NULL);
3609 
3610  if (BufferIsLocal(buffer))
3611  {
3612  /* There should be exactly one pin */
3613  if (LocalRefCount[-buffer - 1] != 1)
3614  elog(ERROR, "incorrect local pin count: %d",
3615  LocalRefCount[-buffer - 1]);
3616  /* Nobody else to wait for */
3617  return;
3618  }
3619 
3620  /* There should be exactly one local pin */
3621  if (GetPrivateRefCount(buffer) != 1)
3622  elog(ERROR, "incorrect local pin count: %d",
3623  GetPrivateRefCount(buffer));
3624 
3625  bufHdr = GetBufferDescriptor(buffer - 1);
3626 
3627  for (;;)
3628  {
3629  uint32 buf_state;
3630 
3631  /* Try to acquire lock */
3632  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3633  buf_state = LockBufHdr(bufHdr);
3634 
3635  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3636  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3637  {
3638  /* Successfully acquired exclusive lock with pincount 1 */
3639  UnlockBufHdr(bufHdr, buf_state);
3640  return;
3641  }
3642  /* Failed, so mark myself as waiting for pincount 1 */
3643  if (buf_state & BM_PIN_COUNT_WAITER)
3644  {
3645  UnlockBufHdr(bufHdr, buf_state);
3646  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3647  elog(ERROR, "multiple backends attempting to wait for pincount 1");
3648  }
3649  bufHdr->wait_backend_pid = MyProcPid;
3650  PinCountWaitBuf = bufHdr;
3651  buf_state |= BM_PIN_COUNT_WAITER;
3652  UnlockBufHdr(bufHdr, buf_state);
3653  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3654 
3655  /* Wait to be signaled by UnpinBuffer() */
3656  if (InHotStandby)
3657  {
3658  /* Publish the bufid that Startup process waits on */
3659  SetStartupBufferPinWaitBufId(buffer - 1);
3660  /* Set alarm and then wait to be signaled by UnpinBuffer() */
3661  ResolveRecoveryConflictWithBufferPin();
3662  /* Reset the published bufid */
3663  SetStartupBufferPinWaitBufId(-1);
3664  }
3665  else
3666  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
3667 
3668  /*
3669  * Remove flag marking us as waiter. Normally this will not be set
3670  * anymore, but ProcWaitForSignal() can return for other signals as
3671  * well. We take care to only reset the flag if we're the waiter, as
3672  * theoretically another backend could have started waiting. That's
3673  * impossible with the current usages due to table level locking, but
3674  * better be safe.
3675  */
3676  buf_state = LockBufHdr(bufHdr);
3677  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
3678  bufHdr->wait_backend_pid == MyProcPid)
3679  buf_state &= ~BM_PIN_COUNT_WAITER;
3680  UnlockBufHdr(bufHdr, buf_state);
3681 
3682  PinCountWaitBuf = NULL;
3683  /* Loop back and try again */
3684  }
3685 }
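
Illustrative sketch (not part of bufmgr.c): how vacuum-style code takes a cleanup lock, i.e. the exclusive content lock plus the sole pin. The helper prune_block() is hypothetical; ConditionalLockBufferForCleanup() is the non-blocking variant for callers that can skip a busy page.

/*
 * Sketch only: acquiring a cleanup lock; prune_block() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static void
prune_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, NULL);

    /* Blocks until we hold the exclusive content lock and the sole pin. */
    LockBufferForCleanup(buf);
    /* ... safe to move tuples around on the page here ... */
    UnlockReleaseBuffer(buf);
}
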
uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4092 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4093 {
4094  SpinDelayStatus delayStatus;
4095  uint32 old_buf_state;
4096 
4097  init_local_spin_delay(&delayStatus);
4098 
4099  while (true)
4100  {
4101  /* set BM_LOCKED flag */
4102  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4103  /* if it wasn't set before we're OK */
4104  if (!(old_buf_state & BM_LOCKED))
4105  break;
4106  perform_spin_delay(&delayStatus);
4107  }
4108  finish_spin_delay(&delayStatus);
4109  return old_buf_state | BM_LOCKED;
4110 }
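
Illustrative sketch (not part of bufmgr.c): the header-spinlock protocol as used throughout this file. LockBufHdr() returns the state word with BM_LOCKED set, and the caller hands its (possibly modified) copy back to UnlockBufHdr(), which stores it with BM_LOCKED cleared. The helper has_other_pins() is hypothetical.

/*
 * Sketch only: buffer-header spinlock protocol; has_other_pins() is
 * hypothetical and written as if it lived near bufmgr.c.
 */
#include "postgres.h"
#include "storage/buf_internals.h"

static bool
has_other_pins(BufferDesc *bufHdr)
{
    uint32      buf_state = LockBufHdr(bufHdr); /* state with BM_LOCKED set */
    bool        result = BUF_STATE_GET_REFCOUNT(buf_state) > 1;

    /* UnlockBufHdr() writes buf_state back with BM_LOCKED cleared. */
    UnlockBufHdr(bufHdr, buf_state);
    return result;
}
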
void MarkBufferDirty ( Buffer  buffer)

Definition at line 1450 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), AlterSequence(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistbulkdelete(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_clean(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_heap(), lazy_vacuum_page(), moveLeafs(), nextval_internal(), RelationAddExtraBlocks(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), writeListPage(), and XLogReadBufferForRedoExtended().

1451 {
1452  BufferDesc *bufHdr;
1453  uint32 buf_state;
1454  uint32 old_buf_state;
1455 
1456  if (!BufferIsValid(buffer))
1457  elog(ERROR, "bad buffer ID: %d", buffer);
1458 
1459  if (BufferIsLocal(buffer))
1460  {
1461  MarkLocalBufferDirty(buffer);
1462  return;
1463  }
1464 
1465  bufHdr = GetBufferDescriptor(buffer - 1);
1466 
1467  Assert(BufferIsPinned(buffer));
1468  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1469  LW_EXCLUSIVE));
1470 
1471  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1472  for (;;)
1473  {
1474  if (old_buf_state & BM_LOCKED)
1475  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1476 
1477  buf_state = old_buf_state;
1478 
1479  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1480  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1481 
1482  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1483  buf_state))
1484  break;
1485  }
1486 
1487  /*
1488  * If the buffer was not dirty already, do vacuum accounting.
1489  */
1490  if (!(old_buf_state & BM_DIRTY))
1491  {
1492  VacuumPageDirty++;
1493  pgBufferUsage.shared_blks_dirtied++;
1494  if (VacuumCostActive)
1495  VacuumCostBalance += VacuumCostPageDirty;
1496  }
1497 }
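
Illustrative sketch (not part of bufmgr.c): the canonical ordering for a WAL-logged page change. The helper modify_page() is hypothetical and the XLogInsert()/PageSetLSN() step is elided; note the exclusive content lock demanded by the Assert above, and that the buffer is marked dirty before the WAL record is inserted.

/*
 * Sketch only: modify_page() is hypothetical; WAL record construction elided.
 */
#include "postgres.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"

static void
modify_page(Buffer buf)
{
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);   /* required by the Assert above */

    START_CRIT_SECTION();
    /* ... apply the change to BufferGetPage(buf) ... */
    MarkBufferDirty(buf);       /* must precede the (elided) XLogInsert() */
    /* ... XLogInsert() the change, then PageSetLSN() ... */
    END_CRIT_SECTION();

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
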
void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3379 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGXACT::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyPgXact, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), BufferUsage::shared_blks_dirtied, BufferDesc::state, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3380 {
3381  BufferDesc *bufHdr;
3382  Page page = BufferGetPage(buffer);
3383 
3384  if (!BufferIsValid(buffer))
3385  elog(ERROR, "bad buffer ID: %d", buffer);
3386 
3387  if (BufferIsLocal(buffer))
3388  {
3389  MarkLocalBufferDirty(buffer);
3390  return;
3391  }
3392 
3393  bufHdr = GetBufferDescriptor(buffer - 1);
3394 
3395  Assert(GetPrivateRefCount(buffer) > 0);
3396  /* here, either share or exclusive lock is OK */
3397  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3398 
3399  /*
3400  * This routine might get called many times on the same page, if we are
3401  * making the first scan after commit of an xact that added/deleted many
3402  * tuples. So, be as quick as we can if the buffer is already dirty. We
3403  * do this by not acquiring spinlock if it looks like the status bits are
3404  * already set. Since we make this test unlocked, there's a chance we
3405  * might fail to notice that the flags have just been cleared, and failed
3406  * to reset them, due to memory-ordering issues. But since this function
3407  * is only intended to be used in cases where failing to write out the
3408  * data would be harmless anyway, it doesn't really matter.
3409  */
3410  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3411  (BM_DIRTY | BM_JUST_DIRTIED))
3412  {
3413  XLogRecPtr lsn = InvalidXLogRecPtr;
3414  bool dirtied = false;
3415  bool delayChkpt = false;
3416  uint32 buf_state;
3417 
3418  /*
3419  * If we need to protect hint bit updates from torn writes, WAL-log a
3420  * full page image of the page. This full page image is only necessary
3421  * if the hint bit update is the first change to the page since the
3422  * last checkpoint.
3423  *
3424  * We don't check full_page_writes here because that logic is included
3425  * when we call XLogInsert() since the value changes dynamically.
3426  */
3427  if (XLogHintBitIsNeeded() &&
3428  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3429  {
3430  /*
3431  * If we're in recovery we cannot dirty a page because of a hint.
3432  * We can set the hint, just not dirty the page as a result so the
3433  * hint is lost when we evict the page or shutdown.
3434  *
3435  * See src/backend/storage/page/README for longer discussion.
3436  */
3437  if (RecoveryInProgress())
3438  return;
3439 
3440  /*
3441  * If the block is already dirty because we either made a change
3442  * or set a hint already, then we don't need to write a full page
3443  * image. Note that aggressive cleaning of blocks dirtied by hint
3444  * bit setting would increase the call rate. Bulk setting of hint
3445  * bits would reduce the call rate...
3446  *
3447  * We must issue the WAL record before we mark the buffer dirty.
3448  * Otherwise we might write the page before we write the WAL. That
3449  * causes a race condition, since a checkpoint might occur between
3450  * writing the WAL record and marking the buffer dirty. We solve
3451  * that with a kluge, but one that is already in use during
3452  * transaction commit to prevent race conditions. Basically, we
3453  * simply prevent the checkpoint WAL record from being written
3454  * until we have marked the buffer dirty. We don't start the
3455  * checkpoint flush until we have marked dirty, so our checkpoint
3456  * must flush the change to disk successfully or the checkpoint
3457  * never gets written, so crash recovery will fix.
3458  *
3459  * It's possible we may enter here without an xid, so it is
3460  * essential that CreateCheckpoint waits for virtual transactions
3461  * rather than full transactionids.
3462  */
3463  MyPgXact->delayChkpt = delayChkpt = true;
3464  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3465  }
3466 
3467  buf_state = LockBufHdr(bufHdr);
3468 
3469  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3470 
3471  if (!(buf_state & BM_DIRTY))
3472  {
3473  dirtied = true; /* Means "will be dirtied by this action" */
3474 
3475  /*
3476  * Set the page LSN if we wrote a backup block. We aren't supposed
3477  * to set this when only holding a share lock but as long as we
3478  * serialise it somehow we're OK. We choose to set LSN while
3479  * holding the buffer header lock, which causes any reader of an
3480  * LSN who holds only a share lock to also obtain a buffer header
3481  * lock before using PageGetLSN(), which is enforced in
3482  * BufferGetLSNAtomic().
3483  *
3484  * If checksums are enabled, you might think we should reset the
3485  * checksum here. That will happen when the page is written
3486  * sometime later in this checkpoint cycle.
3487  */
3488  if (!XLogRecPtrIsInvalid(lsn))
3489  PageSetLSN(page, lsn);
3490  }
3491 
3492  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3493  UnlockBufHdr(bufHdr, buf_state);
3494 
3495  if (delayChkpt)
3496  MyPgXact->delayChkpt = false;
3497 
3498  if (dirtied)
3499  {
3500  VacuumPageDirty++;
3501  pgBufferUsage.shared_blks_dirtied++;
3502  if (VacuumCostActive)
3503  VacuumCostBalance += VacuumCostPageDirty;
3504  }
3505  }
3506 }
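
Illustrative sketch (not part of bufmgr.c): a hint-bit style caller in the spirit of SetHintBits(), the most common user. The helper set_page_hint() is hypothetical and the actual bit manipulation is elided; only a pin plus a share-or-stronger content lock is required.

/*
 * Sketch only: set_page_hint() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static void
set_page_hint(Buffer buf)
{
    /* Caller holds a pin plus at least BUFFER_LOCK_SHARE. */
    Page        page = BufferGetPage(buf);

    /* ... set a hint bit somewhere on 'page' ... */
    (void) page;
    MarkBufferDirtyHint(buf, true);     /* true = standard page layout */
}
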
static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 252 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, NULL, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

253 {
254  PrivateRefCountEntry *res;
255 
256  /* only allowed to be called when a reservation has been made */
257  Assert(ReservedRefCountEntry != NULL);
258 
259  /* use up the reserved entry */
260  res = ReservedRefCountEntry;
261  ReservedRefCountEntry = NULL;
262 
263  /* and fill it */
264  res->buffer = buffer;
265  res->refcount = 0;
266 
267  return res;
268 }
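
Illustrative sketch: the reserve-then-consume protocol this function implements. The helper pin_sketch() is hypothetical, and since ReservePrivateRefCountEntry(), NewPrivateRefCountEntry() and PrivateRefCountEntry are all file-static, such code could only exist inside bufmgr.c itself.

/*
 * Sketch only: pin_sketch() is hypothetical and bufmgr.c-internal.
 */
static void
pin_sketch(BufferDesc *buf)
{
    Buffer      b = BufferDescriptorGetBuffer(buf);
    PrivateRefCountEntry *ref;

    /* Reserve while no spinlock is held: this step may allocate. */
    ReservePrivateRefCountEntry();
    /* Consume later: this step cannot fail, even under the header lock. */
    ref = NewPrivateRefCountEntry(b);
    ref->refcount++;
}
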
static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1571 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), NULL, pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, and WaitBufHdrUnlocked().

Referenced by BufferAlloc().

1572 {
1573  Buffer b = BufferDescriptorGetBuffer(buf);
1574  bool result;
1575  PrivateRefCountEntry *ref;
1576 
1577  ref = GetPrivateRefCountEntry(b, true);
1578 
1579  if (ref == NULL)
1580  {
1581  uint32 buf_state;
1582  uint32 old_buf_state;
1583 
1585  ref = NewPrivateRefCountEntry(b);
1586 
1587  old_buf_state = pg_atomic_read_u32(&buf->state);
1588  for (;;)
1589  {
1590  if (old_buf_state & BM_LOCKED)
1591  old_buf_state = WaitBufHdrUnlocked(buf);
1592 
1593  buf_state = old_buf_state;
1594 
1595  /* increase refcount */
1596  buf_state += BUF_REFCOUNT_ONE;
1597 
1598  if (strategy == NULL)
1599  {
1600  /* Default case: increase usagecount unless already max. */
1601  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1602  buf_state += BUF_USAGECOUNT_ONE;
1603  }
1604  else
1605  {
1606  /*
1607  * Ring buffers shouldn't evict others from pool. Thus we
1608  * don't make usagecount more than 1.
1609  */
1610  if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
1611  buf_state += BUF_USAGECOUNT_ONE;
1612  }
1613 
1614  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1615  buf_state))
1616  {
1617  result = (buf_state & BM_VALID) != 0;
1618  break;
1619  }
1620  }
1621  }
1622  else
1623  {
1624  /* If we previously pinned the buffer, it must surely be valid */
1625  result = true;
1626  }
1627 
1628  ref->refcount++;
1629  Assert(ref->refcount > 0);
1630  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1631  return result;
1632 }
static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1656 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), NULL, pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, and UnlockBufHdr.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), and SyncOneBuffer().

1657 {
1658  Buffer b;
1659  PrivateRefCountEntry *ref;
1660  uint32 buf_state;
1661 
1662  /*
1663  * As explained, We don't expect any preexisting pins. That allows us to
1664  * manipulate the PrivateRefCount after releasing the spinlock
1665  */
1666  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1667 
1668  /*
1669  * Since we hold the buffer spinlock, we can update the buffer state and
1670  * release the lock in one operation.
1671  */
1672  buf_state = pg_atomic_read_u32(&buf->state);
1673  Assert(buf_state & BM_LOCKED);
1674  buf_state += BUF_REFCOUNT_ONE;
1675  UnlockBufHdr(buf, buf_state);
1676 
1677  b = BufferDescriptorGetBuffer(buf);
1678 
1679  ref = NewPrivateRefCountEntry(b);
1680  ref->refcount++;
1681 
1682  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1683 }
void PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 529 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), ereport, errcode(), errmsg(), ERROR, INIT_BUFFERTAG, LocalPrefetchBuffer(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, RelationData::rd_smgr, RELATION_IS_OTHER_TEMP, RelationIsValid, RelationOpenSmgr, RelationUsesLocalBuffers, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by BitmapPrefetch(), count_nondeletable_pages(), and pg_prewarm().

530 {
531 #ifdef USE_PREFETCH
532  Assert(RelationIsValid(reln));
533  Assert(BlockNumberIsValid(blockNum));
534 
535  /* Open it at the smgr level if not already done */
536  RelationOpenSmgr(reln);
537 
538  if (RelationUsesLocalBuffers(reln))
539  {
540  /* see comments in ReadBufferExtended */
541  if (RELATION_IS_OTHER_TEMP(reln))
542  ereport(ERROR,
543  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
544  errmsg("cannot access temporary tables of other sessions")));
545 
546  /* pass it off to localbuf.c */
547  LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
548  }
549  else
550  {
551  BufferTag newTag; /* identity of requested block */
552  uint32 newHash; /* hash value for newTag */
553  LWLock *newPartitionLock; /* buffer partition lock for it */
554  int buf_id;
555 
556  /* create a tag so we can lookup the buffer */
557  INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
558  forkNum, blockNum);
559 
560  /* determine its hash code and partition lock ID */
561  newHash = BufTableHashCode(&newTag);
562  newPartitionLock = BufMappingPartitionLock(newHash);
563 
564  /* see if the block is in the buffer pool already */
565  LWLockAcquire(newPartitionLock, LW_SHARED);
566  buf_id = BufTableLookup(&newTag, newHash);
567  LWLockRelease(newPartitionLock);
568 
569  /* If not in buffers, initiate prefetch */
570  if (buf_id < 0)
571  smgrprefetch(reln->rd_smgr, forkNum, blockNum);
572 
573  /*
574  * If the block *is* in buffers, we do nothing. This is not really
575  * ideal: the block might be just about to be evicted, which would be
576  * stupid since we know we are going to need it soon. But the only
577  * easy answer is to bump the usage_count, which does not seem like a
578  * great solution: when the caller does ultimately touch the block,
579  * usage_count would get bumped again, resulting in too much
580  * favoritism for blocks that are involved in a prefetch sequence. A
581  * real fix would involve some additional per-buffer state, and it's
582  * not clear that there's enough of a problem to justify that.
583  */
584  }
585 #endif /* USE_PREFETCH */
586 }
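
Illustrative sketch (not part of bufmgr.c): issuing prefetch requests a fixed distance ahead of the block currently being processed. Both scan_with_prefetch() and PREFETCH_DISTANCE are invented for this example; the latter is not a PostgreSQL setting.

/*
 * Sketch only: scan_with_prefetch() and PREFETCH_DISTANCE are hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

#define PREFETCH_DISTANCE 32

static void
scan_with_prefetch(Relation rel, BlockNumber nblocks)
{
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;

        if (blkno + PREFETCH_DISTANCE < nblocks)
            PrefetchBuffer(rel, MAIN_FORKNUM, blkno + PREFETCH_DISTANCE);

        buf = ReadBuffer(rel, blkno);
        /* ... process the page ... */
        ReleaseBuffer(buf);
    }
}
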
void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2531 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2532 {
2533  BufferDesc *buf;
2534  int32 loccount;
2535  char *path;
2536  BackendId backend;
2537  uint32 buf_state;
2538 
2539  Assert(BufferIsValid(buffer));
2540  if (BufferIsLocal(buffer))
2541  {
2542  buf = GetLocalBufferDescriptor(-buffer - 1);
2543  loccount = LocalRefCount[-buffer - 1];
2544  backend = MyBackendId;
2545  }
2546  else
2547  {
2548  buf = GetBufferDescriptor(buffer - 1);
2549  loccount = GetPrivateRefCount(buffer);
2550  backend = InvalidBackendId;
2551  }
2552 
2553  /* theoretically we should lock the bufhdr here */
2554  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2555  buf_state = pg_atomic_read_u32(&buf->state);
2556  elog(WARNING,
2557  "buffer refcount leak: [%03d] "
2558  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2559  buffer, path,
2560  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2561  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2562  pfree(path);
2563 }
Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 594 of file bufmgr.c.

References MAIN_FORKNUM, NULL, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetTupleForHeapBlock(), brinRevmapInitialize(), bt_metap(), bt_page_items(), bt_page_stats(), fill_seq_with_data(), GetTupleForTrigger(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_hot_search(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), ReadBufferBI(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

595 {
596  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
597 }
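
Illustrative sketch (not part of bufmgr.c): the relation-extension idiom built on P_NEW, in a hypothetical extend_by_one_page(). New buffers come back zero-filled; real callers normally also WAL-log the page initialization, which is elided here.

/*
 * Sketch only: extend_by_one_page() is hypothetical; WAL logging elided.
 */
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"

static Buffer
extend_by_one_page(Relation rel)
{
    Buffer      buf = ReadBuffer(rel, P_NEW);   /* zero-filled new block */

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
    MarkBufferDirty(buf);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    return buf;         /* still pinned; caller releases when done */
}
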
static Buffer ReadBuffer_common ( SMgrRelation  reln,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 703 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerified(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().

706 {
707  BufferDesc *bufHdr;
708  Block bufBlock;
709  bool found;
710  bool isExtend;
711  bool isLocalBuf = SmgrIsTemp(smgr);
712 
713  *hit = false;
714 
715  /* Make sure we will have room to remember the buffer pin */
716  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
717 
718  isExtend = (blockNum == P_NEW);
719 
720  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
721  smgr->smgr_rnode.node.spcNode,
722  smgr->smgr_rnode.node.dbNode,
723  smgr->smgr_rnode.node.relNode,
724  smgr->smgr_rnode.backend,
725  isExtend);
726 
727  /* Substitute proper block number if caller asked for P_NEW */
728  if (isExtend)
729  blockNum = smgrnblocks(smgr, forkNum);
730 
731  if (isLocalBuf)
732  {
733  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
734  if (found)
735  pgBufferUsage.local_blks_hit++;
736  else
737  pgBufferUsage.local_blks_read++;
738  }
739  else
740  {
741  /*
742  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
743  * not currently in memory.
744  */
745  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
746  strategy, &found);
747  if (found)
748  pgBufferUsage.shared_blks_hit++;
749  else
750  pgBufferUsage.shared_blks_read++;
751  }
752 
753  /* At this point we do NOT hold any locks. */
754 
755  /* if it was already in the buffer pool, we're done */
756  if (found)
757  {
758  if (!isExtend)
759  {
760  /* Just need to update stats before we exit */
761  *hit = true;
762  VacuumPageHit++;
763 
764  if (VacuumCostActive)
765  VacuumCostBalance += VacuumCostPageHit;
766 
767  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
768  smgr->smgr_rnode.node.spcNode,
769  smgr->smgr_rnode.node.dbNode,
770  smgr->smgr_rnode.node.relNode,
771  smgr->smgr_rnode.backend,
772  isExtend,
773  found);
774 
775  /*
776  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
777  * locked on return.
778  */
779  if (!isLocalBuf)
780  {
781  if (mode == RBM_ZERO_AND_LOCK)
782  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
783  LW_EXCLUSIVE);
784  else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
785  LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
786  }
787 
788  return BufferDescriptorGetBuffer(bufHdr);
789  }
790 
791  /*
792  * We get here only in the corner case where we are trying to extend
793  * the relation but we found a pre-existing buffer marked BM_VALID.
794  * This can happen because mdread doesn't complain about reads beyond
795  * EOF (when zero_damaged_pages is ON) and so a previous attempt to
796  * read a block beyond EOF could have left a "valid" zero-filled
797  * buffer. Unfortunately, we have also seen this case occurring
798  * because of buggy Linux kernels that sometimes return an
799  * lseek(SEEK_END) result that doesn't account for a recent write. In
800  * that situation, the pre-existing buffer would contain valid data
801  * that we don't want to overwrite. Since the legitimate case should
802  * always have left a zero-filled buffer, complain if not PageIsNew.
803  */
804  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
805  if (!PageIsNew((Page) bufBlock))
806  ereport(ERROR,
807  (errmsg("unexpected data beyond EOF in block %u of relation %s",
808  blockNum, relpath(smgr->smgr_rnode, forkNum)),
809  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
810 
811  /*
812  * We *must* do smgrextend before succeeding, else the page will not
813  * be reserved by the kernel, and the next P_NEW call will decide to
814  * return the same page. Clear the BM_VALID bit, do the StartBufferIO
815  * call that BufferAlloc didn't, and proceed.
816  */
817  if (isLocalBuf)
818  {
819  /* Only need to adjust flags */
820  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
821 
822  Assert(buf_state & BM_VALID);
823  buf_state &= ~BM_VALID;
824  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
825  }
826  else
827  {
828  /*
829  * Loop to handle the very small possibility that someone re-sets
830  * BM_VALID between our clearing it and StartBufferIO inspecting
831  * it.
832  */
833  do
834  {
835  uint32 buf_state = LockBufHdr(bufHdr);
836 
837  Assert(buf_state & BM_VALID);
838  buf_state &= ~BM_VALID;
839  UnlockBufHdr(bufHdr, buf_state);
840  } while (!StartBufferIO(bufHdr, true));
841  }
842  }
843 
844  /*
845  * if we have gotten to this point, we have allocated a buffer for the
846  * page but its contents are not yet valid. IO_IN_PROGRESS is set for it,
847  * if it's a shared buffer.
848  *
849  * Note: if smgrextend fails, we will end up with a buffer that is
850  * allocated but not marked BM_VALID. P_NEW will still select the same
851  * block number (because the relation didn't get any longer on disk) and
852  * so future attempts to extend the relation will find the same buffer (if
853  * it's not been recycled) but come right back here to try smgrextend
854  * again.
855  */
856  Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
857 
858  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
859 
860  if (isExtend)
861  {
862  /* new buffers are zero-filled */
863  MemSet((char *) bufBlock, 0, BLCKSZ);
864  /* don't set checksum for all-zero page */
865  smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
866 
867  /*
868  * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
869  * although we're essentially performing a write. At least on linux
870  * doing so defeats the 'delayed allocation' mechanism, leading to
871  * increased file fragmentation.
872  */
873  }
874  else
875  {
876  /*
877  * Read in the page, unless the caller intends to overwrite it and
878  * just wants us to allocate a buffer.
879  */
880  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
881  MemSet((char *) bufBlock, 0, BLCKSZ);
882  else
883  {
884  instr_time io_start,
885  io_time;
886 
887  if (track_io_timing)
888  INSTR_TIME_SET_CURRENT(io_start);
889 
890  smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
891 
892  if (track_io_timing)
893  {
894  INSTR_TIME_SET_CURRENT(io_time);
895  INSTR_TIME_SUBTRACT(io_time, io_start);
896  pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
897  INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
898  }
899 
900  /* check for garbage data */
901  if (!PageIsVerified((Page) bufBlock, blockNum))
902  {
903  if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
904  {
905  ereport(WARNING,
906  (errcode(ERRCODE_DATA_CORRUPTED),
907  errmsg("invalid page in block %u of relation %s; zeroing out page",
908  blockNum,
909  relpath(smgr->smgr_rnode, forkNum))));
910  MemSet((char *) bufBlock, 0, BLCKSZ);
911  }
912  else
913  ereport(ERROR,
914  (errcode(ERRCODE_DATA_CORRUPTED),
915  errmsg("invalid page in block %u of relation %s",
916  blockNum,
917  relpath(smgr->smgr_rnode, forkNum))));
918  }
919  }
920  }
921 
922  /*
923  * In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
924  * the page as valid, to make sure that no other backend sees the zeroed
925  * page before the caller has had a chance to initialize it.
926  *
927  * Since no-one else can be looking at the page contents yet, there is no
928  * difference between an exclusive lock and a cleanup-strength lock. (Note
929  * that we cannot use LockBuffer() or LockBufferForCleanup() here, because
930  * they assert that the buffer is already valid.)
931  */
932  if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
933  !isLocalBuf)
934  {
935  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
936  }
937 
938  if (isLocalBuf)
939  {
940  /* Only need to adjust flags */
941  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
942 
943  buf_state |= BM_VALID;
944  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
945  }
946  else
947  {
948  /* Set BM_VALID, terminate IO, and wake up any waiters */
949  TerminateBufferIO(bufHdr, false, BM_VALID);
950  }
951 
952  VacuumPageMiss++;
953  if (VacuumCostActive)
954  VacuumCostBalance += VacuumCostPageMiss;
955 
956  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
957  smgr->smgr_rnode.node.spcNode,
958  smgr->smgr_rnode.node.dbNode,
959  smgr->smgr_rnode.node.relNode,
960  smgr->smgr_rnode.backend,
961  isExtend,
962  found);
963 
964  return BufferDescriptorGetBuffer(bufHdr);
965 }
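
Illustrative sketch (not part of bufmgr.c): RBM_ZERO_AND_LOCK, which this function implements, seen from the caller's side via a hypothetical overwrite_block(). The read is skipped because the caller intends to overwrite the page, and the buffer comes back already exclusively locked.

/*
 * Sketch only: overwrite_block() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"

static Buffer
overwrite_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_ZERO_AND_LOCK, NULL);

    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);
    MarkBufferDirty(buf);
    return buf;         /* caller must UnlockReleaseBuffer() eventually */
}
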
Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 640 of file bufmgr.c.

References buf, ereport, errcode(), errmsg(), ERROR, pgstat_count_buffer_hit, pgstat_count_buffer_read, RelationData::rd_rel, RelationData::rd_smgr, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationOpenSmgr.

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), acquire_sample_rows(), blbulkdelete(), blgetbitmap(), blvacuumcleanup(), brin_vacuum_scan(), brinbuildempty(), btvacuumpage(), btvacuumscan(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistbuildempty(), gistbulkdelete(), gistvacuumcleanup(), hashbulkdelete(), heapgetpage(), lazy_scan_heap(), lazy_vacuum_heap(), palloc_btree_page(), pg_prewarm(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), and vm_readbuf().

642 {
643  bool hit;
644  Buffer buf;
645 
646  /* Open it at the smgr level if not already done */
647  RelationOpenSmgr(reln);
648 
649  /*
650  * Reject attempts to read non-local temporary relations; we would be
651  * likely to get wrong data since we have no visibility into the owning
652  * session's local buffers.
653  */
654  if (RELATION_IS_OTHER_TEMP(reln))
655  ereport(ERROR,
656  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
657  errmsg("cannot access temporary tables of other sessions")));
658 
659  /*
660  * Read the buffer, and update pgstat counters to reflect a cache hit or
661  * miss.
662  */
663  pgstat_count_buffer_read(reln);
664  buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
665  forkNum, blockNum, mode, strategy, &hit);
666  if (hit)
667  pgstat_count_buffer_hit(reln);
668  return buf;
669 }
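
Illustrative sketch (not part of bufmgr.c): a hypothetical bulk_scan() pairing ReadBufferExtended() with a BAS_BULKREAD ring strategy, so a large scan recycles a small set of buffers instead of flooding shared_buffers.

/*
 * Sketch only: bulk_scan() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static void
bulk_scan(Relation rel, BlockNumber nblocks)
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                             RBM_NORMAL, strategy);

        /* ... process the page ... */
        ReleaseBuffer(buf);
    }
    FreeAccessStrategy(strategy);
}
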
Buffer ReadBufferWithoutRelcache ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 682 of file bufmgr.c.

References Assert, InRecovery, InvalidBackendId, ReadBuffer_common(), RELPERSISTENCE_PERMANENT, and smgropen().

Referenced by XLogReadBufferExtended().

685 {
686  bool hit;
687 
688  SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
689 
690  Assert(InRecovery);
691 
692  return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
693  mode, strategy, &hit);
694 }
BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 2788 of file bufmgr.c.

References RelationData::rd_smgr, RelationOpenSmgr, and smgrnblocks().

Referenced by _hash_getnewbuf(), _hash_init(), get_raw_page_internal(), and pg_prewarm().

2789 {
2790  /* Open it at the smgr level if not already done */
2791  RelationOpenSmgr(relation);
2792 
2793  return smgrnblocks(relation->rd_smgr, forkNum);
2794 }
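
Illustrative sketch (not part of bufmgr.c): a hypothetical report_fork_sizes(). This assumes the FSM fork already exists; free-space-map forks are created lazily, so a freshly created relation may not have one.

/*
 * Sketch only: report_fork_sizes() is hypothetical and assumes the FSM
 * fork exists.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static void
report_fork_sizes(Relation rel)
{
    BlockNumber main_blocks = RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);
    BlockNumber fsm_blocks = RelationGetNumberOfBlocksInFork(rel, FSM_FORKNUM);

    elog(DEBUG1, "main fork: %u blocks, FSM fork: %u blocks",
         main_blocks, fsm_blocks);
}
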
Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 1513 of file bufmgr.c.

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid, CurrentResourceOwner, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, LocalRefCount, MAIN_FORKNUM, RelationData::rd_node, ReadBuffer(), RelFileNodeEquals, ResourceOwnerForgetBuffer(), buftag::rnode, BufferDesc::tag, and UnpinBuffer().

Referenced by _bt_relandgetbuf(), bitgetpage(), ginFindLeafPage(), and index_fetch_heap().

1516 {
1517  ForkNumber forkNum = MAIN_FORKNUM;
1518  BufferDesc *bufHdr;
1519 
1520  if (BufferIsValid(buffer))
1521  {
1522  Assert(BufferIsPinned(buffer));
1523  if (BufferIsLocal(buffer))
1524  {
1525  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1526  if (bufHdr->tag.blockNum == blockNum &&
1527  RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
1528  bufHdr->tag.forkNum == forkNum)
1529  return buffer;
1530  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
1531  LocalRefCount[-buffer - 1]--;
1532  }
1533  else
1534  {
1535  bufHdr = GetBufferDescriptor(buffer - 1);
1536  /* we have pin, so it's ok to examine tag without spinlock */
1537  if (bufHdr->tag.blockNum == blockNum &&
1538  RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
1539  bufHdr->tag.forkNum == forkNum)
1540  return buffer;
1541  UnpinBuffer(bufHdr, true);
1542  }
1543  }
1544 
1545  return ReadBuffer(relation, blockNum);
1546 }
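
Illustrative sketch (not part of bufmgr.c): a hypothetical step_to_block() wrapper. The swap is a no-op when the requested block is already the one pinned, which is why page-hopping callers such as _bt_relandgetbuf() rely on this function.

/*
 * Sketch only: step_to_block() is hypothetical.
 */
#include "postgres.h"
#include "storage/bufmgr.h"

static Buffer
step_to_block(Relation rel, Buffer oldbuf, BlockNumber next_blkno)
{
    /* InvalidBuffer is accepted for oldbuf on the first call. */
    return ReleaseAndReadBuffer(oldbuf, rel, next_blkno);
}
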
void ReleaseBuffer ( Buffer  buffer)

Definition at line 3309 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, CurrentResourceOwner, elog, ERROR, GetBufferDescriptor, LocalRefCount, ResourceOwnerForgetBuffer(), and UnpinBuffer().

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_getbuf(), _bt_pagedel(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), AfterTriggerExecute(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), EvalPlanQualFetch(), EvalPlanQualFetchRowMarks(), ExecCheckTIDVisible(), ExecClearTuple(), ExecDelete(), ExecEndIndexOnlyScan(), ExecLockRows(), ExecMaterializeSlot(), ExecOnConflictUpdate(), ExecStoreMinimalTuple(), ExecStoreTuple(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_vacuum_page(), get_raw_page_internal(), GetRecordedFreeSpace(), GetTupleForTrigger(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_hot_search(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), heapgettup_pagemode(), index_endscan(), index_getnext_tid(), index_rescan(), lazy_scan_heap(), lazy_vacuum_heap(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), ReadBufferBI(), RelationFindReplTupleByIndex(), RelationFindReplTupleSeq(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), ResourceOwnerReleaseInternal(), revmap_get_buffer(), revmap_physical_extend(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), tablesample_getnext(), terminate_brin_buildstate(), TidNext(), UnlockReleaseBuffer(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

3310 {
3311  if (!BufferIsValid(buffer))
3312  elog(ERROR, "bad buffer ID: %d", buffer);
3313 
3314  if (BufferIsLocal(buffer))
3315  {
3316  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
3317 
3318  Assert(LocalRefCount[-buffer - 1] > 0);
3319  LocalRefCount[-buffer - 1]--;
3320  return;
3321  }
3322 
3323  UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
3324 }