PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/timestamp.h"
Include dependency graph for bufmgr.c:

Go to the source code of this file.

Data Structures

struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)   LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define DROP_RELS_BSEARCH_THRESHOLD   20
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 

Typedefs

typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static Buffer ReadBuffer_common (SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf, bool fixOwner)
 
static void BufferSync (int flags)
 
static uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *flush_context)
 
static void WaitIO (BufferDesc *buf)
 
static bool StartBufferIO (BufferDesc *buf, bool forInput)
 
static void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rnode_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const void *p1, const void *p2)
 
static int ckpt_buforder_comparator (const void *pa, const void *pb)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
bool ComputeIoConcurrency (int io_concurrency, double *target)
 
void PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static void InvalidateBuffer (BufferDesc *buf)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferPoolAccess (void)
 
void InitBufferPoolBackend (void)
 
void PrintBufferLeakWarning (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
void BufmgrCommit (void)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelFileNodeBuffers (RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock)
 
void DropRelFileNodesAllBuffers (RelFileNodeBackend *rnodes, int nnodes)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, int mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
void AbortBufferIO (void)
 
uint32 LockBufHdr (BufferDesc *desc)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *context)
 
void TestForOldSnapshot_impl (Snapshot snapshot, Relation relation)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = 0
 
int checkpoint_flush_after = 0
 
int bgwriter_flush_after = 0
 
int backend_flush_after = 0
 
int target_prefetch_pages = 0
 
static BufferDesc * InProgressBuf = NULL
 
static bool IsForInput
 
static BufferDesc * PinCountWaitBuf = NULL
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static PrivateRefCountEntry * ReservedRefCountEntry = NULL
 

Macro Definition Documentation

#define BUF_REUSABLE   0x02

Definition at line 66 of file bufmgr.c.

Referenced by BgBufferSync(), and SyncOneBuffer().

#define BUF_WRITTEN   0x01

Definition at line 65 of file bufmgr.c.

Referenced by BgBufferSync(), BufferSync(), and SyncOneBuffer().

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 58 of file bufmgr.c.

Referenced by BufferAlloc(), and FlushBuffer().

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
#define false
Definition: c.h:207
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
int32 * LocalRefCount
Definition: localbuf.c:45

Definition at line 419 of file bufmgr.c.

Referenced by BufferGetBlockNumber(), BufferGetLSNAtomic(), BufferGetTag(), BufferIsPermanent(), FlushOneBuffer(), IncrBufferRefCount(), MarkBufferDirty(), and ReleaseAndReadBuffer().

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 57 of file bufmgr.c.

Referenced by FlushBuffer(), and ReadBuffer_common().

#define DROP_RELS_BSEARCH_THRESHOLD   20

Definition at line 68 of file bufmgr.c.

Referenced by DropRelFileNodesAllBuffers().

#define LocalBufHdrGetBlock (   bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 61 of file bufmgr.c.

Referenced by FlushRelationBuffers(), and ReadBuffer_common().

#define REFCOUNT_ARRAY_ENTRIES   8

Typedef Documentation

Function Documentation

void AbortBufferIO ( void  )

Definition at line 3957 of file bufmgr.c.

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_VALID, BufferDescriptorGetIOLock, ereport, errcode(), errdetail(), errmsg(), buftag::forkNum, InProgressBuf, IsForInput, LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), pfree(), relpathperm, buftag::rnode, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

3958 {
3959  BufferDesc *buf = InProgressBuf;
3960 
3961  if (buf)
3962  {
3963  uint32 buf_state;
3964 
3965  /*
3966  * Since LWLockReleaseAll has already been called, we're not holding
3967  * the buffer's io_in_progress_lock. We have to re-acquire it so that
3968  * we can use TerminateBufferIO. Anyone who's executing WaitIO on the
3969  * buffer will be in a busy spin until we succeed in doing this.
3970  */
3971  LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
3972 
3973  buf_state = LockBufHdr(buf);
3974  Assert(buf_state & BM_IO_IN_PROGRESS);
3975  if (IsForInput)
3976  {
3977  Assert(!(buf_state & BM_DIRTY));
3978 
3979  /* We'd better not think buffer is valid yet */
3980  Assert(!(buf_state & BM_VALID));
3981  UnlockBufHdr(buf, buf_state);
3982  }
3983  else
3984  {
3985  Assert(buf_state & BM_DIRTY);
3986  UnlockBufHdr(buf, buf_state);
3987  /* Issue notice if this is not the first failure... */
3988  if (buf_state & BM_IO_ERROR)
3989  {
3990  /* Buffer is pinned, so we can read tag without spinlock */
3991  char *path;
3992 
3993  path = relpathperm(buf->tag.rnode, buf->tag.forkNum);
3994  ereport(WARNING,
3995  (errcode(ERRCODE_IO_ERROR),
3996  errmsg("could not write block %u of %s",
3997  buf->tag.blockNum, path),
3998  errdetail("Multiple failures --- write error might be permanent.")));
3999  pfree(path);
4000  }
4001  }
4002  TerminateBufferIO(buf, false, BM_IO_ERROR);
4003  }
4004 }
#define relpathperm(rnode, forknum)
Definition: relpath.h:67
ForkNumber forkNum
Definition: buf_internals.h:94
int errcode(int sqlerrcode)
Definition: elog.c:575
#define BM_DIRTY
Definition: buf_internals.h:59
#define BufferDescriptorGetIOLock(bdesc)
static BufferDesc * InProgressBuf
Definition: bufmgr.c:132
void pfree(void *pointer)
Definition: mcxt.c:992
static char * buf
Definition: pg_test_fsync.c:65
int errdetail(const char *fmt,...)
Definition: elog.c:873
unsigned int uint32
Definition: c.h:265
static bool IsForInput
Definition: bufmgr.c:133
#define ereport(elevel, rest)
Definition: elog.h:122
#define WARNING
Definition: elog.h:40
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:3925
#define BM_VALID
Definition: buf_internals.h:60
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define Assert(condition)
Definition: c.h:671
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define UnlockBufHdr(desc, s)
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:62
void AtEOXact_Buffers ( bool  isCommit)

Definition at line 2398 of file bufmgr.c.

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

2399 {
2400  CheckForBufferLeaks();
2401 
2402  AtEOXact_LocalBuffers(isCommit);
2403 
2404  Assert(PrivateRefCountOverflowed == 0);
2405 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:170
#define Assert(condition)
Definition: c.h:671
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2473
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:572
static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 2454 of file bufmgr.c.

References AbortBufferIO(), AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferPoolBackend().

2455 {
2456  AbortBufferIO();
2457  UnlockBuffers();
2458 
2459  CheckForBufferLeaks();
2460 
2461  /* localbuf.c needs a chance too */
2462  AtProcExit_LocalBuffers();
2463 }
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:583
void UnlockBuffers(void)
Definition: bufmgr.c:3501
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:2473
void AbortBufferIO(void)
Definition: bufmgr.c:3957
bool BgBufferSync ( WritebackContext * wb_context)

Definition at line 2028 of file bufmgr.c.

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, BgWriterStats, BUF_REUSABLE, BUF_WRITTEN, CurrentResourceOwner, DEBUG1, DEBUG2, elog, PgStat_MsgBgWriter::m_buf_alloc, PgStat_MsgBgWriter::m_buf_written_clean, PgStat_MsgBgWriter::m_maxwritten_clean, NBuffers, ResourceOwnerEnlargeBuffers(), StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

2029 {
2030  /* info obtained from freelist.c */
2031  int strategy_buf_id;
2032  uint32 strategy_passes;
2033  uint32 recent_alloc;
2034 
2035  /*
2036  * Information saved between calls so we can determine the strategy
2037  * point's advance rate and avoid scanning already-cleaned buffers.
2038  */
2039  static bool saved_info_valid = false;
2040  static int prev_strategy_buf_id;
2041  static uint32 prev_strategy_passes;
2042  static int next_to_clean;
2043  static uint32 next_passes;
2044 
2045  /* Moving averages of allocation rate and clean-buffer density */
2046  static float smoothed_alloc = 0;
2047  static float smoothed_density = 10.0;
2048 
2049  /* Potentially these could be tunables, but for now, not */
2050  float smoothing_samples = 16;
2051  float scan_whole_pool_milliseconds = 120000.0;
2052 
2053  /* Used to compute how far we scan ahead */
2054  long strategy_delta;
2055  int bufs_to_lap;
2056  int bufs_ahead;
2057  float scans_per_alloc;
2058  int reusable_buffers_est;
2059  int upcoming_alloc_est;
2060  int min_scan_buffers;
2061 
2062  /* Variables for the scanning loop proper */
2063  int num_to_scan;
2064  int num_written;
2065  int reusable_buffers;
2066 
2067  /* Variables for final smoothed_density update */
2068  long new_strategy_delta;
2069  uint32 new_recent_alloc;
2070 
2071  /*
2072  * Find out where the freelist clock sweep currently is, and how many
2073  * buffer allocations have happened since our last call.
2074  */
2075  strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
2076 
2077  /* Report buffer alloc counts to pgstat */
2078  BgWriterStats.m_buf_alloc += recent_alloc;
2079 
2080  /*
2081  * If we're not running the LRU scan, just stop after doing the stats
2082  * stuff. We mark the saved state invalid so that we can recover sanely
2083  * if LRU scan is turned back on later.
2084  */
2085  if (bgwriter_lru_maxpages <= 0)
2086  {
2087  saved_info_valid = false;
2088  return true;
2089  }
2090 
2091  /*
2092  * Compute strategy_delta = how many buffers have been scanned by the
2093  * clock sweep since last time. If first time through, assume none. Then
2094  * see if we are still ahead of the clock sweep, and if so, how many
2095  * buffers we could scan before we'd catch up with it and "lap" it. Note:
2096  * weird-looking coding of xxx_passes comparisons are to avoid bogus
2097  * behavior when the passes counts wrap around.
2098  */
2099  if (saved_info_valid)
2100  {
2101  int32 passes_delta = strategy_passes - prev_strategy_passes;
2102 
2103  strategy_delta = strategy_buf_id - prev_strategy_buf_id;
2104  strategy_delta += (long) passes_delta *NBuffers;
2105 
2106  Assert(strategy_delta >= 0);
2107 
2108  if ((int32) (next_passes - strategy_passes) > 0)
2109  {
2110  /* we're one pass ahead of the strategy point */
2111  bufs_to_lap = strategy_buf_id - next_to_clean;
2112 #ifdef BGW_DEBUG
2113  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2114  next_passes, next_to_clean,
2115  strategy_passes, strategy_buf_id,
2116  strategy_delta, bufs_to_lap);
2117 #endif
2118  }
2119  else if (next_passes == strategy_passes &&
2120  next_to_clean >= strategy_buf_id)
2121  {
2122  /* on same pass, but ahead or at least not behind */
2123  bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
2124 #ifdef BGW_DEBUG
2125  elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
2126  next_passes, next_to_clean,
2127  strategy_passes, strategy_buf_id,
2128  strategy_delta, bufs_to_lap);
2129 #endif
2130  }
2131  else
2132  {
2133  /*
2134  * We're behind, so skip forward to the strategy point and start
2135  * cleaning from there.
2136  */
2137 #ifdef BGW_DEBUG
2138  elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
2139  next_passes, next_to_clean,
2140  strategy_passes, strategy_buf_id,
2141  strategy_delta);
2142 #endif
2143  next_to_clean = strategy_buf_id;
2144  next_passes = strategy_passes;
2145  bufs_to_lap = NBuffers;
2146  }
2147  }
2148  else
2149  {
2150  /*
2151  * Initializing at startup or after LRU scanning had been off. Always
2152  * start at the strategy point.
2153  */
2154 #ifdef BGW_DEBUG
2155  elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
2156  strategy_passes, strategy_buf_id);
2157 #endif
2158  strategy_delta = 0;
2159  next_to_clean = strategy_buf_id;
2160  next_passes = strategy_passes;
2161  bufs_to_lap = NBuffers;
2162  }
2163 
2164  /* Update saved info for next time */
2165  prev_strategy_buf_id = strategy_buf_id;
2166  prev_strategy_passes = strategy_passes;
2167  saved_info_valid = true;
2168 
2169  /*
2170  * Compute how many buffers had to be scanned for each new allocation, ie,
2171  * 1/density of reusable buffers, and track a moving average of that.
2172  *
2173  * If the strategy point didn't move, we don't update the density estimate
2174  */
2175  if (strategy_delta > 0 && recent_alloc > 0)
2176  {
2177  scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
2178  smoothed_density += (scans_per_alloc - smoothed_density) /
2179  smoothing_samples;
2180  }
2181 
2182  /*
2183  * Estimate how many reusable buffers there are between the current
2184  * strategy point and where we've scanned ahead to, based on the smoothed
2185  * density estimate.
2186  */
2187  bufs_ahead = NBuffers - bufs_to_lap;
2188  reusable_buffers_est = (float) bufs_ahead / smoothed_density;
2189 
2190  /*
2191  * Track a moving average of recent buffer allocations. Here, rather than
2192  * a true average we want a fast-attack, slow-decline behavior: we
2193  * immediately follow any increase.
2194  */
2195  if (smoothed_alloc <= (float) recent_alloc)
2196  smoothed_alloc = recent_alloc;
2197  else
2198  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
2199  smoothing_samples;
2200 
2201  /* Scale the estimate by a GUC to allow more aggressive tuning. */
2202  upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
2203 
2204  /*
2205  * If recent_alloc remains at zero for many cycles, smoothed_alloc will
2206  * eventually underflow to zero, and the underflows produce annoying
2207  * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
2208  * zero, there's no point in tracking smaller and smaller values of
2209  * smoothed_alloc, so just reset it to exactly zero to avoid this
2210  * syndrome. It will pop back up as soon as recent_alloc increases.
2211  */
2212  if (upcoming_alloc_est == 0)
2213  smoothed_alloc = 0;
2214 
2215  /*
2216  * Even in cases where there's been little or no buffer allocation
2217  * activity, we want to make a small amount of progress through the buffer
2218  * cache so that as many reusable buffers as possible are clean after an
2219  * idle period.
2220  *
2221  * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
2222  * the BGW will be called during the scan_whole_pool time; slice the
2223  * buffer pool into that many sections.
2224  */
2225  min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
2226 
2227  if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
2228  {
2229 #ifdef BGW_DEBUG
2230  elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
2231  upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
2232 #endif
2233  upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
2234  }
2235 
2236  /*
2237  * Now write out dirty reusable buffers, working forward from the
2238  * next_to_clean point, until we have lapped the strategy scan, or cleaned
2239  * enough buffers to match our estimate of the next cycle's allocation
2240  * requirements, or hit the bgwriter_lru_maxpages limit.
2241  */
2242 
2243  /* Make sure we can handle the pin inside SyncOneBuffer */
2244  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
2245 
2246  num_to_scan = bufs_to_lap;
2247  num_written = 0;
2248  reusable_buffers = reusable_buffers_est;
2249 
2250  /* Execute the LRU scan */
2251  while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
2252  {
2253  int sync_state = SyncOneBuffer(next_to_clean, true,
2254  wb_context);
2255 
2256  if (++next_to_clean >= NBuffers)
2257  {
2258  next_to_clean = 0;
2259  next_passes++;
2260  }
2261  num_to_scan--;
2262 
2263  if (sync_state & BUF_WRITTEN)
2264  {
2265  reusable_buffers++;
2266  if (++num_written >= bgwriter_lru_maxpages)
2267  {
2268  BgWriterStats.m_maxwritten_clean++;
2269  break;
2270  }
2271  }
2272  else if (sync_state & BUF_REUSABLE)
2273  reusable_buffers++;
2274  }
2275 
2276  BgWriterStats.m_buf_written_clean += num_written;
2277 
2278 #ifdef BGW_DEBUG
2279  elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
2280  recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
2281  smoothed_density, reusable_buffers_est, upcoming_alloc_est,
2282  bufs_to_lap - num_to_scan,
2283  num_written,
2284  reusable_buffers - reusable_buffers_est);
2285 #endif
2286 
2287  /*
2288  * Consider the above scan as being like a new allocation scan.
2289  * Characterize its density and update the smoothed one based on it. This
2290  * effectively halves the moving average period in cases where both the
2291  * strategy and the background writer are doing some useful scanning,
2292  * which is helpful because a long memory isn't as desirable on the
2293  * density estimates.
2294  */
2295  new_strategy_delta = bufs_to_lap - num_to_scan;
2296  new_recent_alloc = reusable_buffers - reusable_buffers_est;
2297  if (new_strategy_delta > 0 && new_recent_alloc > 0)
2298  {
2299  scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
2300  smoothed_density += (scans_per_alloc - smoothed_density) /
2301  smoothing_samples;
2302 
2303 #ifdef BGW_DEBUG
2304  elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
2305  new_recent_alloc, new_strategy_delta,
2306  scans_per_alloc, smoothed_density);
2307 #endif
2308  }
2309 
2310  /* Return true if OK to hibernate */
2311  return (bufs_to_lap == 0 && recent_alloc == 0);
2312 }
PgStat_Counter m_buf_alloc
Definition: pgstat.h:420
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:378
#define DEBUG1
Definition: elog.h:25
int BgWriterDelay
Definition: bgwriter.c:67
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
PgStat_Counter m_maxwritten_clean
Definition: pgstat.h:417
PgStat_Counter m_buf_written_clean
Definition: pgstat.h:416
PgStat_MsgBgWriter BgWriterStats
Definition: pgstat.c:127
double bgwriter_lru_multiplier
Definition: bufmgr.c:110
signed int int32
Definition: c.h:253
#define BUF_REUSABLE
Definition: bufmgr.c:66
int bgwriter_lru_maxpages
Definition: bufmgr.c:109
#define DEBUG2
Definition: elog.h:24
unsigned int uint32
Definition: c.h:265
#define BUF_WRITTEN
Definition: bufmgr.c:65
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *flush_context)
Definition: bufmgr.c:2331
#define Assert(condition)
Definition: c.h:671
int NBuffers
Definition: globals.c:122
#define elog
Definition: elog.h:219
static BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool * foundPtr 
)
static

Definition at line 987 of file bufmgr.c.

References Assert, BackendWritebackContext, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_FLAG_MASK, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BUF_USAGECOUNT_ONE, BufferDescriptorGetContentLock, BufferGetLSN, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), RelFileNode::dbNode, FALSE, FlushBuffer(), GetBufferDescriptor, INIT_BUFFERTAG, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), RelFileNodeBackend::node, NULL, PinBuffer(), PinBuffer_Locked(), RelFileNode::relNode, RELPERSISTENCE_PERMANENT, ReservePrivateRefCountEntry(), ScheduleBufferTagForWriteback(), SMgrRelationData::smgr_rnode, RelFileNode::spcNode, StartBufferIO(), StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, TRUE, UnlockBufHdr, UnpinBuffer(), and XLogNeedsFlush().

Referenced by ReadBuffer_common().

991 {
992  BufferTag newTag; /* identity of requested block */
993  uint32 newHash; /* hash value for newTag */
994  LWLock *newPartitionLock; /* buffer partition lock for it */
995  BufferTag oldTag; /* previous identity of selected buffer */
996  uint32 oldHash; /* hash value for oldTag */
997  LWLock *oldPartitionLock; /* buffer partition lock for it */
998  uint32 oldFlags;
999  int buf_id;
1000  BufferDesc *buf;
1001  bool valid;
1002  uint32 buf_state;
1003 
1004  /* create a tag so we can lookup the buffer */
1005  INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
1006 
1007  /* determine its hash code and partition lock ID */
1008  newHash = BufTableHashCode(&newTag);
1009  newPartitionLock = BufMappingPartitionLock(newHash);
1010 
1011  /* see if the block is in the buffer pool already */
1012  LWLockAcquire(newPartitionLock, LW_SHARED);
1013  buf_id = BufTableLookup(&newTag, newHash);
1014  if (buf_id >= 0)
1015  {
1016  /*
1017  * Found it. Now, pin the buffer so no one can steal it from the
1018  * buffer pool, and check to see if the correct data has been loaded
1019  * into the buffer.
1020  */
1021  buf = GetBufferDescriptor(buf_id);
1022 
1023  valid = PinBuffer(buf, strategy);
1024 
1025  /* Can release the mapping lock as soon as we've pinned it */
1026  LWLockRelease(newPartitionLock);
1027 
1028  *foundPtr = TRUE;
1029 
1030  if (!valid)
1031  {
1032  /*
1033  * We can only get here if (a) someone else is still reading in
1034  * the page, or (b) a previous read attempt failed. We have to
1035  * wait for any active read attempt to finish, and then set up our
1036  * own read attempt if the page is still not BM_VALID.
1037  * StartBufferIO does it all.
1038  */
1039  if (StartBufferIO(buf, true))
1040  {
1041  /*
1042  * If we get here, previous attempts to read the buffer must
1043  * have failed ... but we shall bravely try again.
1044  */
1045  *foundPtr = FALSE;
1046  }
1047  }
1048 
1049  return buf;
1050  }
1051 
1052  /*
1053  * Didn't find it in the buffer pool. We'll have to initialize a new
1054  * buffer. Remember to unlock the mapping lock while doing the work.
1055  */
1056  LWLockRelease(newPartitionLock);
1057 
1058  /* Loop here in case we have to try another victim buffer */
1059  for (;;)
1060  {
1061  /*
1062  * Ensure, while the spinlock's not yet held, that there's a free
1063  * refcount entry.
1064  */
1065  ReservePrivateRefCountEntry();
1066 
1067  /*
1068  * Select a victim buffer. The buffer is returned with its header
1069  * spinlock still held!
1070  */
1071  buf = StrategyGetBuffer(strategy, &buf_state);
1072 
1073  Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
1074 
1075  /* Must copy buffer flags while we still hold the spinlock */
1076  oldFlags = buf_state & BUF_FLAG_MASK;
1077 
1078  /* Pin the buffer and then release the buffer spinlock */
1079  PinBuffer_Locked(buf);
1080 
1081  /*
1082  * If the buffer was dirty, try to write it out. There is a race
1083  * condition here, in that someone might dirty it after we released it
1084  * above, or even while we are writing it out (since our share-lock
1085  * won't prevent hint-bit updates). We will recheck the dirty bit
1086  * after re-locking the buffer header.
1087  */
1088  if (oldFlags & BM_DIRTY)
1089  {
1090  /*
1091  * We need a share-lock on the buffer contents to write it out
1092  * (else we might write invalid data, eg because someone else is
1093  * compacting the page contents while we write). We must use a
1094  * conditional lock acquisition here to avoid deadlock. Even
1095  * though the buffer was not pinned (and therefore surely not
1096  * locked) when StrategyGetBuffer returned it, someone else could
1097  * have pinned and exclusive-locked it by the time we get here. If
1098  * we try to get the lock unconditionally, we'd block waiting for
1099  * them; if they later block waiting for us, deadlock ensues.
1100  * (This has been observed to happen when two backends are both
1101  * trying to split btree index pages, and the second one just
1102  * happens to be trying to split the page the first one got from
1103  * StrategyGetBuffer.)
1104  */
1105  if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
1106  LW_SHARED))
1107  {
1108  /*
1109  * If using a nondefault strategy, and writing the buffer
1110  * would require a WAL flush, let the strategy decide whether
1111  * to go ahead and write/reuse the buffer or to choose another
1112  * victim. We need lock to inspect the page LSN, so this
1113  * can't be done inside StrategyGetBuffer.
1114  */
1115  if (strategy != NULL)
1116  {
1117  XLogRecPtr lsn;
1118 
1119  /* Read the LSN while holding buffer header lock */
1120  buf_state = LockBufHdr(buf);
1121  lsn = BufferGetLSN(buf);
1122  UnlockBufHdr(buf, buf_state);
1123 
1124  if (XLogNeedsFlush(lsn) &&
1125  StrategyRejectBuffer(strategy, buf))
1126  {
1127  /* Drop lock/pin and loop around for another buffer */
1128  LWLockRelease(BufferDescriptorGetContentLock(buf));
1129  UnpinBuffer(buf, true);
1130  continue;
1131  }
1132  }
1133 
1134  /* OK, do the I/O */
1135  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
1136  smgr->smgr_rnode.node.spcNode,
1137  smgr->smgr_rnode.node.dbNode,
1138  smgr->smgr_rnode.node.relNode);
1139 
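1140  FlushBuffer(buf, NULL);
1141  LWLockRelease(BufferDescriptorGetContentLock(buf));
1142 
1143  ScheduleBufferTagForWriteback(&BackendWritebackContext,
1144  &buf->tag);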
1145 
1146  TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
1147  smgr->smgr_rnode.node.spcNode,
1148  smgr->smgr_rnode.node.dbNode,
1149  smgr->smgr_rnode.node.relNode);
1150  }
1151  else
1152  {
1153  /*
1154  * Someone else has locked the buffer, so give it up and loop
1155  * back to get another one.
1156  */
1157  UnpinBuffer(buf, true);
1158  continue;
1159  }
1160  }
1161 
1162  /*
1163  * To change the association of a valid buffer, we'll need to have
1164  * exclusive lock on both the old and new mapping partitions.
1165  */
1166  if (oldFlags & BM_TAG_VALID)
1167  {
1168  /*
1169  * Need to compute the old tag's hashcode and partition lock ID.
1170  * XXX is it worth storing the hashcode in BufferDesc so we need
1171  * not recompute it here? Probably not.
1172  */
1173  oldTag = buf->tag;
1174  oldHash = BufTableHashCode(&oldTag);
1175  oldPartitionLock = BufMappingPartitionLock(oldHash);
1176 
1177  /*
1178  * Must lock the lower-numbered partition first to avoid
1179  * deadlocks.
1180  */
1181  if (oldPartitionLock < newPartitionLock)
1182  {
1183  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1184  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1185  }
1186  else if (oldPartitionLock > newPartitionLock)
1187  {
1188  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1189  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1190  }
1191  else
1192  {
1193  /* only one partition, only one lock */
1194  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1195  }
1196  }
1197  else
1198  {
1199  /* if it wasn't valid, we need only the new partition */
1200  LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
1201  /* remember we have no old-partition lock or tag */
1202  oldPartitionLock = NULL;
1203  /* this just keeps the compiler quiet about uninit variables */
1204  oldHash = 0;
1205  }
1206 
1207  /*
1208  * Try to make a hashtable entry for the buffer under its new tag.
1209  * This could fail because while we were writing someone else
1210  * allocated another buffer for the same block we want to read in.
1211  * Note that we have not yet removed the hashtable entry for the old
1212  * tag.
1213  */
1214  buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
1215 
1216  if (buf_id >= 0)
1217  {
1218  /*
1219  * Got a collision. Someone has already done what we were about to
1220  * do. We'll just handle this as if it were found in the buffer
1221  * pool in the first place. First, give up the buffer we were
1222  * planning to use.
1223  */
1224  UnpinBuffer(buf, true);
1225 
1226  /* Can give up that buffer's mapping partition lock now */
1227  if (oldPartitionLock != NULL &&
1228  oldPartitionLock != newPartitionLock)
1229  LWLockRelease(oldPartitionLock);
1230 
1231  /* remaining code should match code at top of routine */
1232 
1233  buf = GetBufferDescriptor(buf_id);
1234 
1235  valid = PinBuffer(buf, strategy);
1236 
1237  /* Can release the mapping lock as soon as we've pinned it */
1238  LWLockRelease(newPartitionLock);
1239 
1240  *foundPtr = TRUE;
1241 
1242  if (!valid)
1243  {
1244  /*
1245  * We can only get here if (a) someone else is still reading
1246  * in the page, or (b) a previous read attempt failed. We
1247  * have to wait for any active read attempt to finish, and
1248  * then set up our own read attempt if the page is still not
1249  * BM_VALID. StartBufferIO does it all.
1250  */
1251  if (StartBufferIO(buf, true))
1252  {
1253  /*
1254  * If we get here, previous attempts to read the buffer
1255  * must have failed ... but we shall bravely try again.
1256  */
1257  *foundPtr = FALSE;
1258  }
1259  }
1260 
1261  return buf;
1262  }
1263 
1264  /*
1265  * Need to lock the buffer header too in order to change its tag.
1266  */
1267  buf_state = LockBufHdr(buf);
1268 
1269  /*
1270  * Somebody could have pinned or re-dirtied the buffer while we were
1271  * doing the I/O and making the new hashtable entry. If so, we can't
1272  * recycle this buffer; we must undo everything we've done and start
1273  * over with a new victim buffer.
1274  */
1275  oldFlags = buf_state & BUF_FLAG_MASK;
1276  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(oldFlags & BM_DIRTY))
1277  break;
1278 
1279  UnlockBufHdr(buf, buf_state);
1280  BufTableDelete(&newTag, newHash);
1281  if (oldPartitionLock != NULL &&
1282  oldPartitionLock != newPartitionLock)
1283  LWLockRelease(oldPartitionLock);
1284  LWLockRelease(newPartitionLock);
1285  UnpinBuffer(buf, true);
1286  }
1287 
1288  /*
1289  * Okay, it's finally safe to rename the buffer.
1290  *
1291  * Clearing BM_VALID here is necessary, clearing the dirtybits is just
1292  * paranoia. We also reset the usage_count since any recency of use of
1293  * the old content is no longer relevant. (The usage_count starts out at
1294  * 1 so that the buffer can survive one clock-sweep pass.)
1295  */
1296  buf->tag = newTag;
1297  buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
1300  if (relpersistence == RELPERSISTENCE_PERMANENT)
1301  buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE;
1302  else
1303  buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
1304 
1305  UnlockBufHdr(buf, buf_state);
1306 
1307  if (oldPartitionLock != NULL)
1308  {
1309  BufTableDelete(&oldTag, oldHash);
1310  if (oldPartitionLock != newPartitionLock)
1311  LWLockRelease(oldPartitionLock);
1312  }
1313 
1314  LWLockRelease(newPartitionLock);
1315 
1316  /*
1317  * Buffer contents are currently invalid. Try to get the io_in_progress
1318  * lock. If StartBufferIO returns false, then someone else managed to
1319  * read it before we did, so there's nothing left for BufferAlloc() to do.
1320  */
1321  if (StartBufferIO(buf, true))
1322  *foundPtr = FALSE;
1323  else
1324  *foundPtr = TRUE;
1325 
1326  return buf;
1327 }
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
Definition: bufmgr.c:1566
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
Definition: freelist.c:184
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:150
Definition: lwlock.h:32
#define BM_PERMANENT
Definition: buf_internals.h:67
#define BufMappingPartitionLock(hashcode)
#define BM_TAG_VALID
Definition: buf_internals.h:61
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3065
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:66
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:80
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:92
#define BM_DIRTY
Definition: buf_internals.h:59
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2648
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define RELPERSISTENCE_PERMANENT
Definition: pg_class.h:170
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:3858
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:120
#define FALSE
Definition: c.h:218
void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
Definition: bufmgr.c:4225
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1282
static char * buf
Definition: pg_test_fsync.c:65
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:44
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
unsigned int uint32
Definition: c.h:265
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1677
#define BM_VALID
Definition: buf_internals.h:60
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
Definition: freelist.c:669
RelFileNode node
Definition: relfilenode.h:74
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
#define INIT_BUFFERTAG(a, xx_rnode, xx_forkNum, xx_blockNum)
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1639
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:43
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define BM_IO_ERROR
Definition: buf_internals.h:63
BufferTag tag
#define UnlockBufHdr(desc, s)
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:186
#define TRUE
Definition: c.h:214
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:58
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 2588 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, GetBufferDescriptor, GetLocalBufferDescriptor, and BufferDesc::tag.

Referenced by _bt_checkpage(), _bt_endpoint(), _bt_finish_split(), _bt_first(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newroot(), _bt_pagedel(), _bt_readnextpage(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_split(), _bt_unlink_halfdead_page(), _bt_walk_left(), _hash_addovflpage(), _hash_checkpage(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_step(), allocNewBuffer(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), CheckForSerializableConflictIn(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_with_data(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoCreateIndex(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistformdownlink(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRedoCreateIndex(), gistRelocateBuildBuffersOnSplit(), heap_delete(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_is_all_visible(), heap_prune_chain(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), index_getnext(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), ReadBufferBI(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgRedoCreateIndex(), spgSplitNodeAction(), spgWalk(), terminate_brin_buildstate(), vacuumLeafPage(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and XLogReadBufferExtended().

2589 {
2590  BufferDesc *bufHdr;
2591 
2592  Assert(BufferIsPinned(buffer));
2593 
2594  if (BufferIsLocal(buffer))
2595  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2596  else
2597  bufHdr = GetBufferDescriptor(buffer - 1);
2598 
2599  /* pinned, so OK to read tag without spinlock */
2600  return bufHdr->tag.blockNum;
2601 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
BufferTag tag
XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 2815 of file bufmgr.c.

References Assert, BufferGetPage, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, LockBufHdr(), PageGetLSN, UnlockBufHdr, and XLogHintBitIsNeeded.

Referenced by gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

2816 {
2817  BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
2818  char *page = BufferGetPage(buffer);
2819  XLogRecPtr lsn;
2820  uint32 buf_state;
2821 
2822  /*
2823  * If we don't need locking for correctness, fastpath out.
2824  */
2825  if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
2826  return PageGetLSN(page);
2827 
2828  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2829  Assert(BufferIsValid(buffer));
2830  Assert(BufferIsPinned(buffer));
2831 
2832  buf_state = LockBufHdr(bufHdr);
2833  lsn = PageGetLSN(page);
2834  UnlockBufHdr(bufHdr, buf_state);
2835 
2836  return lsn;
2837 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define PageGetLSN(page)
Definition: bufpage.h:363
#define UnlockBufHdr(desc, s)
#define XLogHintBitIsNeeded()
Definition: xlog.h:156
void BufferGetTag ( Buffer  buffer,
RelFileNode rnode,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 2609 of file bufmgr.c.

References Assert, buftag::blockNum, BufferIsLocal, BufferIsPinned, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, buftag::rnode, and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

2611 {
2612  BufferDesc *bufHdr;
2613 
2614  /* Do the same checks as BufferGetBlockNumber. */
2615  Assert(BufferIsPinned(buffer));
2616 
2617  if (BufferIsLocal(buffer))
2618  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
2619  else
2620  bufHdr = GetBufferDescriptor(buffer - 1);
2621 
2622  /* pinned, so OK to read tag without spinlock */
2623  *rnode = bufHdr->tag.rnode;
2624  *forknum = bufHdr->tag.forkNum;
2625  *blknum = bufHdr->tag.blockNum;
2626 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
ForkNumber forkNum
Definition: buf_internals.h:94
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
bool BufferIsPermanent ( Buffer  buffer)

Definition at line 2785 of file bufmgr.c.

References Assert, BM_PERMANENT, BufferIsLocal, BufferIsPinned, BufferIsValid, GetBufferDescriptor, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

2786 {
2787  BufferDesc *bufHdr;
2788 
2789  /* Local buffers are used only for temp relations. */
2790  if (BufferIsLocal(buffer))
2791  return false;
2792 
2793  /* Make sure we've got a real buffer, and that we hold a pin on it. */
2794  Assert(BufferIsValid(buffer));
2795  Assert(BufferIsPinned(buffer));
2796 
2797  /*
2798  * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
2799  * need not bother with the buffer header spinlock. Even if someone else
2800  * changes the buffer header state while we're doing this, the state is
2801  * changed atomically, so we'll read the old value or the new value, but
2802  * not random garbage.
2803  */
2804  bufHdr = GetBufferDescriptor(buffer - 1);
2805  return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
2806 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
#define BM_PERMANENT
Definition: buf_internals.h:67
#define GetBufferDescriptor(id)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
pg_atomic_uint32 state
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
static void BufferSync ( int  flags)
static

Definition at line 1761 of file bufmgr.c.

References Assert, BgWriterStats, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_ALL, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), ckpt_buforder_comparator(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, CurrentResourceOwner, DatumGetPointer, buftag::forkNum, CkptSortItem::forkNum, GetBufferDescriptor, i, CkptTsStatus::index, InvalidOid, IssuePendingWritebacks(), LockBufHdr(), PgStat_MsgBgWriter::m_buf_written_checkpoints, NBuffers, NULL, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), pfree(), pg_atomic_read_u32(), PointerGetDatum, CkptTsStatus::progress, CkptTsStatus::progress_slice, qsort, RelFileNode::relNode, CkptSortItem::relNode, repalloc(), ResourceOwnerEnlargeBuffers(), buftag::rnode, RelFileNode::spcNode, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdr, and WritebackContextInit().

Referenced by CheckPointBuffers().

1762 {
1763  uint32 buf_state;
1764  int buf_id;
1765  int num_to_scan;
1766  int num_spaces;
1767  int num_processed;
1768  int num_written;
1769  CkptTsStatus *per_ts_stat = NULL;
1770  Oid last_tsid;
1771  binaryheap *ts_heap;
1772  int i;
1773  int mask = BM_DIRTY;
1774  WritebackContext wb_context;
1775 
1776  /* Make sure we can handle the pin inside SyncOneBuffer */
1778 
1779  /*
1780  * Unless this is a shutdown checkpoint or we have been explicitly told,
1781  * we write only permanent, dirty buffers. But at shutdown or end of
1782  * recovery, we write all dirty buffers.
1783  */
1786  mask |= BM_PERMANENT;
1787 
1788  /*
1789  * Loop over all buffers, and mark the ones that need to be written with
1790  * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
1791  * can estimate how much work needs to be done.
1792  *
1793  * This allows us to write only those pages that were dirty when the
1794  * checkpoint began, and not those that get dirtied while it proceeds.
1795  * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
1796  * later in this function, or by normal backends or the bgwriter cleaning
1797  * scan, the flag is cleared. Any buffer dirtied after this point won't
1798  * have the flag set.
1799  *
1800  * Note that if we fail to write some buffer, we may leave buffers with
1801  * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
1802  * certainly need to be written for the next checkpoint attempt, too.
1803  */
1804  num_to_scan = 0;
1805  for (buf_id = 0; buf_id < NBuffers; buf_id++)
1806  {
1807  BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
1808 
1809  /*
1810  * Header spinlock is enough to examine BM_DIRTY, see comment in
1811  * SyncOneBuffer.
1812  */
1813  buf_state = LockBufHdr(bufHdr);
1814 
1815  if ((buf_state & mask) == mask)
1816  {
1817  CkptSortItem *item;
1818 
1819  buf_state |= BM_CHECKPOINT_NEEDED;
1820 
1821  item = &CkptBufferIds[num_to_scan++];
1822  item->buf_id = buf_id;
1823  item->tsId = bufHdr->tag.rnode.spcNode;
1824  item->relNode = bufHdr->tag.rnode.relNode;
1825  item->forkNum = bufHdr->tag.forkNum;
1826  item->blockNum = bufHdr->tag.blockNum;
1827  }
1828 
1829  UnlockBufHdr(bufHdr, buf_state);
1830  }
1831 
1832  if (num_to_scan == 0)
1833  return; /* nothing to do */
1834 
1836 
1837  TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
1838 
1839  /*
1840  * Sort buffers that need to be written to reduce the likelihood of random
1841  * IO. The sorting is also important for the implementation of balancing
1842  * writes between tablespaces. Without balancing writes we'd potentially
1843  * end up writing to the tablespaces one-by-one; possibly overloading the
1844  * underlying system.
1845  */
1846  qsort(CkptBufferIds, num_to_scan, sizeof(CkptSortItem),
1848 
1849  num_spaces = 0;
1850 
1851  /*
1852  * Allocate progress status for each tablespace with buffers that need to
1853  * be flushed. This requires the to-be-flushed array to be sorted.
1854  */
1855  last_tsid = InvalidOid;
1856  for (i = 0; i < num_to_scan; i++)
1857  {
1858  CkptTsStatus *s;
1859  Oid cur_tsid;
1860 
1861  cur_tsid = CkptBufferIds[i].tsId;
1862 
1863  /*
1864  * Grow array of per-tablespace status structs, every time a new
1865  * tablespace is found.
1866  */
1867  if (last_tsid == InvalidOid || last_tsid != cur_tsid)
1868  {
1869  Size sz;
1870 
1871  num_spaces++;
1872 
1873  /*
1874  * Not worth adding grow-by-power-of-2 logic here - even with a
1875  * few hundred tablespaces this should be fine.
1876  */
1877  sz = sizeof(CkptTsStatus) * num_spaces;
1878 
1879  if (per_ts_stat == NULL)
1880  per_ts_stat = (CkptTsStatus *) palloc(sz);
1881  else
1882  per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
1883 
1884  s = &per_ts_stat[num_spaces - 1];
1885  memset(s, 0, sizeof(*s));
1886  s->tsId = cur_tsid;
1887 
1888  /*
1889  * The first buffer in this tablespace. As CkptBufferIds is sorted
1890  * by tablespace all (s->num_to_scan) buffers in this tablespace
1891  * will follow afterwards.
1892  */
1893  s->index = i;
1894 
1895  /*
1896  * progress_slice will be determined once we know how many buffers
1897  * are in each tablespace, i.e. after this loop.
1898  */
1899 
1900  last_tsid = cur_tsid;
1901  }
1902  else
1903  {
1904  s = &per_ts_stat[num_spaces - 1];
1905  }
1906 
1907  s->num_to_scan++;
1908  }
1909 
1910  Assert(num_spaces > 0);
1911 
1912  /*
1913  * Build a min-heap over the write-progress in the individual tablespaces,
1914  * and compute how large a portion of the total progress a single
1915  * processed buffer is.
1916  */
1917  ts_heap = binaryheap_allocate(num_spaces,
1919  NULL);
1920 
1921  for (i = 0; i < num_spaces; i++)
1922  {
1923  CkptTsStatus *ts_stat = &per_ts_stat[i];
1924 
1925  ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
1926 
1927  binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
1928  }
1929 
1930  binaryheap_build(ts_heap);
1931 
1932  /*
1933  * Iterate through to-be-checkpointed buffers and write the ones (still)
1934  * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
1935  * tablespaces; otherwise the sorting would lead to only one tablespace
1936  * receiving writes at a time, making inefficient use of the hardware.
1937  */
1938  num_processed = 0;
1939  num_written = 0;
1940  while (!binaryheap_empty(ts_heap))
1941  {
1942  BufferDesc *bufHdr = NULL;
1943  CkptTsStatus *ts_stat = (CkptTsStatus *)
1945 
1946  buf_id = CkptBufferIds[ts_stat->index].buf_id;
1947  Assert(buf_id != -1);
1948 
1949  bufHdr = GetBufferDescriptor(buf_id);
1950 
1951  num_processed++;
1952 
1953  /*
1954  * We don't need to acquire the lock here, because we're only looking
1955  * at a single bit. It's possible that someone else writes the buffer
1956  * and clears the flag right after we check, but that doesn't matter
1957  * since SyncOneBuffer will then do nothing. However, there is a
1958  * further race condition: it's conceivable that between the time we
1959  * examine the bit here and the time SyncOneBuffer acquires the lock,
1960  * someone else not only wrote the buffer but replaced it with another
1961  * page and dirtied it. In that improbable case, SyncOneBuffer will
1962  * write the buffer though we didn't need to. It doesn't seem worth
1963  * guarding against this, though.
1964  */
1966  {
1967  if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
1968  {
1969  TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
1971  num_written++;
1972  }
1973  }
1974 
1975  /*
1976  * Measure progress independent of actually having to flush the buffer
1977  * - otherwise writing become unbalanced.
1978  */
1979  ts_stat->progress += ts_stat->progress_slice;
1980  ts_stat->num_scanned++;
1981  ts_stat->index++;
1982 
1983  /* Have all the buffers from the tablespace been processed? */
1984  if (ts_stat->num_scanned == ts_stat->num_to_scan)
1985  {
1986  binaryheap_remove_first(ts_heap);
1987  }
1988  else
1989  {
1990  /* update heap with the new progress */
1991  binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
1992  }
1993 
1994  /*
1995  * Sleep to throttle our I/O rate.
1996  */
1997  CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
1998  }
1999 
2000  /* issue all pending flushes */
2001  IssuePendingWritebacks(&wb_context);
2002 
2003  pfree(per_ts_stat);
2004  per_ts_stat = NULL;
2005  binaryheap_free(ts_heap);
2006 
2007  /*
2008  * Update checkpoint statistics. As noted above, this doesn't include
2009  * buffers written by other backends or bgwriter scan.
2010  */
2011  CheckpointStats.ckpt_bufs_written += num_written;
2012 
2013  TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
2014 }
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:677
PgStat_Counter m_buf_written_checkpoints
Definition: pgstat.h:415
#define BM_PERMANENT
Definition: buf_internals.h:67
#define CHECKPOINT_FLUSH_ALL
Definition: xlog.h:181
Oid tsId
Definition: bufmgr.c:86
#define binaryheap_empty(h)
Definition: binaryheap.h:52
ForkNumber forkNum
Definition: buf_internals.h:94
#define PointerGetDatum(X)
Definition: postgres.h:564
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:66
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:4190
PgStat_MsgBgWriter BgWriterStats
Definition: pgstat.c:127
int checkpoint_flush_after
Definition: bufmgr.c:118
void binaryheap_replace_first(binaryheap *heap, Datum d)
Definition: binaryheap.c:204
unsigned int Oid
Definition: postgres_ext.h:31
#define BM_DIRTY
Definition: buf_internals.h:59
void binaryheap_add_unordered(binaryheap *heap, Datum d)
Definition: binaryheap.c:110
void IssuePendingWritebacks(WritebackContext *context)
Definition: bufmgr.c:4259
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:4213
void pfree(void *pointer)
Definition: mcxt.c:992
double float8
Definition: c.h:378
Datum binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:159
int num_to_scan
Definition: bufmgr.c:99
float8 progress_slice
Definition: bufmgr.c:96
int index
Definition: bufmgr.c:104
float8 progress
Definition: bufmgr.c:95
static int ckpt_buforder_comparator(const void *pa, const void *pb)
Definition: bufmgr.c:4158
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:176
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
#define BUF_WRITTEN
Definition: bufmgr.c:65
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
int ckpt_bufs_written
Definition: xlog.h:205
BlockNumber blockNum
#define InvalidOid
Definition: postgres_ext.h:36
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:126
const symbol * s
Definition: header.h:17
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *flush_context)
Definition: bufmgr.c:2331
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
CheckpointStatsData CheckpointStats
Definition: xlog.c:172
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
size_t Size
Definition: c.h:353
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:69
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:33
#define DatumGetPointer(X)
Definition: postgres.h:557
BufferTag tag
void * palloc(Size size)
Definition: mcxt.c:891
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
pg_atomic_uint32 state
Datum binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:174
int num_scanned
Definition: bufmgr.c:101
#define qsort(a, b, c, d)
Definition: port.h:440
ForkNumber forkNum
struct CkptTsStatus CkptTsStatus
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:175
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
static int buffertag_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 4127 of file bufmgr.c.

References buftag::blockNum, buftag::forkNum, buftag::rnode, and rnode_comparator().

Referenced by IssuePendingWritebacks().

4128 {
4129  const BufferTag *ba = (const BufferTag *) a;
4130  const BufferTag *bb = (const BufferTag *) b;
4131  int ret;
4132 
4133  ret = rnode_comparator(&ba->rnode, &bb->rnode);
4134 
4135  if (ret != 0)
4136  return ret;
4137 
4138  if (ba->forkNum < bb->forkNum)
4139  return -1;
4140  if (ba->forkNum > bb->forkNum)
4141  return 1;
4142 
4143  if (ba->blockNum < bb->blockNum)
4144  return -1;
4145  if (ba->blockNum > bb->blockNum)
4146  return 1;
4147 
4148  return 0;
4149 }
ForkNumber forkNum
Definition: buf_internals.h:94
static int rnode_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:4048
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
void BufmgrCommit ( void  )

Definition at line 2574 of file bufmgr.c.

Referenced by PrepareTransaction(), and RecordTransactionCommit().

2575 {
2576  /* Nothing to do in bufmgr anymore... */
2577 }
static void CheckForBufferLeaks ( void  )
static

Definition at line 2473 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, NULL, PrintBufferLeakWarning(), PrivateRefCountArray, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

2474 {
2475 #ifdef USE_ASSERT_CHECKING
2476  int RefCountErrors = 0;
2477  PrivateRefCountEntry *res;
2478  int i;
2479 
2480  /* check the array */
2481  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
2482  {
2483  res = &PrivateRefCountArray[i];
2484 
2485  if (res->buffer != InvalidBuffer)
2486  {
2488  RefCountErrors++;
2489  }
2490  }
2491 
2492  /* if necessary search the hash */
2494  {
2495  HASH_SEQ_STATUS hstat;
2496 
2498  while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
2499  {
2501  RefCountErrors++;
2502  }
2503 
2504  }
2505 
2506  Assert(RefCountErrors == 0);
2507 #endif
2508 }
void PrintBufferLeakWarning(Buffer buffer)
Definition: bufmgr.c:2514
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:170
#define InvalidBuffer
Definition: buf.h:25
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:168
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:77
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1353
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1343
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:169
int i
void CheckPointBuffers ( int  flags)

Definition at line 2557 of file bufmgr.c.

References BufferSync(), CheckpointStats, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and smgrsync().

Referenced by CheckPointGuts().

2558 {
2559  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
2561  BufferSync(flags);
2563  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
2564  smgrsync();
2566  TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
2567 }
TimestampTz ckpt_sync_end_t
Definition: xlog.h:202
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1569
static void BufferSync(int flags)
Definition: bufmgr.c:1761
CheckpointStatsData CheckpointStats
Definition: xlog.c:172
TimestampTz ckpt_write_t
Definition: xlog.h:200
TimestampTz ckpt_sync_t
Definition: xlog.h:201
void smgrsync(void)
Definition: smgr.c:759
static int ckpt_buforder_comparator ( const void *  pa,
const void *  pb 
)
static

Definition at line 4158 of file bufmgr.c.

References CkptSortItem::blockNum, CkptSortItem::forkNum, CkptSortItem::relNode, and CkptSortItem::tsId.

Referenced by BufferSync().

4159 {
4160  const CkptSortItem *a = (CkptSortItem *) pa;
4161  const CkptSortItem *b = (CkptSortItem *) pb;
4162 
4163  /* compare tablespace */
4164  if (a->tsId < b->tsId)
4165  return -1;
4166  else if (a->tsId > b->tsId)
4167  return 1;
4168  /* compare relation */
4169  if (a->relNode < b->relNode)
4170  return -1;
4171  else if (a->relNode > b->relNode)
4172  return 1;
4173  /* compare fork */
4174  else if (a->forkNum < b->forkNum)
4175  return -1;
4176  else if (a->forkNum > b->forkNum)
4177  return 1;
4178  /* compare block number */
4179  else if (a->blockNum < b->blockNum)
4180  return -1;
4181  else /* should not be the same block ... */
4182  return 1;
4183 }
BlockNumber blockNum
ForkNumber forkNum
bool ComputeIoConcurrency ( int  io_concurrency,
double *  target 
)

Definition at line 467 of file bufmgr.c.

References i, Max, MAX_IO_CONCURRENCY, and Min.

Referenced by check_effective_io_concurrency(), and ExecInitBitmapHeapScan().

468 {
469  double new_prefetch_pages = 0.0;
470  int i;
471 
472  /*
473  * Make sure the io_concurrency value is within valid range; it may have
474  * been forced with a manual pg_tablespace update.
475  */
476  io_concurrency = Min(Max(io_concurrency, 0), MAX_IO_CONCURRENCY);
477 
478  /*----------
479  * The user-visible GUC parameter is the number of drives (spindles),
480  * which we need to translate to a number-of-pages-to-prefetch target.
481  * The target value is stashed in *extra and then assigned to the actual
482  * variable by assign_effective_io_concurrency.
483  *
484  * The expected number of prefetch pages needed to keep N drives busy is:
485  *
486  * drives | I/O requests
487  * -------+----------------
488  * 1 | 1
489  * 2 | 2/1 + 2/2 = 3
490  * 3 | 3/1 + 3/2 + 3/3 = 5 1/2
491  * 4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
492  * n | n * H(n)
493  *
494  * This is called the "coupon collector problem" and H(n) is called the
495  * harmonic series. This could be approximated by n * ln(n), but for
496  * reasonable numbers of drives we might as well just compute the series.
497  *
498  * Alternatively we could set the target to the number of pages necessary
499  * so that the expected number of active spindles is some arbitrary
500  * percentage of the total. This sounds the same but is actually slightly
501  * different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
502  * that desired fraction.
503  *
504  * Experimental results show that both of these formulas aren't aggressive
505  * enough, but we don't really have any better proposals.
506  *
507  * Note that if io_concurrency = 0 (disabled), we must set target = 0.
508  *----------
509  */
510 
511  for (i = 1; i <= io_concurrency; i++)
512  new_prefetch_pages += (double) io_concurrency / (double) i;
513 
514  *target = new_prefetch_pages;
515 
516  /* This range check shouldn't fail, but let's be paranoid */
517  return (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX);
518 }
#define MAX_IO_CONCURRENCY
Definition: bufmgr.h:79
#define Min(x, y)
Definition: c.h:802
#define Max(x, y)
Definition: c.h:796
int i
bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 3555 of file bufmgr.c.

References Assert, buf, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, LW_EXCLUSIVE, and LWLockConditionalAcquire().

Referenced by _bt_getbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

3556 {
3557  BufferDesc *buf;
3558 
3559  Assert(BufferIsValid(buffer));
3560  if (BufferIsLocal(buffer))
3561  return true; /* act as though we got it */
3562 
3563  buf = GetBufferDescriptor(buffer - 1);
3564 
3566  LW_EXCLUSIVE);
3567 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1282
static char * buf
Definition: pg_test_fsync.c:65
#define GetBufferDescriptor(id)
#define BufferDescriptorGetContentLock(bdesc)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 3701 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, ConditionalLockBuffer(), GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), PrivateRefCountEntry::refcount, and UnlockBufHdr.

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), lazy_scan_heap(), and lazy_vacuum_heap().

3702 {
3703  BufferDesc *bufHdr;
3704  uint32 buf_state,
3705  refcount;
3706 
3707  Assert(BufferIsValid(buffer));
3708 
3709  if (BufferIsLocal(buffer))
3710  {
3711  refcount = LocalRefCount[-buffer - 1];
3712  /* There should be exactly one pin */
3713  Assert(refcount > 0);
3714  if (refcount != 1)
3715  return false;
3716  /* Nobody else to wait for */
3717  return true;
3718  }
3719 
3720  /* There should be exactly one local pin */
3721  refcount = GetPrivateRefCount(buffer);
3722  Assert(refcount);
3723  if (refcount != 1)
3724  return false;
3725 
3726  /* Try to acquire lock */
3727  if (!ConditionalLockBuffer(buffer))
3728  return false;
3729 
3730  bufHdr = GetBufferDescriptor(buffer - 1);
3731  buf_state = LockBufHdr(bufHdr);
3732  refcount = BUF_STATE_GET_REFCOUNT(buf_state);
3733 
3734  Assert(refcount > 0);
3735  if (refcount == 1)
3736  {
3737  /* Successfully acquired exclusive lock with pincount 1 */
3738  UnlockBufHdr(bufHdr, buf_state);
3739  return true;
3740  }
3741 
3742  /* Failed, so release the lock */
3743  UnlockBufHdr(bufHdr, buf_state);
3744  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3745  return false;
3746 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define GetBufferDescriptor(id)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:3555
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3529
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define UnlockBufHdr(desc, s)
int32 * LocalRefCount
Definition: localbuf.c:45
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
void DropDatabaseBuffers ( Oid  dbid)

Definition at line 3026 of file bufmgr.c.

References RelFileNode::dbNode, GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), NBuffers, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by dbase_redo(), dropdb(), and movedb().

3027 {
3028  int i;
3029 
3030  /*
3031  * We needn't consider local buffers, since by assumption the target
3032  * database isn't our own.
3033  */
3034 
3035  for (i = 0; i < NBuffers; i++)
3036  {
3037  BufferDesc *bufHdr = GetBufferDescriptor(i);
3038  uint32 buf_state;
3039 
3040  /*
3041  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3042  * and saves some cycles.
3043  */
3044  if (bufHdr->tag.rnode.dbNode != dbid)
3045  continue;
3046 
3047  buf_state = LockBufHdr(bufHdr);
3048  if (bufHdr->tag.rnode.dbNode == dbid)
3049  InvalidateBuffer(bufHdr); /* releases spinlock */
3050  else
3051  UnlockBufHdr(bufHdr, buf_state);
3052  }
3053 }
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1347
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
void DropRelFileNodeBuffers ( RelFileNodeBackend  rnode,
ForkNumber  forkNum,
BlockNumber  firstDelBlock 
)

Definition at line 2866 of file bufmgr.c.

References RelFileNodeBackend::backend, buftag::blockNum, DropRelFileNodeLocalBuffers(), buftag::forkNum, GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), MyBackendId, NBuffers, RelFileNodeBackend::node, RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlinkfork(), and smgrtruncate().

2868 {
2869  int i;
2870 
2871  /* If it's a local relation, it's localbuf.c's problem. */
2872  if (RelFileNodeBackendIsTemp(rnode))
2873  {
2874  if (rnode.backend == MyBackendId)
2875  DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock);
2876  return;
2877  }
2878 
2879  for (i = 0; i < NBuffers; i++)
2880  {
2881  BufferDesc *bufHdr = GetBufferDescriptor(i);
2882  uint32 buf_state;
2883 
2884  /*
2885  * We can make this a tad faster by prechecking the buffer tag before
2886  * we attempt to lock the buffer; this saves a lot of lock
2887  * acquisitions in typical cases. It should be safe because the
2888  * caller must have AccessExclusiveLock on the relation, or some other
2889  * reason to be certain that no one is loading new pages of the rel
2890  * into the buffer pool. (Otherwise we might well miss such pages
2891  * entirely.) Therefore, while the tag might be changing while we
2892  * look at it, it can't be changing *to* a value we care about, only
2893  * *away* from such a value. So false negatives are impossible, and
2894  * false positives are safe because we'll recheck after getting the
2895  * buffer lock.
2896  *
2897  * We could check forkNum and blockNum as well as the rnode, but the
2898  * incremental win from doing so seems small.
2899  */
2900  if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
2901  continue;
2902 
2903  buf_state = LockBufHdr(bufHdr);
2904  if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
2905  bufHdr->tag.forkNum == forkNum &&
2906  bufHdr->tag.blockNum >= firstDelBlock)
2907  InvalidateBuffer(bufHdr); /* releases spinlock */
2908  else
2909  UnlockBufHdr(bufHdr, buf_state);
2910  }
2911 }
BackendId MyBackendId
Definition: globals.c:72
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
ForkNumber forkNum
Definition: buf_internals.h:94
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1347
void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, BlockNumber firstDelBlock)
Definition: localbuf.c:320
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
RelFileNode node
Definition: relfilenode.h:74
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
BackendId backend
Definition: relfilenode.h:75
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
void DropRelFileNodesAllBuffers ( RelFileNodeBackend rnodes,
int  nnodes 
)

Definition at line 2923 of file bufmgr.c.

References DROP_RELS_BSEARCH_THRESHOLD, DropRelFileNodeAllLocalBuffers(), GetBufferDescriptor, i, InvalidateBuffer(), LockBufHdr(), MyBackendId, NBuffers, RelFileNodeBackend::node, NULL, palloc(), pfree(), pg_qsort(), RelFileNodeBackendIsTemp, RelFileNodeEquals, buftag::rnode, rnode_comparator(), BufferDesc::tag, and UnlockBufHdr.

Referenced by smgrdounlink(), and smgrdounlinkall().

2924 {
2925  int i,
2926  n = 0;
2927  RelFileNode *nodes;
2928  bool use_bsearch;
2929 
2930  if (nnodes == 0)
2931  return;
2932 
2933  nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
2934 
2935  /* If it's a local relation, it's localbuf.c's problem. */
2936  for (i = 0; i < nnodes; i++)
2937  {
2938  if (RelFileNodeBackendIsTemp(rnodes[i]))
2939  {
2940  if (rnodes[i].backend == MyBackendId)
2941  DropRelFileNodeAllLocalBuffers(rnodes[i].node);
2942  }
2943  else
2944  nodes[n++] = rnodes[i].node;
2945  }
2946 
2947  /*
2948  * If there are no non-local relations, then we're done. Release the
2949  * memory and return.
2950  */
2951  if (n == 0)
2952  {
2953  pfree(nodes);
2954  return;
2955  }
2956 
2957  /*
2958  * For low number of relations to drop just use a simple walk through, to
2959  * save the bsearch overhead. The threshold to use is rather a guess than
2960  * an exactly determined value, as it depends on many factors (CPU and RAM
2961  * speeds, amount of shared buffers etc.).
2962  */
2963  use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
2964 
2965  /* sort the list of rnodes if necessary */
2966  if (use_bsearch)
2967  pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
2968 
2969  for (i = 0; i < NBuffers; i++)
2970  {
2971  RelFileNode *rnode = NULL;
2972  BufferDesc *bufHdr = GetBufferDescriptor(i);
2973  uint32 buf_state;
2974 
2975  /*
2976  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
2977  * and saves some cycles.
2978  */
2979 
2980  if (!use_bsearch)
2981  {
2982  int j;
2983 
2984  for (j = 0; j < n; j++)
2985  {
2986  if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
2987  {
2988  rnode = &nodes[j];
2989  break;
2990  }
2991  }
2992  }
2993  else
2994  {
2995  rnode = bsearch((const void *) &(bufHdr->tag.rnode),
2996  nodes, n, sizeof(RelFileNode),
2998  }
2999 
3000  /* buffer doesn't belong to any of the given relfilenodes; skip it */
3001  if (rnode == NULL)
3002  continue;
3003 
3004  buf_state = LockBufHdr(bufHdr);
3005  if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
3006  InvalidateBuffer(bufHdr); /* releases spinlock */
3007  else
3008  UnlockBufHdr(bufHdr, buf_state);
3009  }
3010 
3011  pfree(nodes);
3012 }
BackendId MyBackendId
Definition: globals.c:72
#define RelFileNodeBackendIsTemp(rnode)
Definition: relfilenode.h:78
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:1347
#define DROP_RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:68
void DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
Definition: localbuf.c:367
void pfree(void *pointer)
Definition: mcxt.c:992
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
static int rnode_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:4048
RelFileNode node
Definition: relfilenode.h:74
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
void pg_qsort(void *base, size_t nel, size_t elsize, int(*cmp)(const void *, const void *))
Definition: qsort.c:113
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
void * palloc(Size size)
Definition: mcxt.c:891
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln 
)
static

Definition at line 2648 of file bufmgr.c.

References ErrorContextCallback::arg, BufferUsage::blk_write_time, buftag::blockNum, BM_JUST_DIRTIED, BM_PERMANENT, BufferGetLSN, BufHdrGetBlock, ErrorContextCallback::callback, RelFileNode::dbNode, error_context_stack, buftag::forkNum, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBackendId, LockBufHdr(), RelFileNodeBackend::node, NULL, PageSetChecksumCopy(), pgBufferUsage, pgstat_count_buffer_write_time, ErrorContextCallback::previous, RelFileNode::relNode, buftag::rnode, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rnode, smgropen(), smgrwrite(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdr, and XLogFlush().

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushOneBuffer(), FlushRelationBuffers(), and SyncOneBuffer().

2649 {
2650  XLogRecPtr recptr;
2651  ErrorContextCallback errcallback;
2652  instr_time io_start,
2653  io_time;
2654  Block bufBlock;
2655  char *bufToWrite;
2656  uint32 buf_state;
2657 
2658  /*
2659  * Acquire the buffer's io_in_progress lock. If StartBufferIO returns
2660  * false, then someone else flushed the buffer before we could, so we need
2661  * not do anything.
2662  */
2663  if (!StartBufferIO(buf, false))
2664  return;
2665 
2666  /* Setup error traceback support for ereport() */
2668  errcallback.arg = (void *) buf;
2669  errcallback.previous = error_context_stack;
2670  error_context_stack = &errcallback;
2671 
2672  /* Find smgr relation for buffer */
2673  if (reln == NULL)
2674  reln = smgropen(buf->tag.rnode, InvalidBackendId);
2675 
2676  TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
2677  buf->tag.blockNum,
2678  reln->smgr_rnode.node.spcNode,
2679  reln->smgr_rnode.node.dbNode,
2680  reln->smgr_rnode.node.relNode);
2681 
2682  buf_state = LockBufHdr(buf);
2683 
2684  /*
2685  * Run PageGetLSN while holding header lock, since we don't have the
2686  * buffer locked exclusively in all cases.
2687  */
2688  recptr = BufferGetLSN(buf);
2689 
2690  /* To check if block content changes while flushing. - vadim 01/17/97 */
2691  buf_state &= ~BM_JUST_DIRTIED;
2692  UnlockBufHdr(buf, buf_state);
2693 
2694  /*
2695  * Force XLOG flush up to buffer's LSN. This implements the basic WAL
2696  * rule that log updates must hit disk before any of the data-file changes
2697  * they describe do.
2698  *
2699  * However, this rule does not apply to unlogged relations, which will be
2700  * lost after a crash anyway. Most unlogged relation pages do not bear
2701  * LSNs since we never emit WAL records for them, and therefore flushing
2702  * up through the buffer LSN would be useless, but harmless. However,
2703  * GiST indexes use LSNs internally to track page-splits, and therefore
2704  * unlogged GiST pages bear "fake" LSNs generated by
2705  * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
2706  * LSN counter could advance past the WAL insertion point; and if it did
2707  * happen, attempting to flush WAL through that location would fail, with
2708  * disastrous system-wide consequences. To make sure that can't happen,
2709  * skip the flush if the buffer isn't permanent.
2710  */
2711  if (buf_state & BM_PERMANENT)
2712  XLogFlush(recptr);
2713 
2714  /*
2715  * Now it's safe to write buffer to disk. Note that no one else should
2716  * have been able to write it while we were busy with log flushing because
2717  * we have the io_in_progress lock.
2718  */
2719  bufBlock = BufHdrGetBlock(buf);
2720 
2721  /*
2722  * Update page checksum if desired. Since we have only shared lock on the
2723  * buffer, other processes might be updating hint bits in it, so we must
2724  * copy the page to private storage if we do checksumming.
2725  */
2726  bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
2727 
2728  if (track_io_timing)
2729  INSTR_TIME_SET_CURRENT(io_start);
2730 
2731  /*
2732  * bufToWrite is either the shared buffer or a copy, as appropriate.
2733  */
2734  smgrwrite(reln,
2735  buf->tag.forkNum,
2736  buf->tag.blockNum,
2737  bufToWrite,
2738  false);
2739 
2740  if (track_io_timing)
2741  {
2742  INSTR_TIME_SET_CURRENT(io_time);
2743  INSTR_TIME_SUBTRACT(io_time, io_start);
2746  }
2747 
2749 
2750  /*
2751  * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
2752  * end the io_in_progress state.
2753  */
2754  TerminateBufferIO(buf, true, 0);
2755 
2756  TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
2757  buf->tag.blockNum,
2758  reln->smgr_rnode.node.spcNode,
2759  reln->smgr_rnode.node.dbNode,
2760  reln->smgr_rnode.node.relNode);
2761 
2762  /* Pop the error context stack */
2763  error_context_stack = errcallback.previous;
2764 }
#define BM_PERMANENT
Definition: buf_internals.h:67
ForkNumber forkNum
Definition: buf_internals.h:94
struct timeval instr_time
Definition: instr_time.h:147
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1143
struct ErrorContextCallback * previous
Definition: elog.h:238
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2745
ErrorContextCallback * error_context_stack
Definition: elog.c:88
long shared_blks_written
Definition: instrument.h:24
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:3858
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:155
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:647
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
unsigned int uint32
Definition: c.h:265
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:137
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:3925
#define InvalidBackendId
Definition: backendid.h:23
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:57
RelFileNode node
Definition: relfilenode.h:74
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:202
instr_time blk_write_time
Definition: instrument.h:32
#define pgstat_count_buffer_write_time(n)
Definition: pgstat.h:1177
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
BufferTag tag
void(* callback)(void *arg)
Definition: elog.h:239
#define UnlockBufHdr(desc, s)
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:58
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:4010
bool track_io_timing
Definition: bufmgr.c:111
Pointer Page
Definition: bufpage.h:74
BufferUsage pgBufferUsage
Definition: instrument.c:20
void * Block
Definition: bufmgr.h:25
void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 3229 of file bufmgr.c.

References BM_DIRTY, BM_VALID, BufferDescriptorGetContentLock, CurrentResourceOwner, RelFileNode::dbNode, FlushBuffer(), GetBufferDescriptor, i, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NULL, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by dbase_redo().

3230 {
3231  int i;
3232  BufferDesc *bufHdr;
3233 
3234  /* Make sure we can handle the pin inside the loop */
3236 
3237  for (i = 0; i < NBuffers; i++)
3238  {
3239  uint32 buf_state;
3240 
3241  bufHdr = GetBufferDescriptor(i);
3242 
3243  /*
3244  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3245  * and saves some cycles.
3246  */
3247  if (bufHdr->tag.rnode.dbNode != dbid)
3248  continue;
3249 
3251 
3252  buf_state = LockBufHdr(bufHdr);
3253  if (bufHdr->tag.rnode.dbNode == dbid &&
3254  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3255  {
3256  PinBuffer_Locked(bufHdr);
3258  FlushBuffer(bufHdr, NULL);
3260  UnpinBuffer(bufHdr, true);
3261  }
3262  else
3263  UnlockBufHdr(bufHdr, buf_state);
3264  }
3265 }
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
#define BM_DIRTY
Definition: buf_internals.h:59
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2648
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define GetBufferDescriptor(id)
unsigned int uint32
Definition: c.h:265
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1677
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
#define BM_VALID
Definition: buf_internals.h:60
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1639
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:186
void FlushOneBuffer ( Buffer  buffer)

Definition at line 3272 of file bufmgr.c.

References Assert, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, FlushBuffer(), GetBufferDescriptor, LWLockHeldByMe(), and NULL.

Referenced by XLogReadBufferForRedoExtended().

3273 {
3274  BufferDesc *bufHdr;
3275 
3276  /* currently not needed, but no fundamental reason not to support */
3277  Assert(!BufferIsLocal(buffer));
3278 
3279  Assert(BufferIsPinned(buffer));
3280 
3281  bufHdr = GetBufferDescriptor(buffer - 1);
3282 
3284 
3285  FlushBuffer(bufHdr, NULL);
3286 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1830
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2648
#define GetBufferDescriptor(id)
#define BufferDescriptorGetContentLock(bdesc)
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
void FlushRelationBuffers ( Relation  rel)

Definition at line 3131 of file bufmgr.c.

References ErrorContextCallback::arg, buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_VALID, BufferDescriptorGetContentLock, ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushBuffer(), buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, i, local_buffer_write_error_callback(), LocalBufHdrGetBlock, LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), NBuffers, NLocBuffer, PageSetChecksumInplace(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PinBuffer_Locked(), ErrorContextCallback::previous, RelationData::rd_node, RelationData::rd_smgr, RelationOpenSmgr, RelationUsesLocalBuffers, RelFileNodeEquals, ReservePrivateRefCountEntry(), ResourceOwnerEnlargeBuffers(), buftag::rnode, smgrwrite(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr, and UnpinBuffer().

Referenced by ATExecSetTableSpace(), and heap_sync().

3132 {
3133  int i;
3134  BufferDesc *bufHdr;
3135 
3136  /* Open rel at the smgr level if not already done */
3137  RelationOpenSmgr(rel);
3138 
3139  if (RelationUsesLocalBuffers(rel))
3140  {
3141  for (i = 0; i < NLocBuffer; i++)
3142  {
3143  uint32 buf_state;
3144 
3145  bufHdr = GetLocalBufferDescriptor(i);
3146  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3147  ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
3148  (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3149  {
3150  ErrorContextCallback errcallback;
3151  Page localpage;
3152 
3153  localpage = (char *) LocalBufHdrGetBlock(bufHdr);
3154 
3155  /* Setup error traceback support for ereport() */
3157  errcallback.arg = (void *) bufHdr;
3158  errcallback.previous = error_context_stack;
3159  error_context_stack = &errcallback;
3160 
3161  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
3162 
3163  smgrwrite(rel->rd_smgr,
3164  bufHdr->tag.forkNum,
3165  bufHdr->tag.blockNum,
3166  localpage,
3167  false);
3168 
3169  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3170  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
3171 
3172  /* Pop the error context stack */
3173  error_context_stack = errcallback.previous;
3174  }
3175  }
3176 
3177  return;
3178  }
3179 
3180  /* Make sure we can handle the pin inside the loop */
3182 
3183  for (i = 0; i < NBuffers; i++)
3184  {
3185  uint32 buf_state;
3186 
3187  bufHdr = GetBufferDescriptor(i);
3188 
3189  /*
3190  * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
3191  * and saves some cycles.
3192  */
3193  if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
3194  continue;
3195 
3197 
3198  buf_state = LockBufHdr(bufHdr);
3199  if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
3200  (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
3201  {
3202  PinBuffer_Locked(bufHdr);
3204  FlushBuffer(bufHdr, rel->rd_smgr);
3206  UnpinBuffer(bufHdr, true);
3207  }
3208  else
3209  UnlockBufHdr(bufHdr, buf_state);
3210  }
3211 }
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:61
ForkNumber forkNum
Definition: buf_internals.h:94
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:4029
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
#define GetLocalBufferDescriptor(id)
#define BM_DIRTY
Definition: buf_internals.h:59
struct ErrorContextCallback * previous
Definition: elog.h:238
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln)
Definition: bufmgr.c:2648
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
ErrorContextCallback * error_context_stack
Definition: elog.c:88
#define RelationOpenSmgr(relation)
Definition: rel.h:457
int NLocBuffer
Definition: localbuf.c:41
void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:647
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
unsigned int uint32
Definition: c.h:265
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1677
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
#define BM_VALID
Definition: buf_internals.h:60
RelFileNode rd_node
Definition: rel.h:85
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:1639
void PageSetChecksumInplace(Page page, BlockNumber blkno)
Definition: bufpage.c:1172
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:509
BufferTag tag
void(* callback)(void *arg)
Definition: elog.h:239
#define UnlockBufHdr(desc, s)
int i
int NBuffers
Definition: globals.c:122
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:287
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:186
pg_atomic_uint32 state
Pointer Page
Definition: bufpage.h:74
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 381 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, and REFCOUNT_ARRAY_ENTRIES.

Referenced by UnpinBuffer().

382 {
383  Assert(ref->refcount == 0);
384 
385  if (ref >= &PrivateRefCountArray[0] &&
387  {
388  ref->buffer = InvalidBuffer;
389 
390  /*
391  * Mark the just used entry as reserved - in many scenarios that
392  * allows us to avoid ever having to search the array/hash for free
393  * entries.
394  */
395  ReservedRefCountEntry = ref;
396  }
397  else
398  {
399  bool found;
400  Buffer buffer = ref->buffer;
401 
403  (void *) &buffer,
404  HASH_REMOVE,
405  &found);
406  Assert(found);
409  }
410 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:170
#define InvalidBuffer
Definition: buf.h:25
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:885
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:168
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:172
#define Assert(condition)
Definition: c.h:671
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:77
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:169
int Buffer
Definition: buf.h:23
static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 358 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, GetPrivateRefCountEntry(), NULL, and PrivateRefCountEntry::refcount.

Referenced by ConditionalLockBufferForCleanup(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), and PrintBufferLeakWarning().

359 {
361 
362  Assert(BufferIsValid(buffer));
363  Assert(!BufferIsLocal(buffer));
364 
365  /*
366  * Not moving the entry - that's ok for the current users, but we might
367  * want to change this one day.
368  */
369  ref = GetPrivateRefCountEntry(buffer, false);
370 
371  if (ref == NULL)
372  return 0;
373  return ref->refcount;
374 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:278
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 278 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, NULL, PrivateRefCountArray, PrivateRefCountOverflowed, PrivateRefCountEntry::refcount, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountEntry, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBuffer().

279 {
280  PrivateRefCountEntry *res;
281  int i;
282 
283  Assert(BufferIsValid(buffer));
284  Assert(!BufferIsLocal(buffer));
285 
286  /*
287  * First search for references in the array, that'll be sufficient in the
288  * majority of cases.
289  */
290  for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
291  {
292  res = &PrivateRefCountArray[i];
293 
294  if (res->buffer == buffer)
295  return res;
296  }
297 
298  /*
299  * By here we know that the buffer, if already pinned, isn't residing in
300  * the array.
301  *
302  * Only look up the buffer in the hashtable if we've previously overflowed
303  * into it.
304  */
305  if (PrivateRefCountOverflowed == 0)
306  return NULL;
307 
308  res = hash_search(PrivateRefCountHash,
309  (void *) &buffer,
310  HASH_FIND,
311  NULL);
312 
313  if (res == NULL)
314  return NULL;
315  else if (!do_move)
316  {
317  /* caller doesn't want us to move the hash entry into the array */
318  return res;
319  }
320  else
321  {
322  /* move buffer from hashtable into the free array slot */
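323  bool found;
324  PrivateRefCountEntry *free;
325 
326  /* Ensure there's a free array slot */
327  ReservePrivateRefCountEntry();
328 
329  /* Use up the reserved slot */
330  Assert(ReservedRefCountEntry != NULL);
331  free = ReservedRefCountEntry;
332  ReservedRefCountEntry = NULL;
333  Assert(free->buffer == InvalidBuffer);
334 
335  /* and fill it */
336  free->buffer = buffer;
337  free->refcount = res->refcount;
338 
339  /* delete from hashtable */
340  hash_search(PrivateRefCountHash,
341  (void *) &buffer,
342  HASH_REMOVE,
343  &found);
344  Assert(found);
345  Assert(PrivateRefCountOverflowed > 0);
346  PrivateRefCountOverflowed--;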
347 
348  return free;
349  }
350 }
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:170
#define InvalidBuffer
Definition: buf.h:25
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:885
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:168
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:172
#define free(a)
Definition: header.h:60
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:77
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:169
int i
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:186
bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 3675 of file bufmgr.c.

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and RecoveryConflictInterrupt().

3676 {
3677  int bufid = GetStartupBufferPinWaitBufId();
3678 
3679  /*
3680  * If we get woken slowly then it's possible that the Startup process was
3681  * already woken by other backends before we got here. Also possible that
3682  * we get here by multiple interrupts or interrupts at inappropriate
3683  * times, so make sure we do nothing if the bufid is not set.
3684  */
3685  if (bufid < 0)
3686  return false;
3687 
3688  if (GetPrivateRefCount(bufid + 1) > 0)
3689  return true;
3690 
3691  return false;
3692 }
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:623
void IncrBufferRefCount ( Buffer  buffer)

Definition at line 3330 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, GetPrivateRefCountEntry(), LocalRefCount, NULL, PrivateRefCountEntry::refcount, ResourceOwnerEnlargeBuffers(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ExecStoreTuple(), ReadBufferBI(), scanPostingTree(), and startScanEntry().

3331 {
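3332  Assert(BufferIsPinned(buffer));
3333  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
3334  ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);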
3335  if (BufferIsLocal(buffer))
3336  LocalRefCount[-buffer - 1]++;
3337  else
3338  {
3339  PrivateRefCountEntry *ref;
3340 
3341  ref = GetPrivateRefCountEntry(buffer, true);
3342  Assert(ref != NULL);
3343  ref->refcount++;
3344  }
3345 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:278
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:855
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
int32 * LocalRefCount
Definition: localbuf.c:45
void InitBufferPoolAccess ( void  )

Definition at line 2420 of file bufmgr.c.

References HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MemSet, and PrivateRefCountArray.

Referenced by BaseInit().

2421 {
2422  HASHCTL hash_ctl;
2423 
2424  memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
2425 
2426  MemSet(&hash_ctl, 0, sizeof(hash_ctl));
2427  hash_ctl.keysize = sizeof(int32);
2428  hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
2429 
2430  PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
2431  HASH_ELEM | HASH_BLOBS);
2432 }
struct PrivateRefCountEntry PrivateRefCountEntry
#define HASH_ELEM
Definition: hsearch.h:87
Size entrysize
Definition: hsearch.h:73
#define MemSet(start, val, len)
Definition: c.h:853
signed int int32
Definition: c.h:253
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:168
#define HASH_BLOBS
Definition: hsearch.h:88
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:301
Size keysize
Definition: hsearch.h:72
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:169
void InitBufferPoolBackend ( void  )

Definition at line 2444 of file bufmgr.c.

References AtProcExit_Buffers(), and on_shmem_exit().

Referenced by AuxiliaryProcessMain(), and InitPostgres().

2445 {
2446  on_shmem_exit(AtProcExit_Buffers, 0);
2447 }
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:348
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:2454
static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 1347 of file bufmgr.c.

References Assert, BM_LOCKED, BM_TAG_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer, BUFFERTAGS_EQUAL, BufMappingPartitionLock, BufTableDelete(), BufTableHashCode(), CLEAR_BUFFERTAG, elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, StrategyFreeBuffer(), BufferDesc::tag, UnlockBufHdr, and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelFileNodeBuffers(), and DropRelFileNodesAllBuffers().

1348 {
1349  BufferTag oldTag;
1350  uint32 oldHash; /* hash value for oldTag */
1351  LWLock *oldPartitionLock; /* buffer partition lock for it */
1352  uint32 oldFlags;
1353  uint32 buf_state;
1354 
1355  /* Save the original buffer tag before dropping the spinlock */
1356  oldTag = buf->tag;
1357 
1358  buf_state = pg_atomic_read_u32(&buf->state);
1359  Assert(buf_state & BM_LOCKED);
1360  UnlockBufHdr(buf, buf_state);
1361 
1362  /*
1363  * Need to compute the old tag's hashcode and partition lock ID. XXX is it
1364  * worth storing the hashcode in BufferDesc so we need not recompute it
1365  * here? Probably not.
1366  */
1367  oldHash = BufTableHashCode(&oldTag);
1368  oldPartitionLock = BufMappingPartitionLock(oldHash);
1369 
1370 retry:
1371 
1372  /*
1373  * Acquire exclusive mapping lock in preparation for changing the buffer's
1374  * association.
1375  */
1376  LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
1377 
1378  /* Re-lock the buffer header */
1379  buf_state = LockBufHdr(buf);
1380 
1381  /* If it's changed while we were waiting for lock, do nothing */
1382  if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
1383  {
1384  UnlockBufHdr(buf, buf_state);
1385  LWLockRelease(oldPartitionLock);
1386  return;
1387  }
1388 
1389  /*
1390  * We assume the only reason for it to be pinned is that someone else is
1391  * flushing the page out. Wait for them to finish. (This could be an
1392  * infinite loop if the refcount is messed up... it would be nice to time
1393  * out after awhile, but there seems no way to be sure how many loops may
1394  * be needed. Note that if the other guy has pinned the buffer but not
1395  * yet done StartBufferIO, WaitIO will fall through and we'll effectively
1396  * be busy-looping here.)
1397  */
1398  if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
1399  {
1400  UnlockBufHdr(buf, buf_state);
1401  LWLockRelease(oldPartitionLock);
1402  /* safety check: should definitely not be our *own* pin */
1403  if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
1404  elog(ERROR, "buffer is pinned in InvalidateBuffer");
1405  WaitIO(buf);
1406  goto retry;
1407  }
1408 
1409  /*
1410  * Clear out the buffer's tag and flags. We must do this to ensure that
1411  * linear scans of the buffer array don't think the buffer is valid.
1412  */
1413  oldFlags = buf_state & BUF_FLAG_MASK;
1414  CLEAR_BUFFERTAG(buf->tag);
1415  buf_state &= ~(BUF_FLAG_MASK | BUF_USAGECOUNT_MASK);
1416  UnlockBufHdr(buf, buf_state);
1417 
1418  /*
1419  * Remove the buffer from the lookup hashtable, if it was in there.
1420  */
1421  if (oldFlags & BM_TAG_VALID)
1422  BufTableDelete(&oldTag, oldHash);
1423 
1424  /*
1425  * Done with mapping lock.
1426  */
1427  LWLockRelease(oldPartitionLock);
1428 
1429  /*
1430  * Insert the buffer at the head of the list of free buffers.
1431  */
1432  StrategyFreeBuffer(buf);
1433 }
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:150
Definition: lwlock.h:32
#define BufMappingPartitionLock(hashcode)
#define BM_TAG_VALID
Definition: buf_internals.h:61
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:3811
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:80
void StrategyFreeBuffer(BufferDesc *buf)
Definition: freelist.c:347
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define ERROR
Definition: elog.h:43
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
#define BUFFERTAGS_EQUAL(a, b)
#define BM_LOCKED
Definition: buf_internals.h:58
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define Assert(condition)
Definition: c.h:671
#define CLEAR_BUFFERTAG(a)
Definition: buf_internals.h:98
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:43
#define BufferDescriptorGetBuffer(bdesc)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
BufferTag tag
#define UnlockBufHdr(desc, s)
pg_atomic_uint32 state
#define elog
Definition: elog.h:219
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 3757 of file bufmgr.c.

References Assert, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, GetBufferDescriptor, GetPrivateRefCount(), LocalRefCount, LockBufHdr(), LW_EXCLUSIVE, LWLockHeldByMeInMode(), and UnlockBufHdr.

Referenced by _hash_doinsert(), _hash_expandtable(), and hashbucketcleanup().

3758 {
3759  BufferDesc *bufHdr;
3760  uint32 buf_state;
3761 
3762  Assert(BufferIsValid(buffer));
3763 
3764  if (BufferIsLocal(buffer))
3765  {
3766  /* There should be exactly one pin */
3767  if (LocalRefCount[-buffer - 1] != 1)
3768  return false;
3769  /* Nobody else to wait for */
3770  return true;
3771  }
3772 
3773  /* There should be exactly one local pin */
3774  if (GetPrivateRefCount(buffer) != 1)
3775  return false;
3776 
3777  bufHdr = GetBufferDescriptor(buffer - 1);
3778 
3779  /* caller must hold exclusive lock on buffer */
3780  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
3781  LW_EXCLUSIVE));
3782 
3783  buf_state = LockBufHdr(bufHdr);
3784 
3785  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3786  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3787  {
3788  /* pincount is OK. */
3789  UnlockBufHdr(bufHdr, buf_state);
3790  return true;
3791  }
3792 
3793  UnlockBufHdr(bufHdr, buf_state);
3794  return false;
3795 }
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1848
#define GetBufferDescriptor(id)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define UnlockBufHdr(desc, s)
int32 * LocalRefCount
Definition: localbuf.c:45
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
void IssuePendingWritebacks ( WritebackContext context)

Definition at line 4259 of file bufmgr.c.

References buftag::blockNum, buffertag_comparator(), cur, buftag::forkNum, i, InvalidBackendId, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, qsort, RelFileNodeEquals, buftag::rnode, smgropen(), smgrwriteback(), and PendingWriteback::tag.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

4260 {
4261  int i;
4262 
4263  if (context->nr_pending == 0)
4264  return;
4265 
4266  /*
4267  * Executing the writes in-order can make them a lot faster, and allows to
4268  * merge writeback requests to consecutive blocks into larger writebacks.
4269  */
4270  qsort(&context->pending_writebacks, context->nr_pending,
4271  sizeof(PendingWriteback), buffertag_comparator);
4272 
4273  /*
4274  * Coalesce neighbouring writes, but nothing else. For that we iterate
4275  * through the, now sorted, array of pending flushes, and look forward to
4276  * find all neighbouring (or identical) writes.
4277  */
4278  for (i = 0; i < context->nr_pending; i++)
4279  {
4280  PendingWriteback *cur;
4281  PendingWriteback *next;
4282  SMgrRelation reln;
4283  int ahead;
4284  BufferTag tag;
4285  Size nblocks = 1;
4286 
4287  cur = &context->pending_writebacks[i];
4288  tag = cur->tag;
4289 
4290  /*
4291  * Peek ahead, into following writeback requests, to see if they can
4292  * be combined with the current one.
4293  */
4294  for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++)
4295  {
4296  next = &context->pending_writebacks[i + ahead + 1];
4297 
4298  /* different file, stop */
4299  if (!RelFileNodeEquals(cur->tag.rnode, next->tag.rnode) ||
4300  cur->tag.forkNum != next->tag.forkNum)
4301  break;
4302 
4303  /* ok, block queued twice, skip */
4304  if (cur->tag.blockNum == next->tag.blockNum)
4305  continue;
4306 
4307  /* only merge consecutive writes */
4308  if (cur->tag.blockNum + 1 != next->tag.blockNum)
4309  break;
4310 
4311  nblocks++;
4312  cur = next;
4313  }
4314 
4315  i += ahead;
4316 
4317  /* and finally tell the kernel to write the data to storage */
4318  reln = smgropen(tag.rnode, InvalidBackendId);
4319  smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks);
4320  }
4321 
4322  context->nr_pending = 0;
4323 }
static int32 next
Definition: blutils.c:210
ForkNumber forkNum
Definition: buf_internals.h:94
struct cursor * cur
Definition: ecpg.c:28
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:660
static int buffertag_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:4127
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:137
#define InvalidBackendId
Definition: backendid.h:23
size_t Size
Definition: c.h:353
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
int i
#define qsort(a, b, c, d)
Definition: port.h:440
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 4029 of file bufmgr.c.

References buftag::blockNum, errcontext, buftag::forkNum, MyBackendId, NULL, pfree(), relpathbackend, buftag::rnode, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

4030 {
4031  BufferDesc *bufHdr = (BufferDesc *) arg;
4032 
4033  if (bufHdr != NULL)
4034  {
4035  char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId,
4036  bufHdr->tag.forkNum);
4037 
4038  errcontext("writing block %u of relation %s",
4039  bufHdr->tag.blockNum, path);
4040  pfree(path);
4041  }
4042 }
BackendId MyBackendId
Definition: globals.c:72
ForkNumber forkNum
Definition: buf_internals.h:94
void pfree(void *pointer)
Definition: mcxt.c:992
#define NULL
Definition: c.h:226
BlockNumber blockNum
Definition: buf_internals.h:95
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
#define errcontext
Definition: elog.h:164
void * arg
#define relpathbackend(rnode, backend, forknum)
Definition: relpath.h:62
void LockBuffer ( Buffer  buffer,
int  mode 
)

Definition at line 3529 of file bufmgr.c.

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by _bt_doinsert(), _bt_drop_lock_and_maybe_pin(), _bt_endpoint(), _bt_first(), _bt_getbuf(), _bt_getroot(), _bt_killitems(), _bt_moveright(), _bt_pagedel(), _bt_readnextpage(), _bt_relandgetbuf(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_getnewbuf(), _hash_getovflpage(), _hash_metapinit(), _hash_readnext(), _hash_readprev(), _hash_splitbucket_guts(), _hash_squeezebucket(), acquire_sample_rows(), bitgetpage(), blbulkdelete(), blgetbitmap(), blinsert(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), brinbuild(), brinbuildempty(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_page_items(), bt_page_stats(), btree_xlog_delete_get_latestRemovedXid(), btvacuumpage(), checkXLogConsistency(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), copy_heap_data(), count_nondeletable_pages(), entryLoadMoreItems(), ExecCheckHeapTupleVisible(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetTupleForTrigger(), GetVisibilityMapPins(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistbuildempty(), gistbulkdelete(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), 
gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuumcleanup(), hashbucketcleanup(), hashbulkdelete(), hashgettuple(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_hot_search(), heap_inplace_update(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_page_prune_opt(), heap_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), index_fetch_heap(), IndexBuildHeapRangeScan(), initBloomState(), lazy_scan_heap(), LockBufferForCleanup(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddExtraBlocks(), RelationGetBufferForTuple(), revmap_physical_extend(), RI_FKey_check(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), systable_recheck_tuple(), tablesample_getnext(), UnlockReleaseBuffer(), validate_index_heapscan(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), XLogReadBufferExtended(), XLogReadBufferForRedoExtended(), and XLogRecordPageWithFreeSpace().

3530 {
3531  BufferDesc *buf;
3532 
3533  Assert(BufferIsValid(buffer));
3534  if (BufferIsLocal(buffer))
3535  return; /* local buffers need no lock */
3536 
3537  buf = GetBufferDescriptor(buffer - 1);
3538 
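3539  if (mode == BUFFER_LOCK_UNLOCK)
3540  LWLockRelease(BufferDescriptorGetContentLock(buf));
3541  else if (mode == BUFFER_LOCK_SHARE)
3542  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
3543  else if (mode == BUFFER_LOCK_EXCLUSIVE)
3544  LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);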
3545  else
3546  elog(ERROR, "unrecognized buffer lock mode: %d", mode);
3547 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define ERROR
Definition: elog.h:43
static char * buf
Definition: pg_test_fsync.c:65
#define GetBufferDescriptor(id)
#define BufferDescriptorGetContentLock(bdesc)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:88
#define elog
Definition: elog.h:219
void LockBufferForCleanup ( Buffer  buffer)

Definition at line 3586 of file bufmgr.c.

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InHotStandby, LocalRefCount, LockBuffer(), LockBufHdr(), MyProcPid, NULL, PG_WAIT_BUFFER_PIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), SetStartupBufferPinWaitBufId(), UnlockBufHdr, and BufferDesc::wait_backend_pid.

Referenced by btree_xlog_vacuum(), btvacuumpage(), btvacuumscan(), ginVacuumPostingTreeLeaves(), hashbulkdelete(), lazy_scan_heap(), ReadBuffer_common(), and XLogReadBufferForRedoExtended().

3587 {
3588  BufferDesc *bufHdr;
3589 
3590  Assert(BufferIsValid(buffer));
3591  Assert(PinCountWaitBuf == NULL);
3592 
3593  if (BufferIsLocal(buffer))
3594  {
3595  /* There should be exactly one pin */
3596  if (LocalRefCount[-buffer - 1] != 1)
3597  elog(ERROR, "incorrect local pin count: %d",
3598  LocalRefCount[-buffer - 1]);
3599  /* Nobody else to wait for */
3600  return;
3601  }
3602 
3603  /* There should be exactly one local pin */
3604  if (GetPrivateRefCount(buffer) != 1)
3605  elog(ERROR, "incorrect local pin count: %d",
3606  GetPrivateRefCount(buffer));
3607 
3608  bufHdr = GetBufferDescriptor(buffer - 1);
3609 
3610  for (;;)
3611  {
3612  uint32 buf_state;
3613 
3614  /* Try to acquire lock */
3615  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3616  buf_state = LockBufHdr(bufHdr);
3617 
3618  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3619  if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3620  {
3621  /* Successfully acquired exclusive lock with pincount 1 */
3622  UnlockBufHdr(bufHdr, buf_state);
3623  return;
3624  }
3625  /* Failed, so mark myself as waiting for pincount 1 */
3626  if (buf_state & BM_PIN_COUNT_WAITER)
3627  {
3628  UnlockBufHdr(bufHdr, buf_state);
3629  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3630  elog(ERROR, "multiple backends attempting to wait for pincount 1");
3631  }
3632  bufHdr->wait_backend_pid = MyProcPid;
3633  PinCountWaitBuf = bufHdr;
3634  buf_state |= BM_PIN_COUNT_WAITER;
3635  UnlockBufHdr(bufHdr, buf_state);
3636  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3637 
3638  /* Wait to be signaled by UnpinBuffer() */
3639  if (InHotStandby)
3640  {
3641  /* Publish the bufid that Startup process waits on */
3642  SetStartupBufferPinWaitBufId(buffer - 1);
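3643  /* Set alarm and then wait to be signaled by UnpinBuffer() */
3644  ResolveRecoveryConflictWithBufferPin();
3645  /* Reset the published bufid */
3646  SetStartupBufferPinWaitBufId(-1);
3647  }
3648  else
3649  ProcWaitForSignal(PG_WAIT_BUFFER_PIN);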
3650 
3651  /*
3652  * Remove flag marking us as waiter. Normally this will not be set
3653  * anymore, but ProcWaitForSignal() can return for other signals as
3654  * well. We take care to only reset the flag if we're the waiter, as
3655  * theoretically another backend could have started waiting. That's
3656  * impossible with the current usages due to table level locking, but
3657  * better be safe.
3658  */
3659  buf_state = LockBufHdr(bufHdr);
3660  if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
3661  bufHdr->wait_backend_pid == MyProcPid)
3662  buf_state &= ~BM_PIN_COUNT_WAITER;
3663  UnlockBufHdr(bufHdr, buf_state);
3664 
3665  PinCountWaitBuf = NULL;
3666  /* Loop back and try again */
3667  }
3668 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
int MyProcPid
Definition: globals.c:38
int wait_backend_pid
#define InHotStandby
Definition: xlog.h:74
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:89
#define ERROR
Definition: elog.h:43
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:434
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:611
#define GetBufferDescriptor(id)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1739
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3529
#define PG_WAIT_BUFFER_PIN
Definition: pgstat.h:720
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define UnlockBufHdr(desc, s)
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:136
#define elog
Definition: elog.h:219
int32 * LocalRefCount
Definition: localbuf.c:45
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:65
uint32 LockBufHdr ( BufferDesc desc)

Definition at line 4075 of file bufmgr.c.

References BM_LOCKED, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), DropDatabaseBuffers(), DropRelFileNodeBuffers(), DropRelFileNodesAllBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), GetBufferFromRing(), InvalidateBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), pg_buffercache_pages(), ReadBuffer_common(), StartBufferIO(), StrategyGetBuffer(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), UnpinBuffer(), and WaitIO().

4076 {
4077  SpinDelayStatus delayStatus;
4078  uint32 old_buf_state;
4079 
4080  init_local_spin_delay(&delayStatus);
4081 
4082  while (true)
4083  {
4084  /* set BM_LOCKED flag */
4085  old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
4086  /* if it wasn't set before we're OK */
4087  if (!(old_buf_state & BM_LOCKED))
4088  break;
4089  perform_spin_delay(&delayStatus);
4090  }
4091  finish_spin_delay(&delayStatus);
4092  return old_buf_state | BM_LOCKED;
4093 }
#define init_local_spin_delay(status)
Definition: s_lock.h:997
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:175
unsigned int uint32
Definition: c.h:265
#define BM_LOCKED
Definition: buf_internals.h:58
pg_atomic_uint32 state
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:382
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:125
void MarkBufferDirty ( Buffer  buffer)

Definition at line 1445 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferIsLocal, BufferIsPinned, BufferIsValid, elog, ERROR, GetBufferDescriptor, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newroot(), _bt_restore_meta(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_getovflpage(), _hash_initbitmap(), _hash_metapinit(), _hash_splitbucket_guts(), _hash_squeezebucket(), addLeafTuple(), AlterSequence(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), do_setval(), doPickSplit(), fill_seq_with_data(), FreeSpaceMapTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreateIndex(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistbulkdelete(), gistplacetopage(), gistRedoClearFollowRight(), gistRedoCreateIndex(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_inplace_update(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune(), heap_update(), heap_xlog_clean(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_freeze_page(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), 
heap_xlog_update(), heap_xlog_visible(), lazy_scan_heap(), lazy_vacuum_page(), moveLeafs(), nextval_internal(), RelationAddExtraBlocks(), revmap_physical_extend(), saveNodeLink(), seq_redo(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoCreateIndex(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_set(), visibilitymap_truncate(), writeListPage(), and XLogReadBufferForRedoExtended().

1446 {
1447  BufferDesc *bufHdr;
1448  uint32 buf_state;
1449  uint32 old_buf_state;
1450 
1451  if (!BufferIsValid(buffer))
1452  elog(ERROR, "bad buffer ID: %d", buffer);
1453 
1454  if (BufferIsLocal(buffer))
1455  {
1456  MarkLocalBufferDirty(buffer);
1457  return;
1458  }
1459 
1460  bufHdr = GetBufferDescriptor(buffer - 1);
1461 
1462  Assert(BufferIsPinned(buffer));
1463  Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
1464  LW_EXCLUSIVE));
1465 
1466  old_buf_state = pg_atomic_read_u32(&bufHdr->state);
1467  for (;;)
1468  {
1469  if (old_buf_state & BM_LOCKED)
1470  old_buf_state = WaitBufHdrUnlocked(bufHdr);
1471 
1472  buf_state = old_buf_state;
1473 
1474  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
1475  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
1476 
1477  if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
1478  buf_state))
1479  break;
1480  }
1481 
1482  /*
1483  * If the buffer was not dirty already, do vacuum accounting.
1484  */
1485  if (!(old_buf_state & BM_DIRTY))
1486  {
1487  VacuumPageDirty++;
1488  pgBufferUsage.shared_blks_dirtied++;
1489  if (VacuumCostActive)
1490  VacuumCostBalance += VacuumCostPageDirty;
1491  }
1492 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1848
int VacuumCostBalance
Definition: globals.c:138
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:321
#define BM_DIRTY
Definition: buf_internals.h:59
int VacuumCostPageDirty
Definition: globals.c:130
#define ERROR
Definition: elog.h:43
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
long shared_blks_dirtied
Definition: instrument.h:23
unsigned int uint32
Definition: c.h:265
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:280
#define BM_LOCKED
Definition: buf_internals.h:58
int VacuumPageDirty
Definition: globals.c:136
#define BufferDescriptorGetContentLock(bdesc)
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:4103
pg_atomic_uint32 state
#define elog
Definition: elog.h:219
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
bool VacuumCostActive
Definition: globals.c:139
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 3362 of file bufmgr.c.

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock, BufferGetPage, BufferIsLocal, BufferIsValid, PGXACT::delayChkpt, elog, ERROR, GetBufferDescriptor, GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), LWLockHeldByMe(), MarkLocalBufferDirty(), MyPgXact, PageSetLSN, pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), BufferUsage::shared_blks_dirtied, BufferDesc::state, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, VacuumPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsInvalid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), brin_start_evacuating_page(), btvacuumpage(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), hashgettuple(), heap_page_prune(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

3363 {
3364  BufferDesc *bufHdr;
3365  Page page = BufferGetPage(buffer);
3366 
3367  if (!BufferIsValid(buffer))
3368  elog(ERROR, "bad buffer ID: %d", buffer);
3369 
3370  if (BufferIsLocal(buffer))
3371  {
3372  MarkLocalBufferDirty(buffer);
3373  return;
3374  }
3375 
3376  bufHdr = GetBufferDescriptor(buffer - 1);
3377 
3378  Assert(GetPrivateRefCount(buffer) > 0);
3379  /* here, either share or exclusive lock is OK */
3380  Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
3381 
3382  /*
3383  * This routine might get called many times on the same page, if we are
3384  * making the first scan after commit of an xact that added/deleted many
3385  * tuples. So, be as quick as we can if the buffer is already dirty. We
3386  * do this by not acquiring spinlock if it looks like the status bits are
3387  * already set. Since we make this test unlocked, there's a chance we
3388  * might fail to notice that the flags have just been cleared, and failed
3389  * to reset them, due to memory-ordering issues. But since this function
3390  * is only intended to be used in cases where failing to write out the
3391  * data would be harmless anyway, it doesn't really matter.
3392  */
3393  if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
3394  (BM_DIRTY | BM_JUST_DIRTIED))
3395  {
3396  XLogRecPtr lsn = InvalidXLogRecPtr;
3397  bool dirtied = false;
3398  bool delayChkpt = false;
3399  uint32 buf_state;
3400 
3401  /*
3402  * If we need to protect hint bit updates from torn writes, WAL-log a
3403  * full page image of the page. This full page image is only necessary
3404  * if the hint bit update is the first change to the page since the
3405  * last checkpoint.
3406  *
3407  * We don't check full_page_writes here because that logic is included
3408  * when we call XLogInsert() since the value changes dynamically.
3409  */
3410  if (XLogHintBitIsNeeded() &&
3411  (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
3412  {
3413  /*
3414  * If we're in recovery we cannot dirty a page because of a hint.
3415  * We can set the hint, just not dirty the page as a result so the
3416  * hint is lost when we evict the page or shutdown.
3417  *
3418  * See src/backend/storage/page/README for longer discussion.
3419  */
3420  if (RecoveryInProgress())
3421  return;
3422 
3423  /*
3424  * If the block is already dirty because we either made a change
3425  * or set a hint already, then we don't need to write a full page
3426  * image. Note that aggressive cleaning of blocks dirtied by hint
3427  * bit setting would increase the call rate. Bulk setting of hint
3428  * bits would reduce the call rate...
3429  *
3430  * We must issue the WAL record before we mark the buffer dirty.
3431  * Otherwise we might write the page before we write the WAL. That
3432  * causes a race condition, since a checkpoint might occur between
3433  * writing the WAL record and marking the buffer dirty. We solve
3434  * that with a kluge, but one that is already in use during
3435  * transaction commit to prevent race conditions. Basically, we
3436  * simply prevent the checkpoint WAL record from being written
3437  * until we have marked the buffer dirty. We don't start the
3438  * checkpoint flush until we have marked dirty, so our checkpoint
3439  * must flush the change to disk successfully or the checkpoint
3440  * never gets written, so crash recovery will fix.
3441  *
3442  * It's possible we may enter here without an xid, so it is
3443  * essential that CreateCheckpoint waits for virtual transactions
3444  * rather than full transactionids.
3445  */
3446  MyPgXact->delayChkpt = delayChkpt = true;
3447  lsn = XLogSaveBufferForHint(buffer, buffer_std);
3448  }
3449 
3450  buf_state = LockBufHdr(bufHdr);
3451 
3452  Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3453 
3454  if (!(buf_state & BM_DIRTY))
3455  {
3456  dirtied = true; /* Means "will be dirtied by this action" */
3457 
3458  /*
3459  * Set the page LSN if we wrote a backup block. We aren't supposed
3460  * to set this when only holding a share lock but as long as we
3461  * serialise it somehow we're OK. We choose to set LSN while
3462  * holding the buffer header lock, which causes any reader of an
3463  * LSN who holds only a share lock to also obtain a buffer header
3464  * lock before using PageGetLSN(), which is enforced in
3465  * BufferGetLSNAtomic().
3466  *
3467  * If checksums are enabled, you might think we should reset the
3468  * checksum here. That will happen when the page is written
3469  * sometime later in this checkpoint cycle.
3470  */
3471  if (!XLogRecPtrIsInvalid(lsn))
3472  PageSetLSN(page, lsn);
3473  }
3474 
3475  buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3476  UnlockBufHdr(bufHdr, buf_state);
3477 
3478  if (delayChkpt)
3479  MyPgXact->delayChkpt = false;
3480 
3481  if (dirtied)
3482  {
3483  VacuumPageDirty++;
3484  pgBufferUsage.shared_blks_dirtied++;
3485  if (VacuumCostActive)
3486  VacuumCostBalance += VacuumCostPageDirty;
3487  }
3488  }
3489 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define BM_PERMANENT
Definition: buf_internals.h:67
int VacuumCostBalance
Definition: globals.c:138
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1830
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:894
bool RecoveryInProgress(void)
Definition: xlog.c:7805
#define BM_DIRTY
Definition: buf_internals.h:59
int VacuumCostPageDirty
Definition: globals.c:130
PGXACT * MyPgXact
Definition: proc.c:68
#define ERROR
Definition: elog.h:43
#define GetBufferDescriptor(id)
#define BM_JUST_DIRTIED
Definition: buf_internals.h:64
long shared_blks_dirtied
Definition: instrument.h:23
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
bool delayChkpt
Definition: proc.h:210
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:280
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
int VacuumPageDirty
Definition: globals.c:136
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define UnlockBufHdr(desc, s)
pg_atomic_uint32 state
#define elog
Definition: elog.h:219
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
#define XLogHintBitIsNeeded()
Definition: xlog.h:156
Pointer Page
Definition: bufpage.h:74
BufferUsage pgBufferUsage
Definition: instrument.c:20
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
bool VacuumCostActive
Definition: globals.c:139
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 252 of file bufmgr.c.

References Assert, PrivateRefCountEntry::buffer, NULL, PrivateRefCountEntry::refcount, and ReservedRefCountEntry.

Referenced by PinBuffer(), and PinBuffer_Locked().

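253 {
254  PrivateRefCountEntry *res;
255 
256  /* only allowed to be called when a reservation has been made */
257  Assert(ReservedRefCountEntry != NULL);
258 
259  /* use up the reserved entry */
260  res = ReservedRefCountEntry;
261  ReservedRefCountEntry = NULL;
262 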
263  /* and fill it */
264  res->buffer = buffer;
265  res->refcount = 0;
266 
267  return res;
268 }
static PrivateRefCountEntry * ReservedRefCountEntry
Definition: bufmgr.c:172
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy 
)
static

Definition at line 1566 of file bufmgr.c.

References Assert, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), NULL, pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ReservePrivateRefCountEntry(), ResourceOwnerRememberBuffer(), BufferDesc::state, and WaitBufHdrUnlocked().

Referenced by BufferAlloc().

1567 {
1568  Buffer b = BufferDescriptorGetBuffer(buf);
1569  bool result;
1570  PrivateRefCountEntry *ref;
1571 
1572  ref = GetPrivateRefCountEntry(b, true);
1573 
1574  if (ref == NULL)
1575  {
1576  uint32 buf_state;
1577  uint32 old_buf_state;
1578 
1579  ReservePrivateRefCountEntry();
1580  ref = NewPrivateRefCountEntry(b);
1581 
1582  old_buf_state = pg_atomic_read_u32(&buf->state);
1583  for (;;)
1584  {
1585  if (old_buf_state & BM_LOCKED)
1586  old_buf_state = WaitBufHdrUnlocked(buf);
1587 
1588  buf_state = old_buf_state;
1589 
1590  /* increase refcount */
1591  buf_state += BUF_REFCOUNT_ONE;
1592 
1593  /* increase usagecount unless already max */
1594  if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
1595  buf_state += BUF_USAGECOUNT_ONE;
1596 
1597  if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
1598  buf_state))
1599  {
1600  result = (buf_state & BM_VALID) != 0;
1601  break;
1602  }
1603  }
1604  }
1605  else
1606  {
1607  /* If we previously pinned the buffer, it must surely be valid */
1608  result = true;
1609  }
1610 
1611  ref->refcount++;
1612  Assert(ref->refcount > 0);
1613  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1614  return result;
1615 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:278
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:321
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:855
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:252
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:44
unsigned int uint32
Definition: c.h:265
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:41
#define BM_LOCKED
Definition: buf_internals.h:58
#define BM_VALID
Definition: buf_internals.h:60
int result
Definition: header.h:19
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferDescriptorGetBuffer(bdesc)
static uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:4103
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:77
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:186
pg_atomic_uint32 state
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:50
int Buffer
Definition: buf.h:23
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 1639 of file bufmgr.c.

References Assert, BM_LOCKED, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer, CurrentResourceOwner, GetPrivateRefCountEntry(), NewPrivateRefCountEntry(), NULL, pg_atomic_read_u32(), PrivateRefCountEntry::refcount, ResourceOwnerRememberBuffer(), BufferDesc::state, and UnlockBufHdr.

Referenced by BufferAlloc(), FlushDatabaseBuffers(), FlushRelationBuffers(), and SyncOneBuffer().

1640 {
1641  Buffer b;
1642  PrivateRefCountEntry *ref;
1643  uint32 buf_state;
1644 
1645  /*
1646  * As explained, We don't expect any preexisting pins. That allows us to
1647  * manipulate the PrivateRefCount after releasing the spinlock
1648  */
1649  Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
1650 
1651  /*
1652  * Since we hold the buffer spinlock, we can update the buffer state and
1653  * release the lock in one operation.
1654  */
1655  buf_state = pg_atomic_read_u32(&buf->state);
1656  Assert(buf_state & BM_LOCKED);
1657  buf_state += BUF_REFCOUNT_ONE;
1658  UnlockBufHdr(buf, buf_state);
1659 
1660  b = BufferDescriptorGetBuffer(buf);
1661 
1662  ref = NewPrivateRefCountEntry(b);
1663  ref->refcount++;
1664 
1665  ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
1666 }
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:278
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:855
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:252
unsigned int uint32
Definition: c.h:265
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:41
#define BM_LOCKED
Definition: buf_internals.h:58
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define BufferDescriptorGetBuffer(bdesc)
#define UnlockBufHdr(desc, s)
pg_atomic_uint32 state
int Buffer
Definition: buf.h:23
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
void PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 529 of file bufmgr.c.

References Assert, BlockNumberIsValid, BufMappingPartitionLock, BufTableHashCode(), BufTableLookup(), ereport, errcode(), errmsg(), ERROR, INIT_BUFFERTAG, LocalPrefetchBuffer(), LW_SHARED, LWLockAcquire(), LWLockRelease(), RelFileNodeBackend::node, RelationData::rd_smgr, RELATION_IS_OTHER_TEMP, RelationIsValid, RelationOpenSmgr, RelationUsesLocalBuffers, SMgrRelationData::smgr_rnode, and smgrprefetch().

Referenced by BitmapHeapNext(), count_nondeletable_pages(), and pg_prewarm().

530 {
531 #ifdef USE_PREFETCH
532  Assert(RelationIsValid(reln));
533  Assert(BlockNumberIsValid(blockNum));
534 
535  /* Open it at the smgr level if not already done */
536  RelationOpenSmgr(reln);
537 
538  if (RelationUsesLocalBuffers(reln))
539  {
540  /* see comments in ReadBufferExtended */
541  if (RELATION_IS_OTHER_TEMP(reln))
542  ereport(ERROR,
543  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
544  errmsg("cannot access temporary tables of other sessions")));
545 
546  /* pass it off to localbuf.c */
547  LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
548  }
549  else
550  {
551  BufferTag newTag; /* identity of requested block */
552  uint32 newHash; /* hash value for newTag */
553  LWLock *newPartitionLock; /* buffer partition lock for it */
554  int buf_id;
555 
556  /* create a tag so we can lookup the buffer */
557  INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
558  forkNum, blockNum);
559 
560  /* determine its hash code and partition lock ID */
561  newHash = BufTableHashCode(&newTag);
562  newPartitionLock = BufMappingPartitionLock(newHash);
563 
564  /* see if the block is in the buffer pool already */
565  LWLockAcquire(newPartitionLock, LW_SHARED);
566  buf_id = BufTableLookup(&newTag, newHash);
567  LWLockRelease(newPartitionLock);
568 
569  /* If not in buffers, initiate prefetch */
570  if (buf_id < 0)
571  smgrprefetch(reln->rd_smgr, forkNum, blockNum);
572 
573  /*
574  * If the block *is* in buffers, we do nothing. This is not really
575  * ideal: the block might be just about to be evicted, which would be
576  * stupid since we know we are going to need it soon. But the only
577  * easy answer is to bump the usage_count, which does not seem like a
578  * great solution: when the caller does ultimately touch the block,
579  * usage_count would get bumped again, resulting in too much
580  * favoritism for blocks that are involved in a prefetch sequence. A
581  * real fix would involve some additional per-buffer state, and it's
582  * not clear that there's enough of a problem to justify that.
583  */
584  }
585 #endif /* USE_PREFETCH */
586 }
Definition: lwlock.h:32
#define BufMappingPartitionLock(hashcode)
void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:64
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:80
int errcode(int sqlerrcode)
Definition: elog.c:575
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:92
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
#define RelationOpenSmgr(relation)
Definition: rel.h:457
#define ERROR
Definition: elog.h:43
#define RelationIsValid(relation)
Definition: rel.h:386
RelFileNodeBackend smgr_rnode
Definition: smgr.h:43
unsigned int uint32
Definition: c.h:265
#define ereport(elevel, rest)
Definition: elog.h:122
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
RelFileNode node
Definition: relfilenode.h:74
#define Assert(condition)
Definition: c.h:671
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:530
#define INIT_BUFFERTAG(a, xx_rnode, xx_forkNum, xx_blockNum)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:509
int errmsg(const char *fmt,...)
Definition: elog.c:797
void smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: smgr.c:611
void PrintBufferLeakWarning ( Buffer  buffer)

Definition at line 2514 of file bufmgr.c.

References Assert, buftag::blockNum, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BufferIsLocal, BufferIsValid, elog, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, GetPrivateRefCount(), InvalidBackendId, LocalRefCount, MyBackendId, pfree(), pg_atomic_read_u32(), relpathbackend, buftag::rnode, BufferDesc::state, BufferDesc::tag, and WARNING.

Referenced by CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResourceOwnerReleaseInternal().

2515 {
2516  BufferDesc *buf;
2517  int32 loccount;
2518  char *path;
2519  BackendId backend;
2520  uint32 buf_state;
2521 
2522  Assert(BufferIsValid(buffer));
2523  if (BufferIsLocal(buffer))
2524  {
2525  buf = GetLocalBufferDescriptor(-buffer - 1);
2526  loccount = LocalRefCount[-buffer - 1];
2527  backend = MyBackendId;
2528  }
2529  else
2530  {
2531  buf = GetBufferDescriptor(buffer - 1);
2532  loccount = GetPrivateRefCount(buffer);
2533  backend = InvalidBackendId;
2534  }
2535 
2536  /* theoretically we should lock the bufhdr here */
2537  path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum);
2538  buf_state = pg_atomic_read_u32(&buf->state);
2539  elog(WARNING,
2540  "buffer refcount leak: [%03d] "
2541  "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
2542  buffer, path,
2543  buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
2544  BUF_STATE_GET_REFCOUNT(buf_state), loccount);
2545  pfree(path);
2546 }
BackendId MyBackendId
Definition: globals.c:72
ForkNumber forkNum
Definition: buf_internals.h:94
#define GetLocalBufferDescriptor(id)
signed int int32
Definition: c.h:253
void pfree(void *pointer)
Definition: mcxt.c:992
#define BUF_FLAG_MASK
Definition: buf_internals.h:46
static char * buf
Definition: pg_test_fsync.c:65
#define GetBufferDescriptor(id)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:358
unsigned int uint32
Definition: c.h:265
#define WARNING
Definition: elog.h:40
#define InvalidBackendId
Definition: backendid.h:23
int BackendId
Definition: backendid.h:21
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
pg_atomic_uint32 state
#define elog
Definition: elog.h:219
#define relpathbackend(rnode, backend, forknum)
Definition: relpath.h:62
int32 * LocalRefCount
Definition: localbuf.c:45
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 594 of file bufmgr.c.

References MAIN_FORKNUM, NULL, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_getbuf(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinbuild(), brinGetTupleForHeapBlock(), brinRevmapInitialize(), bt_metap(), bt_page_items(), bt_page_stats(), fill_seq_with_data(), GetTupleForTrigger(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_hot_search(), heap_inplace_update(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), ReadBufferBI(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

595 {
596  return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
597 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:640
#define NULL
Definition: c.h:226
static Buffer ReadBuffer_common ( SMgrRelation  reln,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool hit 
)
static

Definition at line 703 of file bufmgr.c.

References Assert, RelFileNodeBackend::backend, BufferUsage::blk_read_time, BM_VALID, BufferAlloc(), BufferDescriptorGetBuffer, BufferDescriptorGetContentLock, BufHdrGetBlock, CurrentResourceOwner, RelFileNode::dbNode, ereport, errcode(), errhint(), errmsg(), ERROR, INSTR_TIME_ADD, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LocalBufferAlloc(), LocalBufHdrGetBlock, LockBufferForCleanup(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), MemSet, RelFileNodeBackend::node, P_NEW, PageIsNew, PageIsVerified(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), pgBufferUsage, pgstat_count_buffer_read_time, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelFileNode::relNode, relpath, ResourceOwnerEnlargeBuffers(), BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, SMgrRelationData::smgr_rnode, smgrextend(), SmgrIsTemp, smgrnblocks(), smgrread(), RelFileNode::spcNode, StartBufferIO(), BufferDesc::state, TerminateBufferIO(), track_io_timing, UnlockBufHdr, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, VacuumPageHit, VacuumPageMiss, WARNING, and zero_damaged_pages.

Referenced by ReadBufferExtended(), and ReadBufferWithoutRelcache().

706 {
707  BufferDesc *bufHdr;
708  Block bufBlock;
709  bool found;
710  bool isExtend;
711  bool isLocalBuf = SmgrIsTemp(smgr);
712 
713  *hit = false;
714 
715  /* Make sure we will have room to remember the buffer pin */
716  ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
717 
718  isExtend = (blockNum == P_NEW);
719 
720  TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
721  smgr->smgr_rnode.node.spcNode,
722  smgr->smgr_rnode.node.dbNode,
723  smgr->smgr_rnode.node.relNode,
724  smgr->smgr_rnode.backend,
725  isExtend);
726 
727  /* Substitute proper block number if caller asked for P_NEW */
728  if (isExtend)
729  blockNum = smgrnblocks(smgr, forkNum);
730 
731  if (isLocalBuf)
732  {
733  bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
734  if (found)
735  pgBufferUsage.local_blks_hit++;
736  else
737  pgBufferUsage.local_blks_read++;
738  }
739  else
740  {
741  /*
742  * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
743  * not currently in memory.
744  */
745  bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
746  strategy, &found);
747  if (found)
748  pgBufferUsage.shared_blks_hit++;
749  else
750  pgBufferUsage.shared_blks_read++;
751  }
752 
753  /* At this point we do NOT hold any locks. */
754 
755  /* if it was already in the buffer pool, we're done */
756  if (found)
757  {
758  if (!isExtend)
759  {
760  /* Just need to update stats before we exit */
761  *hit = true;
762  VacuumPageHit++;
763 
764  if (VacuumCostActive)
765  VacuumCostBalance += VacuumCostPageHit;
766 
767  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
768  smgr->smgr_rnode.node.spcNode,
769  smgr->smgr_rnode.node.dbNode,
770  smgr->smgr_rnode.node.relNode,
771  smgr->smgr_rnode.backend,
772  isExtend,
773  found);
774 
775  /*
776  * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
777  * locked on return.
778  */
779  if (!isLocalBuf)
780  {
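781  if (mode == RBM_ZERO_AND_LOCK)
782  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
783  LW_EXCLUSIVE);
784  else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
785  LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
786  }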
787 
788  return BufferDescriptorGetBuffer(bufHdr);
789  }
790 
791  /*
792  * We get here only in the corner case where we are trying to extend
793  * the relation but we found a pre-existing buffer marked BM_VALID.
794  * This can happen because mdread doesn't complain about reads beyond
795  * EOF (when zero_damaged_pages is ON) and so a previous attempt to
796  * read a block beyond EOF could have left a "valid" zero-filled
797  * buffer. Unfortunately, we have also seen this case occurring
798  * because of buggy Linux kernels that sometimes return an
799  * lseek(SEEK_END) result that doesn't account for a recent write. In
800  * that situation, the pre-existing buffer would contain valid data
801  * that we don't want to overwrite. Since the legitimate case should
802  * always have left a zero-filled buffer, complain if not PageIsNew.
803  */
804  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
805  if (!PageIsNew((Page) bufBlock))
806  ereport(ERROR,
807  (errmsg("unexpected data beyond EOF in block %u of relation %s",
808  blockNum, relpath(smgr->smgr_rnode, forkNum)),
809  errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
810 
811  /*
812  * We *must* do smgrextend before succeeding, else the page will not
813  * be reserved by the kernel, and the next P_NEW call will decide to
814  * return the same page. Clear the BM_VALID bit, do the StartBufferIO
815  * call that BufferAlloc didn't, and proceed.
816  */
817  if (isLocalBuf)
818  {
819  /* Only need to adjust flags */
820  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
821 
822  Assert(buf_state & BM_VALID);
823  buf_state &= ~BM_VALID;
824  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
825  }
826  else
827  {
828  /*
829  * Loop to handle the very small possibility that someone re-sets
830  * BM_VALID between our clearing it and StartBufferIO inspecting
831  * it.
832  */
833  do
834  {
835  uint32 buf_state = LockBufHdr(bufHdr);
836 
837  Assert(buf_state & BM_VALID);
838  buf_state &= ~BM_VALID;
839  UnlockBufHdr(bufHdr, buf_state);
840  } while (!StartBufferIO(bufHdr, true));
841  }
842  }
843 
844  /*
845  * if we have gotten to this point, we have allocated a buffer for the
846  * page but its contents are not yet valid. IO_IN_PROGRESS is set for it,
847  * if it's a shared buffer.
848  *
849  * Note: if smgrextend fails, we will end up with a buffer that is
850  * allocated but not marked BM_VALID. P_NEW will still select the same
851  * block number (because the relation didn't get any longer on disk) and
852  * so future attempts to extend the relation will find the same buffer (if
853  * it's not been recycled) but come right back here to try smgrextend
854  * again.
855  */
856  Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
857 
858  bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
859 
860  if (isExtend)
861  {
862  /* new buffers are zero-filled */
863  MemSet((char *) bufBlock, 0, BLCKSZ);
864  /* don't set checksum for all-zero page */
865  smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
866 
867  /*
868  * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
869  * although we're essentially performing a write. At least on linux
870  * doing so defeats the 'delayed allocation' mechanism, leading to
871  * increased file fragmentation.
872  */
873  }
874  else
875  {
876  /*
877  * Read in the page, unless the caller intends to overwrite it and
878  * just wants us to allocate a buffer.
879  */
880  if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
881  MemSet((char *) bufBlock, 0, BLCKSZ);
882  else
883  {
884  instr_time io_start,
885  io_time;
886 
887  if (track_io_timing)
888  INSTR_TIME_SET_CURRENT(io_start);
889 
890  smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
891 
892  if (track_io_timing)
893  {
894  INSTR_TIME_SET_CURRENT(io_time);
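895  INSTR_TIME_SUBTRACT(io_time, io_start);
896  pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
897  INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
898  }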
899 
900  /* check for garbage data */
901  if (!PageIsVerified((Page) bufBlock, blockNum))
902  {
903  if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
904  {
905  ereport(WARNING,
906  (errcode(ERRCODE_DATA_CORRUPTED),
907  errmsg("invalid page in block %u of relation %s; zeroing out page",
908  blockNum,
909  relpath(smgr->smgr_rnode, forkNum))));
910  MemSet((char *) bufBlock, 0, BLCKSZ);
911  }
912  else
913  ereport(ERROR,
914  (errcode(ERRCODE_DATA_CORRUPTED),
915  errmsg("invalid page in block %u of relation %s",
916  blockNum,
917  relpath(smgr->smgr_rnode, forkNum))));
918  }
919  }
920  }
921 
922  /*
923  * In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
924  * the page as valid, to make sure that no other backend sees the zeroed
925  * page before the caller has had a chance to initialize it.
926  *
927  * Since no-one else can be looking at the page contents yet, there is no
928  * difference between an exclusive lock and a cleanup-strength lock. (Note
929  * that we cannot use LockBuffer() or LockBufferForCleanup() here, because
930  * they assert that the buffer is already valid.)
931  */
932  if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
933  !isLocalBuf)
934  {
935  LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
936  }
937 
938  if (isLocalBuf)
939  {
940  /* Only need to adjust flags */
941  uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
942 
943  buf_state |= BM_VALID;
944  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
945  }
946  else
947  {
948  /* Set BM_VALID, terminate IO, and wake up any waiters */
949  TerminateBufferIO(bufHdr, false, BM_VALID);
950  }
951 
952  VacuumPageMiss++;
953  if (VacuumCostActive)
954  VacuumCostBalance += VacuumCostPageMiss;
955 
956  TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
957  smgr->smgr_rnode.node.spcNode,
958  smgr->smgr_rnode.node.dbNode,
959  smgr->smgr_rnode.node.relNode,
960  smgr->smgr_rnode.backend,
961  isExtend,
962  found);
963 
964  return BufferDescriptorGetBuffer(bufHdr);
965 }
#define LocalBufHdrGetBlock(bufHdr)
Definition: bufmgr.c:61
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:103
long local_blks_hit
Definition: instrument.h:25
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3586
int errhint(const char *fmt,...)
Definition: elog.c:987
long local_blks_read
Definition: instrument.h:26
int VacuumCostBalance
Definition: globals.c:138
bool PageIsVerified(Page page, BlockNumber blkno)
Definition: bufpage.c:81
instr_time blk_read_time
Definition: instrument.h:31
int VacuumPageHit
Definition: globals.c:134
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
struct timeval instr_time
Definition: instr_time.h:147
long shared_blks_read
Definition: instrument.h:22
int errcode(int sqlerrcode)
Definition: elog.c:575
#define MemSet(start, val, len)
Definition: c.h:853
#define P_NEW
Definition: bufmgr.h:82
void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
Definition: smgr.c:625
#define SmgrIsTemp(smgr)
Definition: smgr.h:80
static bool StartBufferIO(BufferDesc *buf, bool forInput)
Definition: bufmgr.c:3858
#define ERROR
Definition: elog.h:43
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:167
#define INSTR_TIME_ADD(x, y)
Definition: instr_time.h:155
unsigned int uint32
Definition: c.h:265
#define ereport(elevel, rest)
Definition: elog.h:122
int VacuumCostPageHit
Definition: globals.c:128
void ResourceOwnerEnlargeBuffers(ResourceOwner owner)
Definition: resowner.c:839
#define WARNING
Definition: elog.h:40
static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
Definition: bufmgr.c:3925
#define BM_VALID
Definition: buf_internals.h:60
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:57
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:987
#define BufferDescriptorGetContentLock(bdesc)
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:4075
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:672
#define Assert(condition)
Definition: c.h:671
#define pgstat_count_buffer_read_time(n)
Definition: pgstat.h:1175
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:202
#define BufferDescriptorGetBuffer(bdesc)
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:153
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
Definition: smgr.c:600
#define PageIsNew(page)
Definition: bufpage.h:226
int errmsg(const char *fmt,...)
Definition: elog.c:797
long shared_blks_hit
Definition: instrument.h:21
#define UnlockBufHdr(desc, s)
#define relpath(rnode, forknum)
Definition: relpath.h:71
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:287
pg_atomic_uint32 state
int VacuumPageMiss
Definition: globals.c:135
int VacuumCostPageMiss
Definition: globals.c:129
bool track_io_timing
Definition: bufmgr.c:111
Pointer Page
Definition: bufpage.h:74
BufferUsage pgBufferUsage
Definition: instrument.c:20
void * Block
Definition: bufmgr.h:25
bool VacuumCostActive
Definition: globals.c:139
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:251
bool zero_damaged_pages
Definition: bufmgr.c:108
Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 640 of file bufmgr.c.

References buf, ereport, errcode(), errmsg(), ERROR, pgstat_count_buffer_hit, pgstat_count_buffer_read, RelationData::rd_rel, RelationData::rd_smgr, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationOpenSmgr.

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), acquire_sample_rows(), blbulkdelete(), blgetbitmap(), blvacuumcleanup(), brin_vacuum_scan(), brinbuildempty(), btvacuumpage(), btvacuumscan(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), fsm_readbuf(), get_raw_page_internal(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistbuildempty(), gistbulkdelete(), gistvacuumcleanup(), hashbulkdelete(), heapgetpage(), lazy_scan_heap(), lazy_vacuum_heap(), pg_prewarm(), pgstat_btree_page(), pgstat_gist_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), spgvacuumpage(), statapprox_heap(), and vm_readbuf().

642 {
643  bool hit;
644  Buffer buf;
645 
646  /* Open it at the smgr level if not already done */
647  RelationOpenSmgr(reln);
648 
649  /*
650  * Reject attempts to read non-local temporary relations; we would be
651  * likely to get wrong data since we have no visibility into the owning
652  * session's local buffers.
653  */
654  if (RELATION_IS_OTHER_TEMP(reln))
655  ereport(ERROR,
656  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
657  errmsg("cannot access temporary tables of other sessions")));
658 
659  /*
660  * Read the buffer, and update pgstat counters to reflect a cache hit or
661  * miss.
662  */
663  pgstat_count_buffer_read(reln);
664  buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
665  forkNum, blockNum, mode, strategy, &hit);
666  if (hit)
667  pgstat_count_buffer_hit(reln);
668  return buf;
669 }
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
int errcode(int sqlerrcode)
Definition: elog.c:575
Form_pg_class rd_rel
Definition: rel.h:113
#define RelationOpenSmgr(relation)
Definition: rel.h:457
#define ERROR
Definition: elog.h:43
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:1165
static char * buf
Definition: pg_test_fsync.c:65
#define ereport(elevel, rest)
Definition: elog.h:122
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:1170
static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
Definition: bufmgr.c:703
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:530
int errmsg(const char *fmt,...)
Definition: elog.c:797
int Buffer
Definition: buf.h:23
Buffer ReadBufferWithoutRelcache ( RelFileNode  rnode,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)

Definition at line 682 of file bufmgr.c.

References Assert, InRecovery, InvalidBackendId, ReadBuffer_common(), RELPERSISTENCE_PERMANENT, and smgropen().

Referenced by XLogReadBufferExtended().

685 {
686  bool hit;
687 
688  SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
689 
690  Assert(InRecovery);
691 
692  return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
693  mode, strategy, &hit);
694 }
bool InRecovery
Definition: xlog.c:191
#define RELPERSISTENCE_PERMANENT
Definition: pg_class.h:170
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:137
#define InvalidBackendId
Definition: backendid.h:23
static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
Definition: bufmgr.c:703
#define Assert(condition)
Definition: c.h:671
BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 2771 of file bufmgr.c.

References RelationData::rd_smgr, RelationOpenSmgr, and smgrnblocks().

Referenced by _hash_getnewbuf(), _hash_metapinit(), get_raw_page_internal(), and pg_prewarm().

2772 {
2773  /* Open it at the smgr level if not already done */
2774  RelationOpenSmgr(relation);
2775 
2776  return smgrnblocks(relation->rd_smgr, forkNum);
2777 }
struct SMgrRelationData * rd_smgr
Definition: rel.h:87
#define RelationOpenSmgr(relation)
Definition: rel.h:457
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:672
Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 1508 of file bufmgr.c.

References Assert, buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid, CurrentResourceOwner, buftag::forkNum, GetBufferDescriptor, GetLocalBufferDescriptor, LocalRefCount, MAIN_FORKNUM, RelationData::rd_node, ReadBuffer(), RelFileNodeEquals, ResourceOwnerForgetBuffer(), buftag::rnode, BufferDesc::tag, and UnpinBuffer().

Referenced by _bt_relandgetbuf(), bitgetpage(), ginFindLeafPage(), and index_fetch_heap().

1511 {
1512  ForkNumber forkNum = MAIN_FORKNUM;
1513  BufferDesc *bufHdr;
1514 
1515  if (BufferIsValid(buffer))
1516  {
1517  Assert(BufferIsPinned(buffer));
1518  if (BufferIsLocal(buffer))
1519  {
1520  bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1521  if (bufHdr->tag.blockNum == blockNum &&
1522  RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
1523  bufHdr->tag.forkNum == forkNum)
1524  return buffer;
1525  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
1526  LocalRefCount[-buffer - 1]--;
1527  }
1528  else
1529  {
1530  bufHdr = GetBufferDescriptor(buffer - 1);
1531  /* we have pin, so it's ok to examine tag without spinlock */
1532  if (bufHdr->tag.blockNum == blockNum &&
1533  RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
1534  bufHdr->tag.forkNum == forkNum)
1535  return buffer;
1536  UnpinBuffer(bufHdr, true);
1537  }
1538  }
1539 
1540  return ReadBuffer(relation, blockNum);
1541 }
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:419
ForkNumber forkNum
Definition: buf_internals.h:94
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
#define GetLocalBufferDescriptor(id)
#define GetBufferDescriptor(id)
ForkNumber
Definition: relpath.h:24
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1677
RelFileNode rd_node
Definition: rel.h:85
#define Assert(condition)
Definition: c.h:671
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:594
#define BufferIsLocal(buffer)
Definition: buf.h:37
BlockNumber blockNum
Definition: buf_internals.h:95
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
RelFileNode rnode
Definition: buf_internals.h:93
BufferTag tag
int32 * LocalRefCount
Definition: localbuf.c:45
#define RelFileNodeEquals(node1, node2)
Definition: relfilenode.h:88
void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:869
void ReleaseBuffer ( Buffer  buffer)

Definition at line 3292 of file bufmgr.c.

References Assert, BufferIsLocal, BufferIsValid, CurrentResourceOwner, elog, ERROR, GetBufferDescriptor, LocalRefCount, ResourceOwnerForgetBuffer(), and UnpinBuffer().

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_getbuf(), _bt_pagedel(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), AfterTriggerExecute(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), EvalPlanQualFetch(), EvalPlanQualFetchRowMarks(), ExecCheckTIDVisible(), ExecClearTuple(), ExecDelete(), ExecEndIndexOnlyScan(), ExecLockRows(), ExecMaterializeSlot(), ExecOnConflictUpdate(), ExecStoreMinimalTuple(), ExecStoreTuple(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_vacuum_page(), get_raw_page_internal(), GetRecordedFreeSpace(), GetTupleForTrigger(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_hot_search(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapgetpage(), heapgettup(), heapgettup_pagemode(), index_endscan(), index_getnext_tid(), index_rescan(), lazy_scan_heap(), lazy_vacuum_heap(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), ReadBufferBI(), RelationFindReplTupleByIndex(), RelationFindReplTupleSeq(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), ResourceOwnerReleaseInternal(), revmap_get_buffer(), revmap_physical_extend(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), tablesample_getnext(), terminate_brin_buildstate(), TidNext(), UnlockReleaseBuffer(), 
visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

3293 {
3294  if (!BufferIsValid(buffer))
3295  elog(ERROR, "bad buffer ID: %d", buffer);
3296 
3297  if (BufferIsLocal(buffer))
3298  {
3299  ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
3300 
3301  Assert(LocalRefCount[-buffer - 1] > 0);
3302  LocalRefCount[-buffer - 1]--;
3303  return;
3304  }
3305 
3306  UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
3307 }
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
#define ERROR
Definition: elog.h:43
#define GetBufferDescriptor(id)
static void UnpinBuffer(BufferDesc *buf, bool fixOwner)
Definition: bufmgr.c:1677
#define Assert(condition)
Definition: c.h:671
#define BufferIsLocal(buffer)
Definition: buf.h:37
#define BufferIsValid(bufnum)
Definition: bufmgr.h:114
#define elog
Definition: elog.h:219
int32 * LocalRefCount
Definition: localbuf.c:45
void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
Definition: resowner.c:869