PostgreSQL Source Code git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/aio.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/proclist.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"

Data Structures

struct  PrivateRefCountData
 
struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define READV_COUNT_BITS   7
 
#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)
 

Typedefs

typedef struct PrivateRefCountData PrivateRefCountData
 
typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBuffer (Datum res)
 
static char * ResOwnerPrintBuffer (Datum res)
 
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow (Buffer buffer, bool do_move)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static bool AsyncReadBuffers (ReadBuffersOperation *operation, int *nblocks_progress)
 
static void CheckReadBuffersOperation (ReadBuffersOperation *operation, bool is_complete)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushUnlockedBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
static void BufferLockAcquire (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockUnlock (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockConditional (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMeInMode (BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMe (BufferDesc *buf_hdr)
 
static void BufferLockDisown (Buffer buffer, BufferDesc *buf_hdr)
 
static int BufferLockDisownInternal (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockAttempt (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockQueueSelf (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockDequeueSelf (BufferDesc *buf_hdr)
 
static void BufferLockWakeup (BufferDesc *buf_hdr, bool unlocked)
 
static void BufferLockProcessRelease (BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
 
static uint64 BufferLockReleaseSub (BufferLockMode mode)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool ReadBuffersCanStartIOOnce (Buffer buffer, bool nowait)
 
static bool ReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
static void ProcessReadBuffersResult (ReadBuffersOperation *operation)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
uint32 GetPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
static void WakePinCountWaiter (BufferDesc *buf)
 
void TrackNewBufferPin (Buffer buf)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferManagerAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void UnlockBuffer (Buffer buffer)
 
void LockBufferInternal (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits, bool forget_owner, bool release_aio)
 
uint64 LockBufHdr (BufferDesc *desc)
 
pg_noinline uint64 WaitBufHdrUnlocked (BufferDesc *buf)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
static bool EvictUnpinnedBufferInternal (BufferDesc *desc, bool *buffer_flushed)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
static bool MarkDirtyUnpinnedBufferInternal (Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
static pg_attribute_always_inline void buffer_stage_common (PgAioHandle *ioh, bool is_write, bool is_temp)
 
static void buffer_readv_decode_error (PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
 
static void buffer_readv_encode_error (PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
 
static pg_attribute_always_inline void buffer_readv_complete_one (PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
 
static pg_attribute_always_inline PgAioResult buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
 
static void buffer_readv_report (PgAioResult result, const PgAioTargetData *td, int elevel)
 
static void shared_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete_local (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static void local_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult local_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT
 
int io_max_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static Buffer PrivateRefCountArrayKeys [REFCOUNT_ARRAY_ENTRIES]
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static int ReservedRefCountSlot = -1
 
static int PrivateRefCountEntryLast = -1
 
static uint32 MaxProportionalPins
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_resowner_desc
 
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
const PgAioHandleCallbacks aio_local_buffer_readv_cb
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 92 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 82 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 81 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 74 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
 !BufferIsValid(bufnum) ? \
 false \
 : \
 BufferIsLocal(bufnum) ? \
 (LocalRefCount[-(bufnum) - 1] > 0) \
 : \
 (GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition bufmgr.c:528
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
int32 * LocalRefCount
Definition localbuf.c:49

Definition at line 589 of file bufmgr.c.
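
For orientation, a minimal hypothetical usage sketch (rel and blkno are assumed inputs, not names from this file): a backend must hold a pin before it examines page contents.

 /* Hypothetical sketch: assert pin ownership before touching the page. */
 Buffer buf = ReadBuffer(rel, blkno);
 Page page;

 Assert(BufferIsPinned(buf));
 page = BufferGetPage(buf);
 /* ... inspect the page ... */
 ReleaseBuffer(buf);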

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 73 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 77 of file bufmgr.c.

◆ READV_COUNT_BITS

#define READV_COUNT_BITS   7

◆ READV_COUNT_MASK

#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 129 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 84 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 3440 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 3440 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 3442 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 3442 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 3439 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 3439 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 3441 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 3441 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 3438 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 3438 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

◆ PrivateRefCountData

◆ PrivateRefCountEntry

◆ SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 6999 of file bufmgr.c.

7000{
7001 BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
7002 uint64 buf_state;
7003
7004 buf_state = LockBufHdr(buf_hdr);
7005 Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
7006
7007 if (!(buf_state & BM_VALID))
7008 {
7009 Assert(!(buf_state & BM_DIRTY));
7010 UnlockBufHdr(buf_hdr);
7011 }
7012 else
7013 {
7014 Assert(buf_state & BM_DIRTY);
7015 UnlockBufHdr(buf_hdr);
7016
7017 /* Issue notice if this is not the first failure... */
7018 if (buf_state & BM_IO_ERROR)
7019 {
7020 /* Buffer is pinned, so we can read tag without spinlock */
7021 ereport(WARNING,
7022 (errcode(ERRCODE_IO_ERROR),
7023 errmsg("could not write block %u of %s",
7024 buf_hdr->tag.blockNum,
7025 relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
7026 BufTagGetForkNum(&buf_hdr->tag)).str),
7027 errdetail("Multiple failures --- write error might be permanent.")));
7028 }
7029 }
7030
7031 TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false, false);
7032}
#define BM_TAG_VALID
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static void UnlockBufHdr(BufferDesc *desc)
#define BM_DIRTY
#define BM_IO_IN_PROGRESS
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
#define BM_IO_ERROR
static BufferDesc * GetBufferDescriptor(uint32 id)
uint64 LockBufHdr(BufferDesc *desc)
Definition bufmgr.c:7097
void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits, bool forget_owner, bool release_aio)
Definition bufmgr.c:6937
#define Assert(condition)
Definition c.h:883
uint64_t uint64
Definition c.h:557
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define WARNING
Definition elog.h:36
#define ereport(elevel,...)
Definition elog.h:150
#define relpathperm(rlocator, forknum)
Definition relpath.h:146

References Assert, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), fb(), GetBufferDescriptor(), LockBufHdr(), relpathperm, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AsyncReadBuffers()

static bool AsyncReadBuffers ( ReadBuffersOperation *  operation,
int *  nblocks_progress 
)
static

Definition at line 1864 of file bufmgr.c.

1865{
1866 Buffer *buffers = &operation->buffers[0];
1867 int flags = operation->flags;
1868 BlockNumber blocknum = operation->blocknum;
1869 ForkNumber forknum = operation->forknum;
1870 char persistence = operation->persistence;
1871 int16 nblocks_done = operation->nblocks_done;
1872 Buffer *io_buffers = &operation->buffers[nblocks_done];
1873 int io_buffers_len = 0;
1874 PgAioHandle *ioh;
1875 uint32 ioh_flags = 0;
1876 void *io_pages[MAX_IO_COMBINE_LIMIT];
1877 IOContext io_context;
1878 IOObject io_object;
1879 bool did_start_io;
1880
1881 /*
1882 * When this IO is executed synchronously, either because the caller will
1883 * immediately block waiting for the IO or because IOMETHOD_SYNC is used,
1884 * the AIO subsystem needs to know.
1885 */
1886 if (flags & READ_BUFFERS_SYNCHRONOUSLY)
1887 ioh_flags |= PGAIO_HF_SYNCHRONOUS;
1888
1889 if (persistence == RELPERSISTENCE_TEMP)
1890 {
1891 io_context = IOCONTEXT_NORMAL;
1892 io_object = IOOBJECT_TEMP_RELATION;
1893 ioh_flags |= PGAIO_HF_REFERENCES_LOCAL;
1894 }
1895 else
1896 {
1897 io_context = IOContextForStrategy(operation->strategy);
1898 io_object = IOOBJECT_RELATION;
1899 }
1900
1901 /*
1902 * If zero_damaged_pages is enabled, add the READ_BUFFERS_ZERO_ON_ERROR
1903 * flag. The reason for that is that, hopefully, zero_damaged_pages isn't
1904 * set globally, but on a per-session basis. The completion callback,
1905 * which may be run in other processes, e.g. in IO workers, may have a
1906 * different value of the zero_damaged_pages GUC.
1907 *
1908 * XXX: We probably should eventually use a different flag for
1909 * zero_damaged_pages, so we can report different log levels / error codes
1910 * for zero_damaged_pages and ZERO_ON_ERROR.
1911 */
1912 if (zero_damaged_pages)
1913 flags |= READ_BUFFERS_ZERO_ON_ERROR;
1914
1915 /*
1916 * For the same reason as with zero_damaged_pages we need to use this
1917 * backend's ignore_checksum_failure value.
1918 */
1919 if (ignore_checksum_failure)
1920 flags |= READ_BUFFERS_IGNORE_CHECKSUM_FAILURES;
1921
1922
1923 /*
1924 * To be allowed to report stats in the local completion callback we need
1925 * to prepare to report stats now. This ensures we can safely report the
1926 * checksum failure even in a critical section.
1927 */
1928 pgstat_prepare_report_checksum_failure(operation->smgr->smgr_rlocator.locator.dbOid);
1929
1930 /*
1931 * Get IO handle before ReadBuffersCanStartIO(), as pgaio_io_acquire()
1932 * might block, which we don't want after setting IO_IN_PROGRESS.
1933 *
1934 * If we need to wait for IO before we can get a handle, submit
1935 * already-staged IO first, so that other backends don't need to wait.
1936 * There wouldn't be a deadlock risk, as pgaio_io_acquire() just needs to
1937 * wait for already submitted IO, which doesn't require additional locks,
1938 * but it could still cause undesirable waits.
1939 *
1940 * A secondary benefit is that this would allow us to measure the time in
1941 * pgaio_io_acquire() without causing undue timer overhead in the common,
1942 * non-blocking, case. However, currently the pgstats infrastructure
1943 * doesn't really allow that, as it a) asserts that an operation can't
1944 * have time without operations b) doesn't have an API to report
1945 * "accumulated" time.
1946 */
1947 ioh = pgaio_io_acquire_nb(CurrentResourceOwner, &operation->io_return);
1948 if (unlikely(!ioh))
1949 {
1950 pgaio_submit_staged();
1951
1952 ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
1953 }
1954
1955 /*
1956 * Check if we can start IO on the first to-be-read buffer.
1957 *
1958 * If an I/O is already in progress in another backend, we want to wait
1959 * for the outcome: either done, or something went wrong and we will
1960 * retry.
1961 */
1962 if (!ReadBuffersCanStartIO(buffers[nblocks_done], false))
1963 {
1964 /*
1965 * Someone else has already completed this block, we're done.
1966 *
1967 * When IO is necessary, ->nblocks_done is updated in
1968 * ProcessReadBuffersResult(), but that is not called if no IO is
1969 * necessary. Thus update here.
1970 */
1971 operation->nblocks_done += 1;
1972 *nblocks_progress = 1;
1973
1974 pgaio_io_release(ioh);
1975 pgaio_wref_clear(&operation->io_wref);
1976 did_start_io = false;
1977
1978 /*
1979 * Report and track this as a 'hit' for this backend, even though it
1980 * must have started out as a miss in PinBufferForBlock(). The other
1981 * backend will track this as a 'read'.
1982 */
1983 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
1984 operation->smgr->smgr_rlocator.locator.spcOid,
1985 operation->smgr->smgr_rlocator.locator.dbOid,
1986 operation->smgr->smgr_rlocator.locator.relNumber,
1987 operation->smgr->smgr_rlocator.backend,
1988 true);
1989
1990 if (persistence == RELPERSISTENCE_TEMP)
1991 pgBufferUsage.local_blks_hit += 1;
1992 else
1993 pgBufferUsage.shared_blks_hit += 1;
1994
1995 if (operation->rel)
1996 pgstat_count_buffer_hit(operation->rel);
1997
1998 pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
1999
2000 if (VacuumCostActive)
2001 VacuumCostBalance += VacuumCostPageHit;
2002 }
2003 else
2004 {
2005 instr_time io_start;
2006
2007 /* We found a buffer that we need to read in. */
2008 Assert(io_buffers[0] == buffers[nblocks_done]);
2009 io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
2010 io_buffers_len = 1;
2011
2012 /*
2013 * How many neighboring-on-disk blocks can we scatter-read into other
2014 * buffers at the same time? In this case we don't wait if we see an
2015 * I/O already in progress. We already set BM_IO_IN_PROGRESS for the
2016 * head block, so we should get on with that I/O as soon as possible.
2017 */
2018 for (int i = nblocks_done + 1; i < operation->nblocks; i++)
2019 {
2020 if (!ReadBuffersCanStartIO(buffers[i], true))
2021 break;
2022 /* Must be consecutive block numbers. */
2023 Assert(BufferGetBlockNumber(buffers[i - 1]) ==
2024 BufferGetBlockNumber(buffers[i]) - 1);
2025 Assert(io_buffers[io_buffers_len] == buffers[i]);
2026
2027 io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
2028 }
2029
2030 /* get a reference to wait for in WaitReadBuffers() */
2031 pgaio_io_get_wref(ioh, &operation->io_wref);
2032
2033 /* provide the list of buffers to the completion callbacks */
2034 pgaio_io_set_handle_data_32(ioh, (uint32 *) io_buffers, io_buffers_len);
2035
2036 pgaio_io_register_callbacks(ioh,
2037 persistence == RELPERSISTENCE_TEMP ?
2038 PGAIO_HCB_LOCAL_BUFFER_READV :
2039 PGAIO_HCB_SHARED_BUFFER_READV,
2040 flags);
2041
2042 pgaio_io_set_flag(ioh, ioh_flags);
2043
2044 /* ---
2045 * Even though we're trying to issue IO asynchronously, track the time
2046 * in smgrstartreadv():
2047 * - if io_method == IOMETHOD_SYNC, we will always perform the IO
2048 * immediately
2049 * - the io method might not support the IO (e.g. worker IO for a temp
2050 * table)
2051 * ---
2052 */
2053 io_start = pgstat_prepare_io_time(track_io_timing);
2054 smgrstartreadv(ioh, operation->smgr, forknum,
2055 blocknum + nblocks_done,
2056 io_pages, io_buffers_len);
2057 pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
2058 1, io_buffers_len * BLCKSZ);
2059
2060 if (persistence == RELPERSISTENCE_TEMP)
2061 pgBufferUsage.local_blks_read += io_buffers_len;
2062 else
2063 pgBufferUsage.shared_blks_read += io_buffers_len;
2064
2065 /*
2066 * Track vacuum cost when issuing IO, not after waiting for it.
2067 * Otherwise we could end up issuing a lot of IO in a short timespan,
2068 * despite a low cost limit.
2069 */
2070 if (VacuumCostActive)
2071 VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
2072
2073 *nblocks_progress = io_buffers_len;
2074 did_start_io = true;
2075 }
2076
2077 return did_start_io;
2078}
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition aio.c:162
void pgaio_wref_clear(PgAioWaitRef *iow)
Definition aio.c:964
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
Definition aio.c:366
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition aio.c:330
void pgaio_submit_staged(void)
Definition aio.c:1123
void pgaio_io_release(PgAioHandle *ioh)
Definition aio.c:240
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition aio.c:188
@ PGAIO_HCB_LOCAL_BUFFER_READV
Definition aio.h:200
@ PGAIO_HCB_SHARED_BUFFER_READV
Definition aio.h:198
@ PGAIO_HF_SYNCHRONOUS
Definition aio.h:70
@ PGAIO_HF_REFERENCES_LOCAL
Definition aio.h:60
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
bool track_io_timing
Definition bufmgr.c:176
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4356
static bool ReadBuffersCanStartIO(Buffer buffer, bool nowait)
Definition bufmgr.c:1664
bool zero_damaged_pages
Definition bufmgr.c:173
#define READ_BUFFERS_ZERO_ON_ERROR
Definition bufmgr.h:122
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
#define MAX_IO_COMBINE_LIMIT
Definition bufmgr.h:173
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES
Definition bufmgr.h:126
#define READ_BUFFERS_SYNCHRONOUSLY
Definition bufmgr.h:128
bool ignore_checksum_failure
Definition bufpage.c:27
int16_t int16
Definition c.h:551
#define unlikely(x)
Definition c.h:422
uint32_t uint32
Definition c.h:556
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition freelist.c:747
int VacuumCostPageMiss
Definition globals.c:152
bool VacuumCostActive
Definition globals.c:158
int VacuumCostBalance
Definition globals.c:157
int VacuumCostPageHit
Definition globals.c:151
BufferUsage pgBufferUsage
Definition instrument.c:20
int i
Definition isn.c:77
IOObject
Definition pgstat.h:276
@ IOOBJECT_RELATION
Definition pgstat.h:277
@ IOOBJECT_TEMP_RELATION
Definition pgstat.h:278
IOContext
Definition pgstat.h:285
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOOP_READ
Definition pgstat.h:315
@ IOOP_HIT
Definition pgstat.h:309
#define pgstat_count_buffer_hit(rel)
Definition pgstat.h:720
void pgstat_prepare_report_checksum_failure(Oid dboid)
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:68
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
ForkNumber
Definition relpath.h:56
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition smgr.c:753
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 shared_blks_hit
Definition instrument.h:26
ForkNumber forknum
Definition bufmgr.h:137
PgAioWaitRef io_wref
Definition bufmgr.h:150
SMgrRelation smgr
Definition bufmgr.h:135
BufferAccessStrategy strategy
Definition bufmgr.h:138
BlockNumber blocknum
Definition bufmgr.h:146
PgAioReturn io_return
Definition bufmgr.h:151
RelFileLocator locator
RelFileNumber relNumber
RelFileLocatorBackend smgr_rlocator
Definition smgr.h:38

References Assert, RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, CurrentResourceOwner, RelFileLocator::dbOid, fb(), ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ignore_checksum_failure, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, IOOP_READ, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_HF_REFERENCES_LOCAL, PGAIO_HF_SYNCHRONOUS, pgaio_io_acquire(), pgaio_io_acquire_nb(), pgaio_io_get_wref(), pgaio_io_register_callbacks(), pgaio_io_release(), pgaio_io_set_flag(), pgaio_io_set_handle_data_32(), pgaio_submit_staged(), pgaio_wref_clear(), pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_io_op(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_prepare_report_checksum_failure(), READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersCanStartIO(), ReadBuffersOperation::rel, RelFileLocator::relNumber, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrstartreadv(), RelFileLocator::spcOid, ReadBuffersOperation::strategy, track_io_timing, unlikely, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, and zero_damaged_pages.

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().
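
For context, AsyncReadBuffers() is the I/O-issuing half of the StartReadBuffers()/WaitReadBuffers() API. A hedged caller-side sketch follows (rel and blkno are assumed inputs; the field initialization mirrors the caller conventions expected by StartReadBuffersImpl(), so treat it as an illustration rather than a prescribed recipe):

 /* Hypothetical caller sketch, not code from this file. */
 ReadBuffersOperation op = {0};
 Buffer buf;

 op.rel = rel;
 op.smgr = RelationGetSmgr(rel);
 op.persistence = rel->rd_rel->relpersistence;
 op.forknum = MAIN_FORKNUM;
 op.strategy = NULL;

 if (StartReadBuffer(&op, &buf, blkno, 0))
  WaitReadBuffers(&op); /* I/O was needed; block until it completes */
 /* else: the block was already valid in the buffer pool (a hit) */

As the cross-references above show, both StartReadBuffersImpl() and WaitReadBuffers() reach this routine: the former to issue the initial I/O, the latter to retry or continue a partially completed operation.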

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 4103 of file bufmgr.c.

4104{
4105 CheckForBufferLeaks();
4106
4107 AtEOXact_LocalBuffers(isCommit);
4108
4109 Assert(PrivateRefCountOverflowed == 0);
4110}
static void CheckForBufferLeaks(void)
Definition bufmgr.c:4173
static int32 PrivateRefCountOverflowed
Definition bufmgr.c:250
void AtEOXact_LocalBuffers(bool isCommit)
Definition localbuf.c:1003

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), fb(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 4155 of file bufmgr.c.

4156{
4157 UnlockBuffers();
4158
4159 CheckForBufferLeaks();
4160
4161 /* localbuf.c needs a chance too */
4162 AtProcExit_LocalBuffers();
4163}
void UnlockBuffers(void)
Definition bufmgr.c:5709
void AtProcExit_LocalBuffers(void)
Definition localbuf.c:1014

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferManagerAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 3735 of file bufmgr.c.

3736{
3737 /* info obtained from freelist.c */
3738 int strategy_buf_id;
3739 uint32 strategy_passes;
3740 uint32 recent_alloc;
3741
3742 /*
3743 * Information saved between calls so we can determine the strategy
3744 * point's advance rate and avoid scanning already-cleaned buffers.
3745 */
3746 static bool saved_info_valid = false;
3747 static int prev_strategy_buf_id;
3748 static uint32 prev_strategy_passes;
3749 static int next_to_clean;
3750 static uint32 next_passes;
3751
3752 /* Moving averages of allocation rate and clean-buffer density */
3753 static float smoothed_alloc = 0;
3754 static float smoothed_density = 10.0;
3755
3756 /* Potentially these could be tunables, but for now, not */
3757 float smoothing_samples = 16;
3758 float scan_whole_pool_milliseconds = 120000.0;
3759
3760 /* Used to compute how far we scan ahead */
3761 long strategy_delta;
3762 int bufs_to_lap;
3763 int bufs_ahead;
3764 float scans_per_alloc;
3765 int reusable_buffers_est;
3766 int upcoming_alloc_est;
3767 int min_scan_buffers;
3768
3769 /* Variables for the scanning loop proper */
3770 int num_to_scan;
3771 int num_written;
3772 int reusable_buffers;
3773
3774 /* Variables for final smoothed_density update */
3775 long new_strategy_delta;
3776 uint32 new_recent_alloc;
3777
3778 /*
3779 * Find out where the clock-sweep currently is, and how many buffer
3780 * allocations have happened since our last call.
3781 */
3782 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3783
3784 /* Report buffer alloc counts to pgstat */
3785 PendingBgWriterStats.buf_alloc += recent_alloc;
3786
3787 /*
3788 * If we're not running the LRU scan, just stop after doing the stats
3789 * stuff. We mark the saved state invalid so that we can recover sanely
3790 * if LRU scan is turned back on later.
3791 */
3792 if (bgwriter_lru_maxpages <= 0)
3793 {
3794 saved_info_valid = false;
3795 return true;
3796 }
3797
3798 /*
3799 * Compute strategy_delta = how many buffers have been scanned by the
3800 * clock-sweep since last time. If first time through, assume none. Then
3801 * see if we are still ahead of the clock-sweep, and if so, how many
3802 * buffers we could scan before we'd catch up with it and "lap" it. Note:
3803 * weird-looking coding of xxx_passes comparisons are to avoid bogus
3804 * behavior when the passes counts wrap around.
3805 */
3806 if (saved_info_valid)
3807 {
3809
3812
3813 Assert(strategy_delta >= 0);
3814
3815 if ((int32) (next_passes - strategy_passes) > 0)
3816 {
3817 /* we're one pass ahead of the strategy point */
3819#ifdef BGW_DEBUG
3820 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3824#endif
3825 }
3826 else if (next_passes == strategy_passes &&
3828 {
3829 /* on same pass, but ahead or at least not behind */
3831#ifdef BGW_DEBUG
3832 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3836#endif
3837 }
3838 else
3839 {
3840 /*
3841 * We're behind, so skip forward to the strategy point and start
3842 * cleaning from there.
3843 */
3844#ifdef BGW_DEBUG
3845 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3849#endif
3853 }
3854 }
3855 else
3856 {
3857 /*
3858 * Initializing at startup or after LRU scanning had been off. Always
3859 * start at the strategy point.
3860 */
3861#ifdef BGW_DEBUG
3862 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3864#endif
3865 strategy_delta = 0;
3869 }
3870
3871 /* Update saved info for next time */
3874 saved_info_valid = true;
3875
3876 /*
3877 * Compute how many buffers had to be scanned for each new allocation, ie,
3878 * 1/density of reusable buffers, and track a moving average of that.
3879 *
3880 * If the strategy point didn't move, we don't update the density estimate
3881 */
3882 if (strategy_delta > 0 && recent_alloc > 0)
3883 {
3887 }
3888
3889 /*
3890 * Estimate how many reusable buffers there are between the current
3891 * strategy point and where we've scanned ahead to, based on the smoothed
3892 * density estimate.
3893 */
3896
3897 /*
3898 * Track a moving average of recent buffer allocations. Here, rather than
3899 * a true average we want a fast-attack, slow-decline behavior: we
3900 * immediately follow any increase.
3901 */
3902 if (smoothed_alloc <= (float) recent_alloc)
3904 else
3907
3908 /* Scale the estimate by a GUC to allow more aggressive tuning. */
3910
3911 /*
3912 * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3913 * eventually underflow to zero, and the underflows produce annoying
3914 * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3915 * zero, there's no point in tracking smaller and smaller values of
3916 * smoothed_alloc, so just reset it to exactly zero to avoid this
3917 * syndrome. It will pop back up as soon as recent_alloc increases.
3918 */
3919 if (upcoming_alloc_est == 0)
3920 smoothed_alloc = 0;
3921
3922 /*
3923 * Even in cases where there's been little or no buffer allocation
3924 * activity, we want to make a small amount of progress through the buffer
3925 * cache so that as many reusable buffers as possible are clean after an
3926 * idle period.
3927 *
3928 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3929 * the BGW will be called during the scan_whole_pool time; slice the
3930 * buffer pool into that many sections.
3931 */
3933
3935 {
3936#ifdef BGW_DEBUG
3937 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3939#endif
3941 }
3942
3943 /*
3944 * Now write out dirty reusable buffers, working forward from the
3945 * next_to_clean point, until we have lapped the strategy scan, or cleaned
3946 * enough buffers to match our estimate of the next cycle's allocation
3947 * requirements, or hit the bgwriter_lru_maxpages limit.
3948 */
3949
3950 num_to_scan = bufs_to_lap;
3951 num_written = 0;
3953
3954 /* Execute the LRU scan */
3955 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3956 {
3958 wb_context);
3959
3960 if (++next_to_clean >= NBuffers)
3961 {
3962 next_to_clean = 0;
3963 next_passes++;
3964 }
3965 num_to_scan--;
3966
3967 if (sync_state & BUF_WRITTEN)
3968 {
3971 {
3973 break;
3974 }
3975 }
3976 else if (sync_state & BUF_REUSABLE)
3978 }
3979
3981
3982#ifdef BGW_DEBUG
3983 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3986 bufs_to_lap - num_to_scan,
3989#endif
3990
3991 /*
3992 * Consider the above scan as being like a new allocation scan.
3993 * Characterize its density and update the smoothed one based on it. This
3994 * effectively halves the moving average period in cases where both the
3995 * strategy and the background writer are doing some useful scanning,
3996 * which is helpful because a long memory isn't as desirable on the
3997 * density estimates.
3998 */
3999 new_strategy_delta = bufs_to_lap - num_to_scan;
4001 if (new_strategy_delta > 0 && new_recent_alloc > 0)
4002 {
4006
4007#ifdef BGW_DEBUG
4008 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
4011#endif
4012 }
4013
4014 /* Return true if OK to hibernate */
4015 return (bufs_to_lap == 0 && recent_alloc == 0);
4016}
int BgWriterDelay
Definition bgwriter.c:58
#define BUF_REUSABLE
Definition bufmgr.c:82
double bgwriter_lru_multiplier
Definition bufmgr.c:175
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition bufmgr.c:4033
int bgwriter_lru_maxpages
Definition bufmgr.c:174
#define BUF_WRITTEN
Definition bufmgr.c:81
int32_t int32
Definition c.h:552
#define DEBUG2
Definition elog.h:29
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition freelist.c:321
int NBuffers
Definition globals.c:142
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition pgstat.h:242
PgStat_Counter maxwritten_clean
Definition pgstat.h:243
PgStat_Counter buf_alloc
Definition pgstat.h:244

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, fb(), PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
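
The allocation-rate tracking described in the comments uses a "fast-attack, slow-decline" moving average. Condensed, the update rule amounts to the following sketch (variable names follow the locals above; this is a summary, not the verbatim source):

 /* Follow any increase immediately; decay toward lower demand slowly. */
 if (smoothed_alloc <= (float) recent_alloc)
  smoothed_alloc = recent_alloc;
 else
  smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
   smoothing_samples;

This biases the estimate toward recent demand spikes, so the background writer cleans ahead aggressively right after a burst of allocations while letting the estimate relax gradually during idle periods.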

◆ buffer_readv_complete()

static pg_attribute_always_inline PgAioResult buffer_readv_complete ( PgAioHandle *  ioh,
PgAioResult  prior_result,
uint8  cb_data,
bool  is_temp 
)
static

Definition at line 8248 of file bufmgr.c.

8250{
8251 PgAioResult result = prior_result;
8252 PgAioTargetData *td = pgaio_io_get_target_data(ioh);
8253 uint8 first_error_off = 0;
8254 uint8 first_zeroed_off = 0;
8255 uint8 first_ignored_off = 0;
8256 uint8 error_count = 0;
8257 uint8 zeroed_count = 0;
8258 uint8 ignored_count = 0;
8259 uint8 checkfail_count = 0;
8260 uint64 *io_data;
8261 uint8 handle_data_len;
8262
8263 if (is_temp)
8264 {
8265 Assert(td->smgr.is_temp);
8266 Assert(pgaio_io_get_owner(ioh) == MyProcNumber);
8267 }
8268 else
8269 Assert(!td->smgr.is_temp);
8270
8271 /*
8272 * Iterate over all the buffers affected by this IO and call the
8273 * per-buffer completion function for each buffer.
8274 */
8275 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
8276 for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
8277 {
8279 bool failed;
8280 bool failed_verification = false;
8281 bool failed_checksum = false;
8282 bool zeroed_buffer = false;
8283 bool ignored_checksum = false;
8284
8286
8287 /*
8288 * If the entire I/O failed on a lower-level, each buffer needs to be
8289 * marked as failed. In case of a partial read, the first few buffers
8290 * may be ok.
8291 */
8292 failed =
8294 || prior_result.result <= buf_off;
8295
8296 buffer_readv_complete_one(td, buf_off, buf, cb_data, failed, is_temp,
8300 &zeroed_buffer);
8301
8302 /*
8303 * Track information about the number of different kinds of error
8304 * conditions across all pages, as there can be multiple pages failing
8305 * verification as part of one IO.
8306 */
8309 if (zeroed_buffer && zeroed_count++ == 0)
8311 if (ignored_checksum && ignored_count++ == 0)
8313 if (failed_checksum)
8315 }
8316
8317 /*
8318 * If the smgr read succeeded [partially] and page verification failed for
8319 * some of the pages, adjust the IO's result state appropriately.
8320 */
8321 if (prior_result.status != PGAIO_RS_ERROR &&
8322 (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
8323 {
8324 buffer_readv_encode_error(&result, is_temp,
8325 zeroed_count > 0, ignored_count > 0,
8329 pgaio_result_report(result, td, DEBUG1);
8330 }
8331
8332 /*
8333 * For shared relations this reporting is done in
8334 * shared_buffer_readv_complete_local().
8335 */
8336 if (is_temp && checkfail_count > 0)
8339
8340 return result;
8341}
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
Definition aio.c:355
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
Definition aio_target.c:73
@ PGAIO_RS_ERROR
Definition aio_types.h:84
static pg_attribute_always_inline void buffer_readv_complete_one(PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
Definition bufmgr.c:8104
static void buffer_readv_encode_error(PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
Definition bufmgr.c:8009
uint8_t uint8
Definition c.h:554
ProcNumber MyProcNumber
Definition globals.c:90
static char buf[DEFAULT_XLOG_SEG_SIZE]
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
RelFileLocator rlocator
Definition aio_types.h:65
struct PgAioTargetData::@126 smgr

References Assert, buf, buffer_readv_complete_one(), buffer_readv_encode_error(), BufferIsValid(), RelFileLocator::dbOid, DEBUG1, fb(), PgAioTargetData::is_temp, MyProcNumber, pgaio_io_get_handle_data(), pgaio_io_get_owner(), pgaio_io_get_target_data(), pgaio_result_report(), PGAIO_RS_ERROR, pgstat_report_checksum_failures_in_db(), PgAioTargetData::rlocator, and PgAioTargetData::smgr.

Referenced by local_buffer_readv_complete(), and shared_buffer_readv_complete().

◆ buffer_readv_complete_one()

static pg_attribute_always_inline void buffer_readv_complete_one ( PgAioTargetData *  td,
uint8  buf_off,
Buffer  buffer,
uint8  flags,
bool  failed,
bool  is_temp,
bool *  buffer_invalid,
bool *  failed_checksum,
bool *  ignored_checksum,
bool *  zeroed_buffer 
)
static

Definition at line 8104 of file bufmgr.c.

8110{
8111 BufferDesc *buf_hdr = is_temp ?
8112 GetLocalBufferDescriptor(-buffer - 1)
8113 : GetBufferDescriptor(buffer - 1);
8114 BufferTag tag = buf_hdr->tag;
8115 char *bufdata = BufferGetBlock(buffer);
8117 int piv_flags;
8118
8119 /* check that the buffer is in the expected state for a read */
8120#ifdef USE_ASSERT_CHECKING
8121 {
8123
8126 /* temp buffers don't use BM_IO_IN_PROGRESS */
8127 if (!is_temp)
8130 }
8131#endif
8132
8133 *buffer_invalid = false;
8134 *failed_checksum = false;
8135 *ignored_checksum = false;
8136 *zeroed_buffer = false;
8137
8138 /*
8139 * We ask PageIsVerified() to only log the message about checksum errors,
8140 * as the completion might be run in any backend (or IO workers). We will
8141 * report checksum errors in buffer_readv_report().
8142 */
8144
8145 /* the local zero_damaged_pages may differ from the definer's */
8148
8149 /* Check for garbage data. */
8150 if (!failed)
8151 {
8152 /*
8153 * If the buffer is not currently pinned by this backend, e.g. because
8154 * we're completing this IO after an error, the buffer data will have
8155 * been marked as inaccessible when the buffer was unpinned. The AIO
8156 * subsystem holds a pin, but that doesn't prevent the buffer from
8157 * having been marked as inaccessible. The completion might also be
8158 * executed in a different process.
8159 */
8160#ifdef USE_VALGRIND
8161 if (!BufferIsPinned(buffer))
8163#endif
8164
8165 if (!PageIsVerified((Page) bufdata, tag.blockNum, piv_flags,
8167 {
8168 if (flags & READ_BUFFERS_ZERO_ON_ERROR)
8169 {
8170 memset(bufdata, 0, BLCKSZ);
8171 *zeroed_buffer = true;
8172 }
8173 else
8174 {
8175 *buffer_invalid = true;
8176 /* mark buffer as having failed */
8177 failed = true;
8178 }
8179 }
8180 else if (*failed_checksum)
8181 *ignored_checksum = true;
8182
8183 /* undo what we did above */
8184#ifdef USE_VALGRIND
8185 if (!BufferIsPinned(buffer))
8187#endif
8188
8189 /*
8190 * Immediately log a message about the invalid page, but only to the
8191 * server log. The reason to do so immediately is that this may be
8192 * executed in a different backend than the one that originated the
8193 * request. The reason to do so immediately is that the originator
8194 * might not process the query result immediately (because it is busy
8195 * doing another part of query processing) or at all (e.g. if it was
8196 * cancelled or errored out due to another IO also failing). The
8197 * definer of the IO will emit an ERROR or WARNING when processing the
8198 * IO's results
8199 *
8200 * To avoid duplicating the code to emit these log messages, we reuse
8201 * buffer_readv_report().
8202 */
8204 {
8205 PgAioResult result_one = {0};
8206
8211 *zeroed_buffer ? 1 : 0,
8212 *failed_checksum ? 1 : 0,
8215 }
8216 }
8217
8218 /* Terminate I/O and set BM_VALID. */
8219 set_flag_bits = failed ? BM_IO_ERROR : BM_VALID;
8220 if (is_temp)
8222 else
8223 TerminateBufferIO(buf_hdr, false, set_flag_bits, false, true);
8224
8225 /*
8226 * Call the BUFFER_READ_DONE tracepoint in the callback, even though the
8227 * callback may not be executed in the same backend that called
8228 * BUFFER_READ_START. The alternative would be to defer calling the
8229 * tracepoint to a later point (e.g. the local completion callback for
8230 * shared buffer reads), which seems even less helpful.
8231 */
8233 tag.blockNum,
8234 tag.spcOid,
8235 tag.dbOid,
8236 tag.relNumber,
8238 false);
8239}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition bufmgr.c:589
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
Definition bufpage.c:94
#define PIV_LOG_LOG
Definition bufpage.h:468
PageData * Page
Definition bufpage.h:81
#define PIV_IGNORE_CHECKSUM_FAILURE
Definition bufpage.h:469
#define LOG_SERVER_ONLY
Definition elog.h:32
#define false
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint64 set_flag_bits, bool release_aio)
Definition localbuf.c:562
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition memdebug.h:26
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition memdebug.h:27
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
BlockNumber blockNum
RelFileNumber relNumber
ForkNumber forkNum

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, buffer_readv_encode_error(), BufferGetBlock(), BufferIsPinned, buftag::dbOid, fb(), buftag::forkNum, GetBufferDescriptor(), GetLocalBufferDescriptor(), INVALID_PROC_NUMBER, LOG_SERVER_ONLY, MyProcNumber, PageIsVerified(), pg_atomic_read_u64(), pgaio_result_report(), PIV_IGNORE_CHECKSUM_FAILURE, PIV_LOG_LOG, READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_ZERO_ON_ERROR, buftag::relNumber, buftag::spcOid, TerminateBufferIO(), TerminateLocalBufferIO(), VALGRIND_MAKE_MEM_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by buffer_readv_complete().
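
Condensed, the garbage-data handling above follows this pattern (the names are stand-ins for the function's locals):

 /* Sketch of the verification decision; names are stand-ins. */
 if (!PageIsVerified((Page) bufdata, blkno, piv_flags, &failed_checksum))
 {
  if (flags & READ_BUFFERS_ZERO_ON_ERROR)
   memset(bufdata, 0, BLCKSZ); /* accept the page as all-zeroes */
  else
   failed = true;              /* mark the buffer as invalid */
 }
 else if (failed_checksum)
  ignored_checksum = true;     /* page usable, checksum failure ignored */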

◆ buffer_readv_decode_error()

static void buffer_readv_decode_error ( PgAioResult  result,
bool *  zeroed_any,
bool *  ignored_any,
uint8 *  zeroed_or_error_count,
uint8 *  checkfail_count,
uint8 *  first_off 
)
inlinestatic

Definition at line 7967 of file bufmgr.c.

7973{
7974 uint32 rem_error = result.error_data;
7975
7976 /* see static asserts in buffer_readv_encode_error */
7977#define READV_COUNT_BITS 7
7978#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)
7979
7980 *zeroed_any = rem_error & 1;
7981 rem_error >>= 1;
7982
7983 *ignored_any = rem_error & 1;
7984 rem_error >>= 1;
7985
7988
7991
7994}
#define READV_COUNT_BITS
#define READV_COUNT_MASK
uint32 error_data
Definition aio_types.h:111

References PgAioResult::error_data, fb(), READV_COUNT_BITS, and READV_COUNT_MASK.

Referenced by buffer_readv_encode_error(), buffer_readv_report(), and shared_buffer_readv_complete_local().

◆ buffer_readv_encode_error()

static void buffer_readv_encode_error ( PgAioResult *  result,
bool  is_temp,
bool  zeroed_any,
bool  ignored_any,
uint8  error_count,
uint8  zeroed_count,
uint8  checkfail_count,
uint8  first_error_off,
uint8  first_zeroed_off,
uint8  first_ignored_off 
)
inlinestatic

Definition at line 8009 of file bufmgr.c.

8019{
8020
8021 uint8 shift = 0;
8025
8027 "PG_IOV_MAX is bigger than reserved space for error data");
8029 "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");
8030
8031 /*
8032 * We only have space to encode one offset - but luckily that's good
8033 * enough. If there is an error, the error is the interesting offset, same
8034 * with a zeroed buffer vs an ignored buffer.
8035 */
8036 if (error_count > 0)
8038 else if (zeroed_count > 0)
8040 else
8042
8043 Assert(!zeroed_any || error_count == 0);
8044
8045 result->error_data = 0;
8046
8047 result->error_data |= zeroed_any << shift;
8048 shift += 1;
8049
8050 result->error_data |= ignored_any << shift;
8051 shift += 1;
8052
8053 result->error_data |= ((uint32) zeroed_or_error_count) << shift;
8054 shift += READV_COUNT_BITS;
8055
8056 result->error_data |= ((uint32) checkfail_count) << shift;
8057 shift += READV_COUNT_BITS;
8058
8059 result->error_data |= ((uint32) first_off) << shift;
8060 shift += READV_COUNT_BITS;
8061
8062 result->id = is_temp ? PGAIO_HCB_LOCAL_BUFFER_READV :
8064
8065 if (error_count > 0)
8066 result->status = PGAIO_RS_ERROR;
8067 else
8068 result->status = PGAIO_RS_WARNING;
8069
8070 /*
8071 * The encoding is complicated enough to warrant cross-checking it against
8072 * the decode function.
8073 */
8074#ifdef USE_ASSERT_CHECKING
8075 {
8076 bool zeroed_any_2,
8081
8086 &first_off_2);
8092 }
8093#endif
8094
8095#undef READV_COUNT_BITS
8096#undef READV_COUNT_MASK
8097}
#define PGAIO_RESULT_ERROR_BITS
Definition aio_types.h:98
@ PGAIO_RS_WARNING
Definition aio_types.h:83
static void buffer_readv_decode_error(PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
Definition bufmgr.c:7967
#define StaticAssertDecl(condition, errmessage)
Definition c.h:952
#define PG_IOV_MAX
Definition pg_iovec.h:47
uint32 status
Definition aio_types.h:108
uint32 id
Definition aio_types.h:105

References Assert, buffer_readv_decode_error(), PgAioResult::error_data, fb(), PgAioResult::id, PG_IOV_MAX, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_RESULT_ERROR_BITS, PGAIO_RS_ERROR, PGAIO_RS_WARNING, READV_COUNT_BITS, StaticAssertDecl, and PgAioResult::status.

Referenced by buffer_readv_complete(), and buffer_readv_complete_one().
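
To make the packing concrete: the bits of PgAioResult.error_data written above decode as follows (READV_COUNT_BITS is 7, so each count field spans seven bits; this mirrors the shifts in buffer_readv_decode_error()):

 /* Layout summary of error_data, matching the encode/decode pair. */
 zeroed_any            = error_data & 1;                        /* bit 0      */
 ignored_any           = (error_data >> 1) & 1;                 /* bit 1      */
 zeroed_or_error_count = (error_data >> 2) & READV_COUNT_MASK;  /* bits 2..8  */
 checkfail_count       = (error_data >> 9) & READV_COUNT_MASK;  /* bits 9..15 */
 first_off             = (error_data >> 16) & READV_COUNT_MASK; /* bits 16..22 */

Only one offset field is needed because only the most interesting offset matters: the first error if any page failed verification, otherwise the first zeroed or first ignored page.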

◆ buffer_readv_report()

static void buffer_readv_report ( PgAioResult  result,
const PgAioTargetData *  td,
int  elevel 
)
static

Definition at line 8351 of file bufmgr.c.

8353{
8354 int nblocks = td->smgr.nblocks;
8355 BlockNumber first = td->smgr.blockNum;
8356 BlockNumber last = first + nblocks - 1;
8359 RelPathStr rpath =
8361 bool zeroed_any,
8365 first_off;
8367 const char *msg_one,
8368 *msg_mult,
8369 *det_mult,
8370 *hint_mult;
8371
8375 &first_off);
8376
8377 /*
8378 * Treat a read that had both zeroed buffers *and* ignored checksums as a
8379 * special case, it's too irregular to be emitted the same way as the
8380 * other cases.
8381 */
8382 if (zeroed_any && ignored_any)
8383 {
8385 Assert(nblocks > 1); /* same block can't be both zeroed and ignored */
8386 Assert(result.status != PGAIO_RS_ERROR);
8388
8389 ereport(elevel,
8391 errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation \"%s\"",
8392 affected_count, checkfail_count, first, last, rpath.str),
8393 affected_count > 1 ?
8394 errdetail("Block %u held the first zeroed page.",
8395 first + first_off) : 0,
8396 errhint_plural("See server log for details about the other %d invalid block.",
8397 "See server log for details about the other %d invalid blocks.",
8400 return;
8401 }
8402
8403 /*
8404 * The other messages are highly repetitive. To avoid duplicating a long
8405 * and complicated ereport(), gather the translated format strings
8406 * separately and then do one common ereport.
8407 */
8408 if (result.status == PGAIO_RS_ERROR)
8409 {
8410 Assert(!zeroed_any); /* can't have invalid pages when zeroing them */
8412 msg_one = _("invalid page in block %u of relation \"%s\"");
8413 msg_mult = _("%u invalid pages among blocks %u..%u of relation \"%s\"");
8414 det_mult = _("Block %u held the first invalid page.");
8415 hint_mult = _("See server log for the other %u invalid block(s).");
8416 }
8417 else if (zeroed_any && !ignored_any)
8418 {
8420 msg_one = _("invalid page in block %u of relation \"%s\"; zeroing out page");
8421 msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation \"%s\"");
8422 det_mult = _("Block %u held the first zeroed page.");
8423 hint_mult = _("See server log for the other %u zeroed block(s).");
8424 }
8425 else if (!zeroed_any && ignored_any)
8426 {
8428 msg_one = _("ignoring checksum failure in block %u of relation \"%s\"");
8429 msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation \"%s\"");
8430 det_mult = _("Block %u held the first ignored page.");
8431 hint_mult = _("See server log for the other %u ignored block(s).");
8432 }
8433 else
8435
8436 ereport(elevel,
8438 affected_count == 1 ?
8439 errmsg_internal(msg_one, first + first_off, rpath.str) :
8440 errmsg_internal(msg_mult, affected_count, first, last, rpath.str),
8443}
#define pg_unreachable()
Definition c.h:351
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errdetail_internal(const char *fmt,...)
Definition elog.c:1243
int errhint_internal(const char *fmt,...)
Definition elog.c:1352
int errhint_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition elog.c:1373
#define _(x)
Definition elog.c:91
const char * str
#define ERRCODE_DATA_CORRUPTED
int ProcNumber
Definition procnumber.h:24
#define relpathbackend(rlocator, backend, forknum)
Definition relpath.h:141
char str[REL_PATH_STR_MAXLEN+1]
Definition relpath.h:123
BlockNumber blockNum
Definition aio_types.h:66
BlockNumber nblocks
Definition aio_types.h:67
ForkNumber forkNum
Definition aio_types.h:68

References _, Assert, PgAioTargetData::blockNum, buffer_readv_decode_error(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errdetail(), errdetail_internal(), errhint_internal(), errhint_plural(), errmsg(), errmsg_internal(), fb(), PgAioTargetData::forkNum, INVALID_PROC_NUMBER, PgAioTargetData::is_temp, MyProcNumber, PgAioTargetData::nblocks, pg_unreachable, PGAIO_RS_ERROR, relpathbackend, PgAioTargetData::rlocator, PgAioTargetData::smgr, PgAioResult::status, and RelPathStr::str.

◆ buffer_stage_common()

static pg_attribute_always_inline void buffer_stage_common ( PgAioHandle ioh,
bool  is_write,
bool  is_temp 
)
static

Definition at line 7860 of file bufmgr.c.

7861{
7862 uint64 *io_data;
7863 uint8 handle_data_len;
7866
7867 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
7868
7870
7871 /* iterate over all buffers affected by the vectored readv/writev */
7872 for (int i = 0; i < handle_data_len; i++)
7873 {
7874 Buffer buffer = (Buffer) io_data[i];
7875 BufferDesc *buf_hdr = is_temp ?
7876 GetLocalBufferDescriptor(-buffer - 1)
7877 : GetBufferDescriptor(buffer - 1);
7879
7880 /*
7881 * Check that all the buffers are actually ones that could conceivably
7882 * be done in one IO, i.e. are sequential. This is the last
7883 * buffer-aware code before IO is actually executed and confusion
7884 * about which buffers are targeted by IO can be hard to debug, making
7885 * it worth doing extra-paranoid checks.
7886 */
7887 if (i == 0)
7888 first = buf_hdr->tag;
7889 else
7890 {
7891 Assert(buf_hdr->tag.relNumber == first.relNumber);
7892 Assert(buf_hdr->tag.blockNum == first.blockNum + i);
7893 }
7894
7895 if (is_temp)
7897 else
7899
7900 /* verify the buffer is in the expected state */
7902 if (is_write)
7903 {
7906 }
7907 else
7908 {
7911 }
7912
7913 /* temp buffers don't use BM_IO_IN_PROGRESS */
7914 if (!is_temp)
7916
7918
7919 /*
7920 * Reflect that the buffer is now owned by the AIO subsystem.
7921 *
7922 * For local buffers: This can't be done just via LocalRefCount, as
7923 * one might initially think, because this backend could error out while
7924 * AIO is still in progress, releasing all the pins held by the backend
7925 * itself.
7926 *
7927 * This pin is released again in TerminateBufferIO().
7928 */
7929 buf_hdr->io_wref = io_ref;
7930
7931 if (is_temp)
7932 {
7935 }
7936 else
7938
7939 /*
7940 * Ensure the content lock that prevents buffer modifications while
7941 * the buffer is being written out is not released early due to an
7942 * error.
7943 */
7944 if (is_write && !is_temp)
7945 {
7947
7948 /*
7949 * Lock is now owned by AIO subsystem.
7950 */
7951 BufferLockDisown(buffer, buf_hdr);
7952 }
7953
7954 /*
7955 * Stop tracking this buffer via the resowner - the AIO system now
7956 * keeps track.
7957 */
7958 if (!is_temp)
7960 }
7961}
static void pg_atomic_unlocked_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:494
#define BUF_REFCOUNT_ONE
static uint64 UnlockBufHdrExt(BufferDesc *desc, uint64 old_buf_state, uint64 set_bits, uint64 unset_bits, int refcount_change)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_REFCOUNT(state)
static void BufferLockDisown(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6118
static bool BufferLockHeldByMe(BufferDesc *buf_hdr)
Definition bufmgr.c:6390
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
BufferTag tag

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferLockDisown(), BufferLockHeldByMe(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, LockBufHdr(), pg_atomic_read_u64(), pg_atomic_unlocked_write_u64(), PG_USED_FOR_ASSERTS_ONLY, pgaio_io_get_handle_data(), pgaio_io_get_wref(), ResourceOwnerForgetBufferIO(), and UnlockBufHdrExt().

Referenced by local_buffer_readv_stage(), and shared_buffer_readv_stage().

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 2100 of file bufmgr.c.

2104{
2105 BufferTag newTag; /* identity of requested block */
2106 uint32 newHash; /* hash value for newTag */
2107 LWLock *newPartitionLock; /* buffer partition lock for it */
2108 int existing_buf_id;
2112 uint64 set_bits = 0;
2113
2114 /* Make sure we will have room to remember the buffer pin */
2117
2118 /* create a tag so we can lookup the buffer */
2119 InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
2120
2121 /* determine its hash code and partition lock ID */
2124
2125 /* see if the block is in the buffer pool already */
2128 if (existing_buf_id >= 0)
2129 {
2130 BufferDesc *buf;
2131 bool valid;
2132
2133 /*
2134 * Found it. Now, pin the buffer so no one can steal it from the
2135 * buffer pool, and check to see if the correct data has been loaded
2136 * into the buffer.
2137 */
2139
2140 valid = PinBuffer(buf, strategy, false);
2141
2142 /* Can release the mapping lock as soon as we've pinned it */
2144
2145 *foundPtr = true;
2146
2147 if (!valid)
2148 {
2149 /*
2150 * We can only get here if (a) someone else is still reading in
2151 * the page, (b) a previous read attempt failed, or (c) someone
2152 * called StartReadBuffers() but has not yet called WaitReadBuffers().
2153 */
2154 *foundPtr = false;
2155 }
2156
2157 return buf;
2158 }
2159
2160 /*
2161 * Didn't find it in the buffer pool. We'll have to initialize a new
2162 * buffer. Remember to unlock the mapping lock while doing the work.
2163 */
2165
2166 /*
2167 * Acquire a victim buffer. Somebody else might try to do the same; we
2168 * don't hold any conflicting locks. If so, we'll have to undo our work
2169 * later.
2170 */
2173
2174 /*
2175 * Try to make a hashtable entry for the buffer under its new tag. If
2176 * somebody else inserted another buffer for the tag, we'll release the
2177 * victim buffer we acquired and use the already inserted one.
2178 */
2181 if (existing_buf_id >= 0)
2182 {
2184 bool valid;
2185
2186 /*
2187 * Got a collision. Someone has already done what we were about to do.
2188 * We'll just handle this as if it were found in the buffer pool in
2189 * the first place. First, give up the buffer we were planning to
2190 * use.
2191 *
2192 * We could do this after releasing the partition lock, but then we'd
2193 * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
2194 * before acquiring the lock, for the rare case of such a collision.
2195 */
2197
2198 /* remaining code should match code at top of routine */
2199
2201
2202 valid = PinBuffer(existing_buf_hdr, strategy, false);
2203
2204 /* Can release the mapping lock as soon as we've pinned it */
2206
2207 *foundPtr = true;
2208
2209 if (!valid)
2210 {
2211 /*
2212 * We can only get here if (a) someone else is still reading in
2213 * the page, (b) a previous read attempt failed, or (c) someone
2214 * called StartReadBuffers() but has not yet called WaitReadBuffers().
2215 */
2216 *foundPtr = false;
2217 }
2218
2219 return existing_buf_hdr;
2220 }
2221
2222 /*
2223 * Need to lock the buffer header too in order to change its tag.
2224 */
2226
2227 /* some sanity checks while we hold the buffer header lock */
2230
2231 victim_buf_hdr->tag = newTag;
2232
2233 /*
2234 * Make sure BM_PERMANENT is set for buffers that must be written at every
2235 * checkpoint. Unlogged buffers only need to be written at shutdown
2236 * checkpoints, except for their "init" forks, which need to be treated
2237 * just like permanent relations.
2238 */
2240 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2242
2244 set_bits, 0, 0);
2245
2247
2248 /*
2249 * Buffer contents are currently invalid.
2250 */
2251 *foundPtr = false;
2252
2253 return victim_buf_hdr;
2254}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
#define BUF_USAGECOUNT_ONE
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition buf_table.c:118
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition bufmgr.c:2451
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition bufmgr.c:3181
static void ReservePrivateRefCountEntry(void)
Definition bufmgr.c:293
static void UnpinBuffer(BufferDesc *buf)
Definition bufmgr.c:3360
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
@ INIT_FORKNUM
Definition relpath.h:61
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition resowner.c:449

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, UnlockBufHdrExt(), and UnpinBuffer().

Referenced by PinBufferForBlock().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4356 of file bufmgr.c.

4357{
4359
4360 Assert(BufferIsPinned(buffer));
4361
4362 if (BufferIsLocal(buffer))
4363 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4364 else
4365 bufHdr = GetBufferDescriptor(buffer - 1);
4366
4367 /* pinned, so OK to read tag without spinlock */
4368 return bufHdr->tag.blockNum;
4369}
#define BufferIsLocal(buffer)
Definition buf.h:37

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().
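
A minimal usage sketch (an editor's illustration, not bufmgr.c code; the helper name is an assumption). ReadBuffer() returns the buffer already pinned, which satisfies the BufferIsPinned() assertion above:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: round-trip a block number through a pinned buffer. */
static BlockNumber
pinned_block_number(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);	/* returns the buffer pinned */
	BlockNumber result = BufferGetBlockNumber(buf); /* pin held, so safe */

	Assert(result == blkno);
	ReleaseBuffer(buf);			/* drop the pin again */
	return result;
}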

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4634 of file bufmgr.c.

4635{
4636 char *page = BufferGetPage(buffer);
4638 XLogRecPtr lsn;
4639
4640 /*
4641 * If we don't need locking for correctness, fastpath out.
4642 */
4643 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4644 return PageGetLSN(page);
4645
4646 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4647 Assert(BufferIsValid(buffer));
4648 Assert(BufferIsPinned(buffer));
4649
4650 bufHdr = GetBufferDescriptor(buffer - 1);
4652 lsn = PageGetLSN(page);
4654
4655 return lsn;
4656}
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
static XLogRecPtr PageGetLSN(const PageData *page)
Definition bufpage.h:385
#define XLogHintBitIsNeeded()
Definition xlog.h:122
uint64 XLogRecPtr
Definition xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().
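
A hedged sketch of the common caller shape (modeled loosely on SetHintBits(), listed above; the helper is an assumption): with only a pin held, BufferGetLSNAtomic() must be used instead of PageGetLSN() whenever XLogHintBitIsNeeded(), because it takes the buffer header lock for a consistent read:

#include "postgres.h"
#include "access/xlogdefs.h"
#include "storage/bufmgr.h"

/* Hypothetical: has the last change to this pinned page been flushed? */
static bool
page_change_is_flushed(Buffer buf, XLogRecPtr flush_horizon)
{
	return BufferGetLSNAtomic(buf) <= flush_horizon;
}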

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 4377 of file bufmgr.c.

4379{
4381
4382 /* Do the same checks as BufferGetBlockNumber. */
4383 Assert(BufferIsPinned(buffer));
4384
4385 if (BufferIsLocal(buffer))
4386 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4387 else
4388 bufHdr = GetBufferDescriptor(buffer - 1);
4389
4390 /* pinned, so OK to read tag without spinlock */
4391 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4392 *forknum = BufTagGetForkNum(&bufHdr->tag);
4393 *blknum = bufHdr->tag.blockNum;
4394}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
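
A small illustrative use (an editor's sketch; the function is hypothetical). Because the caller must hold a pin, the tag can be copied out without the header spinlock, exactly as the code above does:

#include "postgres.h"
#include "common/relpath.h"
#include "storage/bufmgr.h"
#include "storage/relfilelocator.h"

/* Hypothetical debugging aid: log the full identity of a pinned buffer. */
static void
log_buffer_identity(Buffer buf)
{
	RelFileLocator rlocator;
	ForkNumber	forknum;
	BlockNumber blkno;

	BufferGetTag(buf, &rlocator, &forknum, &blkno);
	elog(DEBUG1, "buffer %d = rel %u, fork %d, block %u",
		 buf, rlocator.relNumber, (int) forknum, blkno);
}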

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 3024 of file bufmgr.c.

3025{
3027
3028 Assert(BufferIsPinned(buffer));
3029
3030 if (BufferIsLocal(buffer))
3031 {
3032 int bufid = -buffer - 1;
3033
3035 /* Content locks are not maintained for local buffers. */
3036 }
3037 else
3038 {
3039 bufHdr = GetBufferDescriptor(buffer - 1);
3041 }
3042
3043 return pg_atomic_read_u64(&bufHdr->state) & BM_DIRTY;
3044}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2997
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and pg_atomic_read_u64().

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().
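
A sketch of the typical caller shape (assumed helper; compare the XLogRegisterBuffer() and log_heap_prune_and_freeze() callers above). For a shared buffer the content lock must already be held exclusively, matching the BufferIsLockedByMeInMode() assertion:

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical: dirty the page only if it is not dirty already. */
static void
mark_dirty_if_clean(Buffer buf)
{
	if (!BufferIsDirty(buf))
		MarkBufferDirty(buf);
}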

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2971 of file bufmgr.c.

2972{
2974
2975 Assert(BufferIsPinned(buffer));
2976
2977 if (BufferIsLocal(buffer))
2978 {
2979 /* Content locks are not maintained for local buffers. */
2980 return true;
2981 }
2982 else
2983 {
2984 bufHdr = GetBufferDescriptor(buffer - 1);
2985 return BufferLockHeldByMe(bufHdr);
2986 }
2987}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMe(), fb(), and GetBufferDescriptor().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2997 of file bufmgr.c.

2998{
3000
3001 Assert(BufferIsPinned(buffer));
3002
3003 if (BufferIsLocal(buffer))
3004 {
3005 /* Content locks are not maintained for local buffers. */
3006 return true;
3007 }
3008 else
3009 {
3010 bufHdr = GetBufferDescriptor(buffer - 1);
3012 }
3013}
static bool BufferLockHeldByMeInMode(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6372
static PgChecksumMode mode

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMeInMode(), fb(), GetBufferDescriptor(), and mode.

Referenced by BufferIsDirty(), HeapTupleSetHintBits(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4604 of file bufmgr.c.

4605{
4607
4608 /* Local buffers are used only for temp relations. */
4609 if (BufferIsLocal(buffer))
4610 return false;
4611
4612 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4613 Assert(BufferIsValid(buffer));
4614 Assert(BufferIsPinned(buffer));
4615
4616 /*
4617 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
4618 * need not bother with the buffer header spinlock. Even if someone else
4619 * changes the buffer header state while we're doing this, the state is
4620 * changed atomically, so we'll read the old value or the new value, but
4621 * not random garbage.
4622 */
4623 bufHdr = GetBufferDescriptor(buffer - 1);
4624 return (pg_atomic_read_u64(&bufHdr->state) & BM_PERMANENT) != 0;
4625}

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), and pg_atomic_read_u64().

Referenced by SetHintBits().
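
A hedged sketch of why SetHintBits(), the sole caller above, cares: only permanent buffers need the LSN interlock for hint bits, so a temp or unlogged buffer can skip it. The helper below is an assumption, not PostgreSQL code:

#include "postgres.h"
#include "access/xlogdefs.h"
#include "storage/bufmgr.h"

/* Hypothetical: fetch the page LSN only when the buffer is permanent. */
static XLogRecPtr
lsn_if_permanent(Buffer buf)
{
	if (BufferIsPermanent(buf))
		return BufferGetLSNAtomic(buf);
	return InvalidXLogRecPtr;
}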

◆ BufferLockAcquire()

static void BufferLockAcquire ( Buffer  buffer,
BufferDesc buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5755 of file bufmgr.c.

5756{
5757 PrivateRefCountEntry *entry;
5758 int extraWaits = 0;
5759
5760 /*
5761 * Get the reference to the refcount entry before we take the lock; it
5762 * seems better to do the lookup while not yet holding the lock.
5763 */
5764 entry = GetPrivateRefCountEntry(buffer, true);
5765
5766 /*
5767 * We better not already hold a lock on the buffer.
5768 */
5770
5771 /*
5772 * Lock out cancel/die interrupts until we exit the code section protected
5773 * by the content lock. This ensures that interrupts will not interfere
5774 * with manipulations of data structures in shared memory.
5775 */
5777
5778 for (;;)
5779 {
5780 uint32 wait_event = 0; /* initialized to avoid compiler warning */
5781 bool mustwait;
5782
5783 /*
5784 * Try to grab the lock the first time, we're not in the waitqueue
5785 * yet/anymore.
5786 */
5788
5789 if (likely(!mustwait))
5790 {
5791 break;
5792 }
5793
5794 /*
5795 * Ok, at this point we couldn't grab the lock on the first try. We
5796 * cannot simply queue ourselves to the end of the list and wait to be
5797 * woken up, because by now the lock could long since have been released.
5798 * Instead add us to the queue and try to grab the lock again. If we
5799 * succeed, we need to revert the queuing and be happy; otherwise we
5800 * recheck the lock. If we still couldn't grab it, we know that the
5801 * other locker will see our queue entries when releasing since they
5802 * existed before we checked for the lock.
5803 */
5804
5805 /* add to the queue */
5807
5808 /* we're now guaranteed to be woken up if necessary */
5810
5811 /* ok, grabbed the lock the second time round, need to undo queueing */
5812 if (!mustwait)
5813 {
5815 break;
5816 }
5817
5818 switch (mode)
5819 {
5822 break;
5825 break;
5826 case BUFFER_LOCK_SHARE:
5828 break;
5829 case BUFFER_LOCK_UNLOCK:
5831
5832 }
5834
5835 /*
5836 * Wait until awakened.
5837 *
5838 * It is possible that we get awakened for a reason other than being
5839 * signaled by BufferLockWakeup(). If so, loop back and wait again.
5840 * Once we've gotten the lock, re-increment the sema by the number of
5841 * additional signals received.
5842 */
5843 for (;;)
5844 {
5847 break;
5848 extraWaits++;
5849 }
5850
5852
5853 /* Retrying, allow BufferLockRelease to release waiters again. */
5855 }
5856
5857 /* Remember that we now hold this lock */
5858 entry->data.lockmode = mode;
5859
5860 /*
5861 * Fix the process wait semaphore's count for any absorbed wakeups.
5862 */
5863 while (unlikely(extraWaits-- > 0))
5865}
static uint64 pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
Definition atomics.h:551
#define BM_LOCK_WAKE_IN_PROGRESS
static bool BufferLockAttempt(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5943
static void BufferLockDequeueSelf(BufferDesc *buf_hdr)
Definition bufmgr.c:6050
static void BufferLockQueueSelf(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6010
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition bufmgr.c:493
@ BUFFER_LOCK_SHARE_EXCLUSIVE
Definition bufmgr.h:215
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
#define likely(x)
Definition c.h:421
@ LW_WS_NOT_WAITING
Definition lwlock.h:30
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
void PGSemaphoreUnlock(PGSemaphore sema)
Definition posix_sema.c:335
void PGSemaphoreLock(PGSemaphore sema)
Definition posix_sema.c:315
PGPROC * MyProc
Definition proc.c:67
PGSemaphore sem
Definition proc.h:183
uint8 lwWaiting
Definition proc.h:246
BufferLockMode lockmode
Definition bufmgr.c:109
PrivateRefCountData data
Definition bufmgr.c:125
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85

References Assert, BM_LOCK_WAKE_IN_PROGRESS, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), BufferLockDequeueSelf(), BufferLockQueueSelf(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, likely, PrivateRefCountData::lockmode, LW_WS_NOT_WAITING, PGPROC::lwWaiting, mode, MyProc, pg_atomic_fetch_and_u64(), pg_unreachable, PGSemaphoreLock(), PGSemaphoreUnlock(), pgstat_report_wait_end(), pgstat_report_wait_start(), PGPROC::sem, and unlikely.

Referenced by FlushUnlockedBuffer(), LockBufferInternal(), and MarkDirtyUnpinnedBufferInternal().

◆ BufferLockAttempt()

static bool BufferLockAttempt ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5943 of file bufmgr.c.

5944{
5946
5947 /*
5948 * Read once outside the loop; later iterations will get the newer value
5949 * via compare & exchange.
5950 */
5952
5953 /* loop until we've determined whether we could acquire the lock or not */
5954 while (true)
5955 {
5957 bool lock_free;
5958
5960
5962 {
5963 lock_free = (old_state & BM_LOCK_MASK) == 0;
5964 if (lock_free)
5966 }
5968 {
5970 if (lock_free)
5972 }
5973 else
5974 {
5976 if (lock_free)
5978 }
5979
5980 /*
5981 * Attempt to swap in the state we are expecting. If we didn't see the
5982 * lock as free, that's just the old value. If we saw it as free,
5983 * we'll attempt to mark it acquired. The reason that we always swap
5984 * in the value is that this doubles as a memory barrier. We could try
5985 * to be smarter and only swap in values if we saw the lock as free,
5986 * but benchmarks haven't shown it to be beneficial so far.
5987 *
5988 * Retry if the value changed since we last looked at it.
5989 */
5992 {
5993 if (lock_free)
5994 {
5995 /* Great! Got the lock. */
5996 return false;
5997 }
5998 else
5999 return true; /* somebody else has the lock */
6000 }
6001 }
6002
6004}
static bool pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 *expected, uint64 newval)
Definition atomics.h:522
#define BM_LOCK_VAL_SHARED
#define BM_LOCK_VAL_EXCLUSIVE
#define BM_LOCK_MASK
#define BM_LOCK_VAL_SHARE_EXCLUSIVE

References BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE_EXCLUSIVE, fb(), likely, mode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), and pg_unreachable.

Referenced by BufferLockAcquire(), and BufferLockConditional().
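
A stand-alone sketch of the same compare-and-exchange idea on a hypothetical 64-bit state word (MY_LOCK_MASK, MY_LOCK_VAL and try_lock are assumptions, not bufmgr.c names). Note the inverted return convention: this sketch returns true on success, whereas BufferLockAttempt() returns true when the caller must wait:

#include "postgres.h"
#include "port/atomics.h"

#define MY_LOCK_MASK	UINT64CONST(0xFF)
#define MY_LOCK_VAL		UINT64CONST(0x01)

static bool
try_lock(pg_atomic_uint64 *state)
{
	uint64		old_state = pg_atomic_read_u64(state);

	for (;;)
	{
		uint64		desired_state = old_state;
		bool		lock_free = (old_state & MY_LOCK_MASK) == 0;

		if (lock_free)
			desired_state += MY_LOCK_VAL;

		/*
		 * Always CAS, even when the lock looked busy, so the operation
		 * doubles as a memory barrier; on failure old_state is refreshed
		 * with the current value and we retry.
		 */
		if (pg_atomic_compare_exchange_u64(state, &old_state, desired_state))
			return lock_free;
	}
}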

◆ BufferLockConditional()

static bool BufferLockConditional ( Buffer  buffer,
BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 5900 of file bufmgr.c.

5901{
5902 PrivateRefCountEntry *entry = GetPrivateRefCountEntry(buffer, true);
5903 bool mustwait;
5904
5905 /*
5906 * We better not already hold a lock on the buffer.
5907 */
5909
5910 /*
5911 * Lock out cancel/die interrupts until we exit the code section protected
5912 * by the content lock. This ensures that interrupts will not interfere
5913 * with manipulations of data structures in shared memory.
5914 */
5916
5917 /* Check for the lock */
5919
5920 if (mustwait)
5921 {
5922 /* Failed to get lock, so release interrupt holdoff */
5924 }
5925 else
5926 {
5927 entry->data.lockmode = mode;
5928 }
5929
5930 return !mustwait;
5931}
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, PrivateRefCountData::lockmode, mode, and RESUME_INTERRUPTS.

Referenced by ConditionalLockBuffer(), and GetVictimBuffer().

◆ BufferLockDequeueSelf()

static void BufferLockDequeueSelf ( BufferDesc buf_hdr)
static

Definition at line 6050 of file bufmgr.c.

6051{
6052 bool on_waitlist;
6053
6055
6057 if (on_waitlist)
6058 proclist_delete(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6059
6060 if (proclist_is_empty(&buf_hdr->lock_waiters) &&
6062 {
6064 }
6065
6066 /* XXX: combine with fetch_and above? */
6068
6069 /* clear waiting state again, nice for debugging */
6070 if (on_waitlist)
6072 else
6073 {
6074 int extraWaits = 0;
6075
6076
6077 /*
6078 * Somebody else dequeued us and has or will wake us up. Deal with the
6079 * superfluous absorption of a wakeup.
6080 */
6081
6082 /*
6083 * Clear BM_LOCK_WAKE_IN_PROGRESS if somebody woke us before we
6084 * removed ourselves - they'll have set it.
6085 */
6087
6088 /*
6089 * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
6090 * get reset at some inconvenient point later. Most of the time this
6091 * will immediately return.
6092 */
6093 for (;;)
6094 {
6097 break;
6098 extraWaits++;
6099 }
6100
6101 /*
6102 * Fix the process wait semaphore's count for any absorbed wakeups.
6103 */
6104 while (extraWaits-- > 0)
6106 }
6107}
#define BM_LOCK_HAS_WAITERS
@ LW_WS_WAITING
Definition lwlock.h:31
#define proclist_delete(list, procno, link_member)
Definition proclist.h:187
static bool proclist_is_empty(const proclist_head *list)
Definition proclist.h:38

References BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, MyProc, MyProcNumber, pg_atomic_fetch_and_u64(), pg_atomic_read_u64(), PGSemaphoreLock(), PGSemaphoreUnlock(), proclist_delete, proclist_is_empty(), PGPROC::sem, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockDisown()

static void BufferLockDisown ( Buffer  buffer,
BufferDesc buf_hdr 
)
inlinestatic

Definition at line 6118 of file bufmgr.c.

6119{
6122}
static int BufferLockDisownInternal(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6132

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), fb(), and RESUME_INTERRUPTS.

Referenced by buffer_stage_common().

◆ BufferLockDisownInternal()

static int BufferLockDisownInternal ( Buffer  buffer,
BufferDesc buf_hdr 
)
inlinestatic

Definition at line 6132 of file bufmgr.c.

6133{
6136
6137 ref = GetPrivateRefCountEntry(buffer, false);
6138 if (ref == NULL)
6139 elog(ERROR, "lock %d is not held", buffer);
6140 mode = ref->data.lockmode;
6141 ref->data.lockmode = BUFFER_LOCK_UNLOCK;
6142
6143 return mode;
6144}
BufferLockMode
Definition bufmgr.h:204
#define ERROR
Definition elog.h:39

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, elog, ERROR, fb(), GetPrivateRefCountEntry(), and mode.

Referenced by BufferLockDisown(), and BufferLockUnlock().

◆ BufferLockHeldByMe()

static bool BufferLockHeldByMe ( BufferDesc buf_hdr)
static

Definition at line 6390 of file bufmgr.c.

6391{
6392 PrivateRefCountEntry *entry =
6394
6395 if (!entry)
6396 return false;
6397 else
6398 return entry->data.lockmode != BUFFER_LOCK_UNLOCK;
6399}
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)

References BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), and PrivateRefCountData::lockmode.

Referenced by buffer_stage_common(), BufferIsLockedByMe(), and UnpinBufferNoOwner().

◆ BufferLockHeldByMeInMode()

static bool BufferLockHeldByMeInMode ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6372 of file bufmgr.c.

6373{
6374 PrivateRefCountEntry *entry =
6376
6377 if (!entry)
6378 return false;
6379 else
6380 return entry->data.lockmode == mode;
6381}

References BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), PrivateRefCountData::lockmode, and mode.

Referenced by BufferIsLockedByMeInMode().

◆ BufferLockProcessRelease()

static void BufferLockProcessRelease ( BufferDesc buf_hdr,
BufferLockMode  mode,
uint64  lockstate 
)
static

Definition at line 6317 of file bufmgr.c.

6318{
6319 bool check_waiters = false;
6320 bool wake_exclusive = false;
6321
6322 /* nobody else can have that kind of lock */
6324
6325 /*
6326 * If we're still waiting for backends to get scheduled, don't wake them
6327 * up again. Otherwise check if we need to look through the waitqueue to
6328 * wake other backends.
6329 */
6332 {
6333 if ((lockstate & BM_LOCK_MASK) == 0)
6334 {
6335 /*
6336 * We released a lock and the lock was, in that moment, free. We
6337 * therefore can wake waiters for any kind of lock.
6338 */
6339 check_waiters = true;
6340 wake_exclusive = true;
6341 }
6343 {
6344 /*
6345 * We released the lock, but another backend still holds a lock.
6346 * We can't have released an exclusive lock, as there couldn't
6347 * have been other lock holders. If we released a share lock, no
6348 * waiters need to be woken up, as there must be other share
6349 * lockers. However, if we held a share-exclusive lock, another
6350 * backend now could acquire a share-exclusive lock.
6351 */
6352 check_waiters = true;
6353 wake_exclusive = false;
6354 }
6355 }
6356
6357 /*
6358 * As waking up waiters requires the spinlock to be acquired, only do so
6359 * if necessary.
6360 */
6361 if (check_waiters)
6363}
static void BufferLockWakeup(BufferDesc *buf_hdr, bool unlocked)
Definition bufmgr.c:6152

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_SHARE_EXCLUSIVE, BufferLockWakeup(), fb(), and mode.

Referenced by BufferLockUnlock().

◆ BufferLockQueueSelf()

static void BufferLockQueueSelf ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6010 of file bufmgr.c.

6011{
6012 /*
6013 * If we don't have a PGPROC structure, there's no way to wait. This
6014 * should never occur, since MyProc should only be null during shared
6015 * memory initialization.
6016 */
6017 if (MyProc == NULL)
6018 elog(PANIC, "cannot wait without a PGPROC structure");
6019
6021 elog(PANIC, "queueing for lock while waiting on another one");
6022
6024
6025 /* setting the flag is protected by the spinlock */
6027
6028 /*
6029 * These are currently used both for lwlocks and buffer content locks,
6030 * which is acceptable, although not pretty, because a backend can't wait
6031 * for both types of locks at the same time.
6032 */
6035
6036 proclist_push_tail(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6037
6038 /* Can release the mutex now */
6040}
static uint64 pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
Definition atomics.h:560
#define PANIC
Definition elog.h:42
#define proclist_push_tail(list, procno, link_member)
Definition proclist.h:191
uint8 lwWaitMode
Definition proc.h:247

References BM_LOCK_HAS_WAITERS, elog, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, mode, MyProc, MyProcNumber, PANIC, pg_atomic_fetch_or_u64(), proclist_push_tail, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockReleaseSub()

static uint64 BufferLockReleaseSub ( BufferLockMode  mode)
inlinestatic

Definition at line 6288 of file bufmgr.c.

6289{
6290 /*
6291 * Turns out that a switch() leads gcc to generate sufficiently worse code
6292 * for this to show up in profiles...
6293 */
6295 return BM_LOCK_VAL_EXCLUSIVE;
6298 else
6299 {
6301 return BM_LOCK_VAL_SHARED;
6302 }
6303
6304 return 0; /* keep compiler quiet */
6305}

References Assert, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, and mode.

Referenced by BufferLockUnlock().

◆ BufferLockUnlock()

static void BufferLockUnlock ( Buffer  buffer,
BufferDesc buf_hdr 
)
static

Definition at line 5871 of file bufmgr.c.

5872{
5875 uint64 sub;
5876
5878
5879 /*
5880 * Release my hold on the lock; after that it can immediately be acquired
5881 * by others, even if we still have to wake up other waiters.
5882 */
5884
5886
5888
5889 /*
5890 * Now okay to allow cancel/die interrupts.
5891 */
5893}
static uint64 pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:578
static void BufferLockProcessRelease(BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
Definition bufmgr.c:6317
static uint64 BufferLockReleaseSub(BufferLockMode mode)
Definition bufmgr.c:6288

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), BufferLockProcessRelease(), BufferLockReleaseSub(), fb(), mode, pg_atomic_sub_fetch_u64(), and RESUME_INTERRUPTS.

Referenced by FlushUnlockedBuffer(), MarkDirtyUnpinnedBufferInternal(), ResOwnerReleaseBuffer(), and UnlockBuffer().

◆ BufferLockWakeup()

static void BufferLockWakeup ( BufferDesc buf_hdr,
bool  unlocked 
)
static

Definition at line 6152 of file bufmgr.c.

6153{
6154 bool new_wake_in_progress = false;
6155 bool wake_share_exclusive = true;
6158
6160
6161 /* lock wait list while collecting backends to wake up */
6163
6164 proclist_foreach_modify(iter, &buf_hdr->lock_waiters, lwWaitLink)
6165 {
6166 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6167
6168 /*
6169 * Already woke up a waiter with a conflicting lock mode, so skip over
6170 * this wait list entry.
6171 */
6173 continue;
6175 continue;
6176
6177 proclist_delete(&buf_hdr->lock_waiters, iter.cur, lwWaitLink);
6178 proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
6179
6180 /*
6181 * Prevent additional wakeups until the woken backend gets a chance to
6182 * retry. Backends that are just waiting for the lock to become free
6183 * don't retry automatically.
6184 */
6185 new_wake_in_progress = true;
6186
6187 /*
6188 * Signal that the process isn't on the wait list anymore. This allows
6189 * BufferLockDequeueSelf() to remove itself from the waitlist with a
6190 * proclist_delete(), rather than having to check if it has been
6191 * removed from the list.
6192 */
6193 Assert(waiter->lwWaiting == LW_WS_WAITING);
6195
6196 /*
6197 * Don't wake up further waiters after waking a conflicting waiter.
6198 */
6199 if (waiter->lwWaitMode == BUFFER_LOCK_SHARE)
6200 {
6201 /*
6202 * Share locks conflict with exclusive locks.
6203 */
6204 wake_exclusive = false;
6205 }
6206 else if (waiter->lwWaitMode == BUFFER_LOCK_SHARE_EXCLUSIVE)
6207 {
6208 /*
6209 * Share-exclusive locks conflict with share-exclusive and
6210 * exclusive locks.
6211 */
6212 wake_exclusive = false;
6213 wake_share_exclusive = false;
6214 }
6215 else if (waiter->lwWaitMode == BUFFER_LOCK_EXCLUSIVE)
6216 {
6217 /*
6218 * Exclusive locks conflict with all other locks, there's no point
6219 * in waking up anybody else.
6220 */
6221 break;
6222 }
6223 }
6224
6226
6227 /* unset required flags, and release lock, in one fell swoop */
6228 {
6231
6233 while (true)
6234 {
6236
6237 /* compute desired flags */
6238
6241 else
6243
6244 if (proclist_is_empty(&buf_hdr->lock_waiters))
6246
6247 desired_state &= ~BM_LOCKED; /* release lock */
6248
6251 break;
6252 }
6253 }
6254
6255 /* Awaken any waiters I removed from the queue. */
6256 proclist_foreach_modify(iter, &wakeup, lwWaitLink)
6257 {
6258 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6259
6260 proclist_delete(&wakeup, iter.cur, lwWaitLink);
6261
6262 /*
6263 * Guarantee that lwWaiting being unset only becomes visible once the
6264 * unlink from the list has completed. Otherwise the target backend
6265 * could be woken up for another reason and enqueue for a new lock - if
6266 * that happens before the list unlink happens, the list would end up
6267 * being corrupted.
6268 *
6269 * The barrier pairs with the LockBufHdr() when enqueuing for another
6270 * lock.
6271 */
6273 waiter->lwWaiting = LW_WS_NOT_WAITING;
6274 PGSemaphoreUnlock(waiter->sem);
6275 }
6276}
#define pg_write_barrier()
Definition atomics.h:155
@ LW_WS_PENDING_WAKEUP
Definition lwlock.h:32
#define GetPGProcByNumber(n)
Definition proc.h:446
static void proclist_init(proclist_head *list)
Definition proclist.h:29
#define proclist_foreach_modify(iter, lhead, link_member)
Definition proclist.h:206
Definition proc.h:179
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, proclist_mutable_iter::cur, fb(), GetPGProcByNumber, LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_PENDING_WAKEUP, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pg_write_barrier, PGSemaphoreUnlock(), proclist_delete, proclist_foreach_modify, proclist_init(), proclist_is_empty(), proclist_push_tail, PGPROC::sem, and wakeup.

Referenced by BufferLockProcessRelease().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 3456 of file bufmgr.c.

3457{
3459 int buf_id;
3460 int num_to_scan;
3461 int num_spaces;
3462 int num_processed;
3463 int num_written;
3465 Oid last_tsid;
3467 int i;
3468 uint64 mask = BM_DIRTY;
3470
3471 /*
3472 * Unless this is a shutdown checkpoint or we have been explicitly told,
3473 * we write only permanent, dirty buffers. But at shutdown or end of
3474 * recovery, we write all dirty buffers.
3475 */
3478 mask |= BM_PERMANENT;
3479
3480 /*
3481 * Loop over all buffers, and mark the ones that need to be written with
3482 * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
3483 * can estimate how much work needs to be done.
3484 *
3485 * This allows us to write only those pages that were dirty when the
3486 * checkpoint began, and not those that get dirtied while it proceeds.
3487 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
3488 * later in this function, or by normal backends or the bgwriter cleaning
3489 * scan, the flag is cleared. Any buffer dirtied after this point won't
3490 * have the flag set.
3491 *
3492 * Note that if we fail to write some buffer, we may leave buffers with
3493 * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
3494 * certainly need to be written for the next checkpoint attempt, too.
3495 */
3496 num_to_scan = 0;
3497 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3498 {
3500 uint64 set_bits = 0;
3501
3502 /*
3503 * Header spinlock is enough to examine BM_DIRTY, see comment in
3504 * SyncOneBuffer.
3505 */
3507
3508 if ((buf_state & mask) == mask)
3509 {
3510 CkptSortItem *item;
3511
3513
3514 item = &CkptBufferIds[num_to_scan++];
3515 item->buf_id = buf_id;
3516 item->tsId = bufHdr->tag.spcOid;
3517 item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
3518 item->forkNum = BufTagGetForkNum(&bufHdr->tag);
3519 item->blockNum = bufHdr->tag.blockNum;
3520 }
3521
3523 set_bits, 0,
3524 0);
3525
3526 /* Check for barrier events in case NBuffers is large. */
3529 }
3530
3531 if (num_to_scan == 0)
3532 return; /* nothing to do */
3533
3535
3537
3538 /*
3539 * Sort buffers that need to be written to reduce the likelihood of random
3540 * IO. The sorting is also important for the implementation of balancing
3541 * writes between tablespaces. Without balancing writes we'd potentially
3542 * end up writing to the tablespaces one-by-one; possibly overloading the
3543 * underlying system.
3544 */
3546
3547 num_spaces = 0;
3548
3549 /*
3550 * Allocate progress status for each tablespace with buffers that need to
3551 * be flushed. This requires the to-be-flushed array to be sorted.
3552 */
3554 for (i = 0; i < num_to_scan; i++)
3555 {
3556 CkptTsStatus *s;
3557 Oid cur_tsid;
3558
3560
3561 /*
3562 * Grow array of per-tablespace status structs, every time a new
3563 * tablespace is found.
3564 */
3566 {
3567 Size sz;
3568
3569 num_spaces++;
3570
3571 /*
3572 * Not worth adding grow-by-power-of-2 logic here - even with a
3573 * few hundred tablespaces this should be fine.
3574 */
3575 sz = sizeof(CkptTsStatus) * num_spaces;
3576
3577 if (per_ts_stat == NULL)
3579 else
3581
3582 s = &per_ts_stat[num_spaces - 1];
3583 memset(s, 0, sizeof(*s));
3584 s->tsId = cur_tsid;
3585
3586 /*
3587 * The first buffer in this tablespace. As CkptBufferIds is sorted
3588 * by tablespace, all (s->num_to_scan) buffers in this tablespace
3589 * will follow afterwards.
3590 */
3591 s->index = i;
3592
3593 /*
3594 * progress_slice will be determined once we know how many buffers
3595 * are in each tablespace, i.e. after this loop.
3596 */
3597
3599 }
3600 else
3601 {
3602 s = &per_ts_stat[num_spaces - 1];
3603 }
3604
3605 s->num_to_scan++;
3606
3607 /* Check for barrier events. */
3610 }
3611
3612 Assert(num_spaces > 0);
3613
3614 /*
3615 * Build a min-heap over the write-progress in the individual tablespaces,
3616 * and compute how large a portion of the total progress a single
3617 * processed buffer is.
3618 */
3621 NULL);
3622
3623 for (i = 0; i < num_spaces; i++)
3624 {
3626
3627 ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3628
3630 }
3631
3633
3634 /*
3635 * Iterate through to-be-checkpointed buffers and write the ones (still)
3636 * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3637 * tablespaces; otherwise the sorting would lead to only one tablespace
3638 * receiving writes at a time, making inefficient use of the hardware.
3639 */
3640 num_processed = 0;
3641 num_written = 0;
3642 while (!binaryheap_empty(ts_heap))
3643 {
3647
3648 buf_id = CkptBufferIds[ts_stat->index].buf_id;
3649 Assert(buf_id != -1);
3650
3651 bufHdr = GetBufferDescriptor(buf_id);
3652
3653 num_processed++;
3654
3655 /*
3656 * We don't need to acquire the lock here, because we're only looking
3657 * at a single bit. It's possible that someone else writes the buffer
3658 * and clears the flag right after we check, but that doesn't matter
3659 * since SyncOneBuffer will then do nothing. However, there is a
3660 * further race condition: it's conceivable that between the time we
3661 * examine the bit here and the time SyncOneBuffer acquires the lock,
3662 * someone else not only wrote the buffer but replaced it with another
3663 * page and dirtied it. In that improbable case, SyncOneBuffer will
3664 * write the buffer though we didn't need to. It doesn't seem worth
3665 * guarding against this, though.
3666 */
3668 {
3669 if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3670 {
3673 num_written++;
3674 }
3675 }
3676
3677 /*
3678 * Measure progress independently of actually having to flush the buffer
3679 * - otherwise writing becomes unbalanced.
3680 */
3681 ts_stat->progress += ts_stat->progress_slice;
3682 ts_stat->num_scanned++;
3683 ts_stat->index++;
3684
3685 /* Have all the buffers from the tablespace been processed? */
3686 if (ts_stat->num_scanned == ts_stat->num_to_scan)
3687 {
3689 }
3690 else
3691 {
3692 /* update heap with the new progress */
3694 }
3695
3696 /*
3697 * Sleep to throttle our I/O rate.
3698 *
3699 * (This will check for barrier events even if it doesn't sleep.)
3700 */
3701 CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3702 }
3703
3704 /*
3705 * Issue all pending flushes. Only the checkpointer calls BufferSync(), so
3706 * IOContext will always be IOCONTEXT_NORMAL.
3707 */
3709
3711 per_ts_stat = NULL;
3713
3714 /*
3715 * Update checkpoint statistics. As noted above, this doesn't include
3716 * buffers written by other backends or the bgwriter scan.
3717 */
3719
3721}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition buf_init.c:26
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition bufmgr.c:7234
int checkpoint_flush_after
Definition bufmgr.c:207
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition bufmgr.c:7257
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition bufmgr.c:7319
double float8
Definition c.h:654
size_t Size
Definition c.h:629
void CheckpointWriteDelay(int flags, double progress)
volatile sig_atomic_t ProcSignalBarrierPending
Definition globals.c:40
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
#define InvalidOid
unsigned int Oid
void ProcessProcSignalBarrier(void)
Definition procsignal.c:499
int ckpt_bufs_written
Definition xlog.h:178
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition bufmgr.c:148
int num_to_scan
Definition bufmgr.c:151
PgStat_Counter buffers_written
Definition pgstat.h:266
CheckpointStatsData CheckpointStats
Definition xlog.c:212
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:154
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:151
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:150

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), fb(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u64(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), SyncOneBuffer(), ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdrExt(), and WritebackContextInit().

Referenced by CheckPointBuffers().
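
A worked example of the progress_slice arithmetic (figures assumed by the editor): with num_to_scan = 1000 dirty buffers in total, of which tablespace A holds 800 and tablespace B holds 200, each write in A advances A's progress by 1000/800 = 1.25 and each write in B advances B's by 1000/200 = 5.0. The min-heap always yields the tablespace with the least progress, so writes interleave roughly four from A per one from B, and both reach progress == 1000 together instead of being drained one after the other:

#include "postgres.h"

static void
progress_slice_example(void)
{
	float8		slice_a = (float8) 1000 / 800;	/* 1.25 per buffer written */
	float8		slice_b = (float8) 1000 / 200;	/* 5.00 per buffer written */

	/* four writes in A match one write in B exactly */
	Assert(4 * slice_a == slice_b);
}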

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inlinestatic

Definition at line 7169 of file bufmgr.c.

7170{
7171 int ret;
7174
7177
7179
7180 if (ret != 0)
7181 return ret;
7182
7184 return -1;
7186 return 1;
7187
7188 if (ba->blockNum < bb->blockNum)
7189 return -1;
7190 if (ba->blockNum > bb->blockNum)
7191 return 1;
7192
7193 return 0;
7194}
static int rlocator_comparator(const void *p1, const void *p2)
Definition bufmgr.c:7070

References BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 6484 of file bufmgr.c.

6485{
6486 if (BufferIsLocal(buffer))
6487 {
6488 if (LocalRefCount[-buffer - 1] != 1)
6489 elog(ERROR, "incorrect local pin count: %d",
6490 LocalRefCount[-buffer - 1]);
6491 }
6492 else
6493 {
6494 if (GetPrivateRefCount(buffer) != 1)
6495 elog(ERROR, "incorrect local pin count: %d",
6496 GetPrivateRefCount(buffer));
6497 }
6498}

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().
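
A sketch of the LockBufferForCleanup() caller pattern above (the helper is hypothetical): a backend about to wait for cleanup access first verifies it holds exactly one pin, since a second pin of its own could never be released by anyone else:

#include "postgres.h"
#include "storage/bufmgr.h"

static void
prepare_for_cleanup_wait(Buffer buf)
{
	CheckBufferIsPinnedOnce(buf);	/* elog(ERROR) on any other count */
	LockBufferForCleanup(buf);
}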

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 4173 of file bufmgr.c.

4174{
4175#ifdef USE_ASSERT_CHECKING
4176 int RefCountErrors = 0;
4178 int i;
4179 char *s;
4180
4181 /* check the array */
4182 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
4183 {
4185 {
4186 res = &PrivateRefCountArray[i];
4187
4189 elog(WARNING, "buffer refcount leak: %s", s);
4190 pfree(s);
4191
4193 }
4194 }
4195
4196 /* if necessary search the hash */
4198 {
4200
4202 while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
4203 {
4205 elog(WARNING, "buffer refcount leak: %s", s);
4206 pfree(s);
4208 }
4209 }
4210
4211 Assert(RefCountErrors == 0);
4212#endif
4213}
#define InvalidBuffer
Definition buf.h:25
static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:247
char * DebugPrintBufferRefcount(Buffer buffer)
Definition bufmgr.c:4299
#define REFCOUNT_ARRAY_ENTRIES
Definition bufmgr.c:129
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:248
static HTAB * PrivateRefCountHash
Definition bufmgr.c:249
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380

References Assert, PrivateRefCountEntry::buffer, DebugPrintBufferRefcount(), elog, fb(), hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4342 of file bufmgr.c.

4343{
4344 BufferSync(flags);
4345}
static void BufferSync(int flags)
Definition bufmgr.c:3456

References BufferSync().

Referenced by CheckPointGuts().

◆ CheckReadBuffersOperation()

static void CheckReadBuffersOperation ( ReadBuffersOperation operation,
bool  is_complete 
)
static

Definition at line 1627 of file bufmgr.c.

1628{
1629#ifdef USE_ASSERT_CHECKING
1630 Assert(operation->nblocks_done <= operation->nblocks);
1631 Assert(!is_complete || operation->nblocks == operation->nblocks_done);
1632
1633 for (int i = 0; i < operation->nblocks; i++)
1634 {
1635 Buffer buffer = operation->buffers[i];
1636 BufferDesc *buf_hdr = BufferIsLocal(buffer) ?
1637 GetLocalBufferDescriptor(-buffer - 1) :
1638 GetBufferDescriptor(buffer - 1);
1639
1640 Assert(BufferGetBlockNumber(buffer) == operation->blocknum + i);
1642
1643 if (i < operation->nblocks_done)
1645 }
1646#endif
1647}

References Assert, ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, and pg_atomic_read_u64().

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem a,
const CkptSortItem b 
)
inlinestatic

Definition at line 7203 of file bufmgr.c.

7204{
7205 /* compare tablespace */
7206 if (a->tsId < b->tsId)
7207 return -1;
7208 else if (a->tsId > b->tsId)
7209 return 1;
7210 /* compare relation */
7211 if (a->relNumber < b->relNumber)
7212 return -1;
7213 else if (a->relNumber > b->relNumber)
7214 return 1;
7215 /* compare fork */
7216 else if (a->forkNum < b->forkNum)
7217 return -1;
7218 else if (a->forkNum > b->forkNum)
7219 return 1;
7220 /* compare block number */
7221 else if (a->blockNum < b->blockNum)
7222 return -1;
7223 else if (a->blockNum > b->blockNum)
7224 return 1;
7225 /* equal page IDs are unlikely, but not impossible */
7226 return 0;
7227}
int b
Definition isn.c:74
int a
Definition isn.c:73

References a, and b.
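
An illustration of the resulting order (values assumed; since the comparator is static, this fragment would only compile inside bufmgr.c). The sort key is (tsId, relNumber, forkNum, blockNum), ascending:

CkptSortItem a = {.tsId = 1663, .relNumber = 16384,
				  .forkNum = MAIN_FORKNUM, .blockNum = 7};
CkptSortItem b = {.tsId = 1663, .relNumber = 16384,
				  .forkNum = MAIN_FORKNUM, .blockNum = 9};

Assert(ckpt_buforder_comparator(&a, &b) < 0);	/* a sorts before b */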

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 6464 of file bufmgr.c.

6465{
6466 BufferDesc *buf;
6467
6468 Assert(BufferIsPinned(buffer));
6469 if (BufferIsLocal(buffer))
6470 return true; /* act as though we got it */
6471
6472 buf = GetBufferDescriptor(buffer - 1);
6473
6475}
static bool BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5900

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsPinned, BufferLockConditional(), and GetBufferDescriptor().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().
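
A sketch of the opportunistic pattern used by callers such as heap_page_prune_opt() (helper name assumed; "buf" must already be pinned): take the exclusive content lock only if it is immediately free, otherwise skip the page rather than stall:

#include "postgres.h"
#include "storage/bufmgr.h"

static bool
try_opportunistic_work(Buffer buf)
{
	if (!ConditionalLockBuffer(buf))
		return false;			/* contended: leave the page for later */

	/* ... exclusive-lock work on the page would go here ... */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	return true;
}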

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 6690 of file bufmgr.c.

6691{
6694 refcount;
6695
6696 Assert(BufferIsValid(buffer));
6697
6698 /* see AIO related comment in LockBufferForCleanup() */
6699
6700 if (BufferIsLocal(buffer))
6701 {
6702 refcount = LocalRefCount[-buffer - 1];
6703 /* There should be exactly one pin */
6704 Assert(refcount > 0);
6705 if (refcount != 1)
6706 return false;
6707 /* Nobody else to wait for */
6708 return true;
6709 }
6710
6711 /* There should be exactly one local pin */
6712 refcount = GetPrivateRefCount(buffer);
6713 Assert(refcount);
6714 if (refcount != 1)
6715 return false;
6716
6717 /* Try to acquire lock */
6718 if (!ConditionalLockBuffer(buffer))
6719 return false;
6720
6721 bufHdr = GetBufferDescriptor(buffer - 1);
6724
6725 Assert(refcount > 0);
6726 if (refcount == 1)
6727 {
6728 /* Successfully acquired exclusive lock with pincount 1 */
6730 return true;
6731 }
6732
6733 /* Failed, so release the lock */
6736 return false;
6737}
bool ConditionalLockBuffer(Buffer buffer)
Definition bufmgr.c:6464
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
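
A hedged sketch in the style of the lazy_scan_heap() caller above (the helper is an assumption): attempt cleanup access without waiting and defer the page when other pins exist:

#include "postgres.h"
#include "storage/bufmgr.h"

static bool
try_page_cleanup(Buffer buf)
{
	if (!ConditionalLockBufferForCleanup(buf))
		return false;			/* other backends hold pins; revisit later */

	/* exclusive lock with pin count 1: safe to rearrange the page here */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	return true;
}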

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5377 of file bufmgr.c.

5379{
5380 char relpersistence;
5383
5384 /* Set the relpersistence. */
5385 relpersistence = permanent ?
5387
5390
5391 /*
5392 * Create and copy all forks of the relation. During create database we
5393 * have a separate cleanup mechanism which deletes the complete database
5394 * directory. Therefore, each individual relation doesn't need to be
5395 * registered for cleanup.
5396 */
5397 RelationCreateStorage(dst_rlocator, relpersistence, false);
5398
5399 /* copy main fork. */
5401 permanent);
5402
5403 /* copy those extra forks that exist */
5404 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5405 forkNum <= MAX_FORKNUM; forkNum++)
5406 {
5407 if (smgrexists(src_rel, forkNum))
5408 {
5409 smgrcreate(dst_rel, forkNum, false);
5410
5411 /*
5412 * WAL log creation if the relation is persistent, or this is the
5413 * init fork of an unlogged relation.
5414 */
5415 if (permanent || forkNum == INIT_FORKNUM)
5416 log_smgrcreate(&dst_rlocator, forkNum);
5417
5418 /* Copy a fork's data, block by block. */
5419 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
5420 permanent);
5421 }
5422 }
5423}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition bufmgr.c:5263
@ MAIN_FORKNUM
Definition relpath.h:58
#define MAX_FORKNUM
Definition relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition storage.c:187

References fb(), INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4299 of file bufmgr.c.

4300{
4301 BufferDesc *buf;
4302 int32 loccount;
4303 char *result;
4304 ProcNumber backend;
4305 uint64 buf_state;
4306
4307 Assert(BufferIsValid(buffer));
4308 if (BufferIsLocal(buffer))
4309 {
4310 buf = GetLocalBufferDescriptor(-buffer - 1);
4311 loccount = LocalRefCount[-buffer - 1];
4312 backend = MyProcNumber;
4313 }
4314 else
4315 {
4316 buf = GetBufferDescriptor(buffer - 1);
4317 loccount = GetPrivateRefCount(buffer);
4318 backend = INVALID_PROC_NUMBER;
4319 }
4320
4321 /* theoretically we should lock the bufHdr here */
4322 buf_state = pg_atomic_read_u64(&buf->state);
4323
4324 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%" PRIx64 ", refcount=%u %d)",
4325 buffer,
4326 relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
4327 BufTagGetForkNum(&buf->tag)).str,
4328 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4329 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
4330 return result;
4331}
#define BUF_FLAG_MASK
char * psprintf(const char *fmt,...)
Definition psprintf.c:43

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u64(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBuffer().
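
The returned string is palloc'd, so callers normally emit it straight into a log message, as the leak checks do. Sketch:

    /* Sketch: report a suspected buffer leak, as CheckForBufferLeaks() does. */
    elog(WARNING, "buffer refcount leak: %s", DebugPrintBufferRefcount(buffer));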

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 5030 of file bufmgr.c.

5031{
5032 int i;
5033
5034 /*
5035 * We needn't consider local buffers, since by assumption the target
5036 * database isn't our own.
5037 */
5038
5039 for (i = 0; i < NBuffers; i++)
5040 {
5041 BufferDesc *bufHdr = GetBufferDescriptor(i);
5042
5043 /*
5044 * As in DropRelationBuffers, an unlocked precheck should be safe and
5045 * saves some cycles.
5046 */
5047 if (bufHdr->tag.dbOid != dbid)
5048 continue;
5049
5050 LockBufHdr(bufHdr);
5051 if (bufHdr->tag.dbOid == dbid)
5052 InvalidateBuffer(bufHdr); /* releases spinlock */
5053 else
5054 UnlockBufHdr(bufHdr);
5055 }
5056}
static void InvalidateBuffer(BufferDesc *buf)
Definition bufmgr.c:2273

References fb(), GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 4680 of file bufmgr.c.

4682{
4683 int i;
4684 int j;
4685 RelFileLocatorBackend rlocator;
4686 BlockNumber nForkBlock[MAX_FORKNUM];
4687 uint64 nBlocksToInvalidate = 0;
4688
4689 rlocator = smgr_reln->smgr_rlocator;
4690
4691 /* If it's a local relation, it's localbuf.c's problem. */
4692 if (RelFileLocatorBackendIsTemp(rlocator))
4693 {
4694 if (rlocator.backend == MyProcNumber)
4695 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4696 firstDelBlock);
4697
4698 return;
4699 }
4700
4701 /*
4702 * To remove all the pages of the specified relation forks from the buffer
4703 * pool, we need to scan the entire buffer pool, but we can optimize this
4704 * by finding the buffers from the BufMapping table, provided we know the
4705 * exact size of each fork of the relation. The exact size is required to
4706 * ensure that we don't leave behind any buffer for the relation being
4707 * dropped, as otherwise the background writer or checkpointer could PANIC
4708 * while flushing buffers corresponding to files that don't exist.
4709 *
4710 * To know the exact size, we rely on the size we cached for each fork
4711 * during recovery, which limits the optimization to recovery and to
4712 * standbys, but we can easily extend it once we have a shared cache for
4713 * relation size.
4714 *
4715 * In recovery, we cache the value returned by the first lseek(SEEK_END),
4716 * and future writes keep the cached value up-to-date. See smgrextend.
4717 * It is possible that the value of the first lseek is smaller than the
4718 * actual number of existing blocks in the file due to buggy Linux
4719 * kernels that might not have accounted for the recent write. But that
4720 * should be fine because there must not be any buffers after that file
4721 * size.
4722 */
4723 for (i = 0; i < nforks; i++)
4724 {
4725 /* Get the number of blocks for a relation's fork */
4726 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4727
4728 if (nForkBlock[i] == InvalidBlockNumber)
4729 {
4730 nBlocksToInvalidate = InvalidBlockNumber;
4731 break;
4732 }
4733
4734 /* calculate the number of blocks to be invalidated */
4735 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4736 }
4737
4738 /*
4739 * We apply the optimization iff the total number of blocks to invalidate
4740 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4741 */
4742 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4743 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4744 {
4745 for (j = 0; j < nforks; j++)
4746 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4747 nForkBlock[j], firstDelBlock[j]);
4748 return;
4749 }
4750
4751 for (i = 0; i < NBuffers; i++)
4752 {
4753 BufferDesc *bufHdr = GetBufferDescriptor(i);
4754
4755 /*
4756 * We can make this a tad faster by prechecking the buffer tag before
4757 * we attempt to lock the buffer; this saves a lot of lock
4758 * acquisitions in typical cases. It should be safe because the
4759 * caller must have AccessExclusiveLock on the relation, or some other
4760 * reason to be certain that no one is loading new pages of the rel
4761 * into the buffer pool. (Otherwise we might well miss such pages
4762 * entirely.) Therefore, while the tag might be changing while we
4763 * look at it, it can't be changing *to* a value we care about, only
4764 * *away* from such a value. So false negatives are impossible, and
4765 * false positives are safe because we'll recheck after getting the
4766 * buffer lock.
4767 *
4768 * We could check forkNum and blockNum as well as the rlocator, but
4769 * the incremental win from doing so seems small.
4770 */
4771 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4772 continue;
4773
4774 LockBufHdr(bufHdr);
4775
4776 for (j = 0; j < nforks; j++)
4777 {
4778 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4779 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4780 bufHdr->tag.blockNum >= firstDelBlock[j])
4781 {
4782 InvalidateBuffer(bufHdr); /* releases spinlock */
4783 break;
4784 }
4785 }
4786 if (j >= nforks)
4787 UnlockBufHdr(bufHdr);
4788 }
4789}
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition bufmgr.c:92
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition bufmgr.c:4970
int j
Definition isn.c:78
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:847

References RelFileLocatorBackend::backend, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrtruncate().
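
For a sense of scale of the optimization: BUF_DROP_FULL_SCAN_THRESHOLD is defined as NBuffers / 32, so with shared_buffers = 1GB (NBuffers = 131072 eight-kilobyte buffers) the cutoff is 4096 blocks, i.e. 32MB. A truncation invalidating fewer blocks than that is handled with per-block BufMapping lookups via FindAndDropRelationBuffers(); anything larger falls back to scanning all NBuffers buffer headers.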

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 4800 of file bufmgr.c.

4801{
4802 int i;
4803 int n = 0;
4804 SMgrRelation *rels;
4805 BlockNumber (*block)[MAX_FORKNUM + 1];
4806 uint64 nBlocksToInvalidate = 0;
4807 RelFileLocator *locators;
4808 bool cached = true;
4809 bool use_bsearch;
4810
4811 if (nlocators == 0)
4812 return;
4813
4814 rels = palloc_array(SMgrRelation, nlocators); /* non-local relations */
4815
4816 /* If it's a local relation, it's localbuf.c's problem. */
4817 for (i = 0; i < nlocators; i++)
4818 {
4819 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4820 {
4821 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4822 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4823 }
4824 else
4825 rels[n++] = smgr_reln[i];
4826 }
4827
4828 /*
4829 * If there are no non-local relations, then we're done. Release the
4830 * memory and return.
4831 */
4832 if (n == 0)
4833 {
4834 pfree(rels);
4835 return;
4836 }
4837
4838 /*
4839 * This is used to remember the number of blocks for all the relations
4840 * forks.
4841 */
4842 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4843 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4844
4845 /*
4846 * We can avoid scanning the entire buffer pool if we know the exact size
4847 * of each of the given relation forks. See DropRelationBuffers.
4848 */
4849 for (i = 0; i < n && cached; i++)
4850 {
4851 for (int j = 0; j <= MAX_FORKNUM; j++)
4852 {
4853 /* Get the number of blocks for a relation's fork. */
4854 block[i][j] = smgrnblocks_cached(rels[i], j);
4855
4856 /* We need to only consider the relation forks that exist. */
4857 if (block[i][j] == InvalidBlockNumber)
4858 {
4859 if (!smgrexists(rels[i], j))
4860 continue;
4861 cached = false;
4862 break;
4863 }
4864
4865 /* calculate the total number of blocks to be invalidated */
4866 nBlocksToInvalidate += block[i][j];
4867 }
4868 }
4869
4870 /*
4871 * We apply the optimization iff the total number of blocks to invalidate
4872 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4873 */
4874 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4875 {
4876 for (i = 0; i < n; i++)
4877 {
4878 for (int j = 0; j <= MAX_FORKNUM; j++)
4879 {
4880 /* ignore relation forks that don't exist */
4881 if (!BlockNumberIsValid(block[i][j]))
4882 continue;
4883
4884 /* drop all the buffers for a particular relation fork */
4885 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4886 j, block[i][j], 0);
4887 }
4888 }
4889
4890 pfree(block);
4891 pfree(rels);
4892 return;
4893 }
4894
4895 pfree(block);
4896 locators = palloc_array(RelFileLocator, n); /* non-local relations */
4897 for (i = 0; i < n; i++)
4898 locators[i] = rels[i]->smgr_rlocator.locator;
4899
4900 /*
4901 * For a low number of relations to drop, just use a simple walk-through
4902 * to save the bsearch overhead. The threshold is more of a guess than an
4903 * exactly determined value, as it depends on many factors (CPU and RAM
4904 * speeds, amount of shared buffers etc.).
4905 */
4906 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4907
4908 /* sort the list of rlocators if necessary */
4909 if (use_bsearch)
4910 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4911
4912 for (i = 0; i < NBuffers; i++)
4913 {
4914 RelFileLocator *rlocator = NULL;
4915 BufferDesc *bufHdr = GetBufferDescriptor(i);
4916
4917 /*
4918 * As in DropRelationBuffers, an unlocked precheck should be safe and
4919 * saves some cycles.
4920 */
4921
4922 if (!use_bsearch)
4923 {
4924 int j;
4925
4926 for (j = 0; j < n; j++)
4927 {
4928 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4929 {
4930 rlocator = &locators[j];
4931 break;
4932 }
4933 }
4934 }
4935 else
4936 {
4937 RelFileLocator locator;
4938
4939 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4940 rlocator = bsearch(&locator,
4941 locators, n, sizeof(RelFileLocator),
4942 rlocator_comparator);
4943 }
4944
4945 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4946 if (rlocator == NULL)
4947 continue;
4948
4949 LockBufHdr(bufHdr);
4950 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4951 InvalidateBuffer(bufHdr); /* releases spinlock */
4952 else
4953 UnlockBufHdr(bufHdr);
4954 }
4955
4956 pfree(locators);
4957 pfree(rels);
4958}
#define RELS_BSEARCH_THRESHOLD
Definition bufmgr.c:84
#define palloc_array(type, count)
Definition fe_memutils.h:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition localbuf.c:702
#define qsort(a, b, c, d)
Definition port.h:495

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), palloc_array, pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7561 of file bufmgr.c.

7563{
7564 *buffers_evicted = 0;
7565 *buffers_skipped = 0;
7566 *buffers_flushed = 0;
7567
7568 for (int buf = 1; buf <= NBuffers; buf++)
7569 {
7570 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7571 uint64 buf_state;
7572 bool buffer_flushed;
7573
7574 CHECK_FOR_INTERRUPTS();
7575
7576 buf_state = pg_atomic_read_u64(&desc->state);
7577 if (!(buf_state & BM_VALID))
7578 continue;
7579
7580 ResourceOwnerEnlarge(CurrentResourceOwner);
7581 ReservePrivateRefCountEntry();
7582
7583 LockBufHdr(desc);
7584
7585 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
7586 (*buffers_evicted)++;
7587 else
7588 (*buffers_skipped)++;
7589
7590 if (buffer_flushed)
7591 (*buffers_flushed)++;
7592 }
7593}
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition bufmgr.c:7470
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
pg_atomic_uint64 state

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().
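
All three output counters are mandatory; pg_buffercache_evict_all() simply forwards them to the user. Sketch:

    /* Sketch: drain every unpinned shared buffer and report what happened. */
    int32   evicted, flushed, skipped;

    EvictAllUnpinnedBuffers(&evicted, &flushed, &skipped);
    elog(DEBUG1, "evicted=%d flushed=%d skipped=%d", evicted, flushed, skipped);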

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7611 of file bufmgr.c.

7613{
7614 Assert(!RelationUsesLocalBuffers(rel));
7615
7616 *buffers_skipped = 0;
7617 *buffers_evicted = 0;
7618 *buffers_flushed = 0;
7619
7620 for (int buf = 1; buf <= NBuffers; buf++)
7621 {
7622 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7623 uint64 buf_state = pg_atomic_read_u64(&desc->state);
7624 bool buffer_flushed;
7625
7626 CHECK_FOR_INTERRUPTS();
7627
7628 /* An unlocked precheck should be safe and saves some cycles. */
7629 if ((buf_state & BM_VALID) == 0 ||
7630 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7631 continue;
7632
7633 /* Make sure we can pin the buffer. */
7634 ResourceOwnerEnlarge(CurrentResourceOwner);
7635 ReservePrivateRefCountEntry();
7636
7637 buf_state = LockBufHdr(desc);
7638
7639 /* recheck, could have changed without the lock */
7640 if ((buf_state & BM_VALID) == 0 ||
7641 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7642 {
7643 UnlockBufHdr(desc);
7644 continue;
7645 }
7646
7647 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
7648 (*buffers_evicted)++;
7649 else
7650 (*buffers_skipped)++;
7651
7652 if (buffer_flushed)
7653 (*buffers_flushed)++;
7654 }
7655}
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
RelFileLocator rd_locator
Definition rel.h:57

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().
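
The relation-scoped variant has the extra precondition that rel must not use local buffers (it asserts !RelationUsesLocalBuffers(rel)); otherwise the calling convention mirrors EvictAllUnpinnedBuffers(). Sketch:

    /* Sketch: evict only this relation's buffers; rel must be non-temporary. */
    int32   evicted, flushed, skipped;

    EvictRelUnpinnedBuffers(rel, &evicted, &flushed, &skipped);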

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool buffer_flushed 
)

◆ EvictUnpinnedBufferInternal()

static bool EvictUnpinnedBufferInternal ( BufferDesc desc,
bool buffer_flushed 
)
static

Definition at line 7470 of file bufmgr.c.

7471{
7472 uint64 buf_state;
7473 bool result;
7474
7475 *buffer_flushed = false;
7476
7477 buf_state = pg_atomic_read_u64(&desc->state);
7478 Assert(buf_state & BM_LOCKED);
7479
7480 if ((buf_state & BM_VALID) == 0)
7481 {
7482 UnlockBufHdr(desc);
7483 return false;
7484 }
7485
7486 /* Check that it's not pinned already. */
7487 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
7488 {
7489 UnlockBufHdr(desc);
7490 return false;
7491 }
7492
7493 PinBuffer_Locked(desc); /* releases spinlock */
7494
7495 /* If it was dirty, try to clean it once. */
7496 if (buf_state & BM_DIRTY)
7497 {
7498 FlushUnlockedBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
7499 *buffer_flushed = true;
7500 }
7501
7502 /* This will return false if it becomes dirty or someone else pins it. */
7503 result = InvalidateVictimBuffer(desc);
7504
7505 UnpinBuffer(desc);
7506
7507 return result;
7508}
#define BM_LOCKED
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4553
static void PinBuffer_Locked(BufferDesc *buf)
Definition bufmgr.c:3292
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition bufmgr.c:2374

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, BUF_STATE_GET_REFCOUNT, fb(), FlushUnlockedBuffer(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), and EvictUnpinnedBuffer().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 964 of file bufmgr.c.

968{
969 Buffer buf;
970 uint32 extend_by = 1;
971
972 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
973 &buf, &extend_by);
974
975 return buf;
976}
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:996

References buf, ExtendBufferedRelBy(), and fb().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
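
A common single-page use is allocating a fresh index page: extend by one block, keep the pin, and take the content lock in the same call. A sketch in the style of _bt_allocbuf():

    /* Sketch: append one zero-filled page and return it pinned and locked. */
    Buffer  buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL,
                                    EB_LOCK_FIRST);
    /* initialize the new page, WAL-log it, then unlock/release as usual */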

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer buffers,
uint32 extended_by 
)

Definition at line 996 of file bufmgr.c.

1003{
1004 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1005 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1006 Assert(extend_by > 0);
1007
1008 if (bmr.relpersistence == '\0')
1009 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1010
1011 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1012 extend_by, InvalidBlockNumber,
1013 buffers, extended_by);
1014}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2659

References Assert, ExtendBufferedRelCommon(), fb(), and InvalidBlockNumber.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().
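
Bulk extension amortizes the extension lock over several pages; RelationAddBlocks() uses this to grow heaps under contention. A hedged sketch (the batch size is illustrative only):

    #define EXTEND_BATCH 8          /* hypothetical batch size */
    Buffer      bufs[EXTEND_BATCH];
    uint32      extended_by = 0;
    BlockNumber first_block;

    first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                      NULL /* strategy */, 0 /* flags */,
                                      EXTEND_BATCH, bufs, &extended_by);
    /* extended_by pages starting at first_block are now pinned in bufs[] */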

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2659 of file bufmgr.c.

2667{
2668 BlockNumber first_block;
2669
2670 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2671 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2672 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2673 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2674 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2675 extend_by);
2676
2677 if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2678 first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2679 extend_by, extend_upto,
2680 buffers, &extend_by);
2681 else
2682 first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2683 extend_by, extend_upto,
2684 buffers, &extend_by);
2685 *extended_by = extend_by;
2686
2687 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2688 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2689 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2690 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2691 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2692 *extended_by,
2693 first_block);
2694
2695 return first_block;
2696}
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2703
#define BMR_GET_SMGR(bmr)
Definition bufmgr.h:118
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition localbuf.c:346

References BMR_GET_SMGR, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), and fb().

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2703 of file bufmgr.c.

2711{
2712 BlockNumber first_block;
2713 IOContext io_context = IOContextForStrategy(strategy);
2714 instr_time io_start;
2715
2716 LimitAdditionalPins(&extend_by);
2717
2718 /*
2719 * Acquire victim buffers for extension without holding extension lock.
2720 * Writing out victim buffers is the most expensive part of extending the
2721 * relation, particularly when doing so requires WAL flushes. Zeroing out
2722 * the buffers is also quite expensive, so do that before holding the
2723 * extension lock as well.
2724 *
2725 * These pages are pinned by us and not valid. While we hold the pin they
2726 * can't be acquired as victim buffers by another backend.
2727 */
2728 for (uint32 i = 0; i < extend_by; i++)
2729 {
2730 Block buf_block;
2731
2732 buffers[i] = GetVictimBuffer(strategy, io_context);
2733 buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2734
2735 /* new buffers are zero-filled */
2736 MemSet(buf_block, 0, BLCKSZ);
2737 }
2738
2739 /*
2740 * Lock relation against concurrent extensions, unless requested not to.
2741 *
2742 * We use the same extension lock for all forks. That's unnecessarily
2743 * restrictive, but currently extensions for forks don't happen often
2744 * enough to make it worth locking more granularly.
2745 *
2746 * Note that another backend might have extended the relation by the time
2747 * we get the lock.
2748 */
2749 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2750 LockRelationForExtension(bmr.rel, ExclusiveLock);
2751
2752 /*
2753 * If requested, invalidate size cache, so that smgrnblocks asks the
2754 * kernel.
2755 */
2756 if (flags & EB_CLEAR_SIZE_CACHE)
2757 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2758
2759 first_block = smgrnblocks(BMR_GET_SMGR(bmr), fork);
2760
2761 /*
2762 * Now that we have the accurate relation size, check if the caller wants
2763 * us to extend to only up to a specific size. If there were concurrent
2764 * extensions, we might have acquired too many buffers and need to release
2765 * them.
2766 */
2767 if (extend_upto != InvalidBlockNumber)
2768 {
2769 uint32 orig_extend_by = extend_by;
2770
2771 if (first_block > extend_upto)
2772 extend_by = 0;
2773 else if ((uint64) first_block + extend_by > extend_upto)
2774 extend_by = extend_upto - first_block;
2775
2776 for (uint32 i = extend_by; i < orig_extend_by; i++)
2777 {
2778 BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2779
2780 UnpinBuffer(buf_hdr);
2781 }
2782
2783 if (extend_by == 0)
2784 {
2785 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2786 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2787 *extended_by = extend_by;
2788 return first_block;
2789 }
2790 }
2791
2792 /* Fail if relation is already at maximum possible length */
2793 if ((uint64) first_block + extend_by >= MaxBlockNumber)
2794 ereport(ERROR,
2795 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2796 errmsg("cannot extend relation %s beyond %u blocks",
2797 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str,
2798 MaxBlockNumber)));
2799
2800 /*
2801 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2802 *
2803 * This needs to happen before we extend the relation, because as soon as
2804 * we do, other backends can start to read in those pages.
2805 */
2806 for (uint32 i = 0; i < extend_by; i++)
2807 {
2808 Buffer victim_buf = buffers[i];
2809 BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2810 BufferTag tag;
2811 uint32 hash;
2812 LWLock *partition_lock;
2813 int existing_id;
2814
2815 /* in case we need to pin an existing buffer below */
2816 ResourceOwnerEnlarge(CurrentResourceOwner);
2817 ReservePrivateRefCountEntry();
2818
2819 InitBufferTag(&tag, &BMR_GET_SMGR(bmr)->smgr_rlocator.locator, fork,
2820 first_block + i);
2821 hash = BufTableHashCode(&tag);
2822 partition_lock = BufMappingPartitionLock(hash);
2823
2824 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2825
2826 existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2827
2828 /*
2829 * We get here only in the corner case where we are trying to extend
2830 * the relation but we found a pre-existing buffer. This can happen
2831 * because a prior attempt at extending the relation failed, and
2832 * because mdread doesn't complain about reads beyond EOF (when
2833 * zero_damaged_pages is ON) and so a previous attempt to read a block
2834 * beyond EOF could have left a "valid" zero-filled buffer.
2835 *
2836 * This has also been observed when the relation was overwritten by an
2837 * external process. Since the legitimate cases should always have
2838 * left a zero-filled buffer, complain if not PageIsNew.
2839 */
2840 if (existing_id >= 0)
2841 {
2842 BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2843 Block buf_block;
2844 bool valid;
2845
2846 /*
2847 * Pin the existing buffer before releasing the partition lock,
2848 * preventing it from being evicted.
2849 */
2850 valid = PinBuffer(existing_hdr, strategy, false);
2851
2854
2857
2858 if (valid && !PageIsNew((Page) buf_block))
2859 ereport(ERROR,
2860 (errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",
2861 existing_hdr->tag.blockNum,
2862 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str)));
2863
2864 /*
2865 * We *must* do smgr[zero]extend before succeeding, else the page
2866 * will not be reserved by the kernel, and the next P_NEW call
2867 * will decide to return the same page. Clear the BM_VALID bit,
2868 * do StartBufferIO() and proceed.
2869 *
2870 * Loop to handle the very small possibility that someone re-sets
2871 * BM_VALID between our clearing it and StartBufferIO inspecting
2872 * it.
2873 */
2874 do
2875 {
2876 pg_atomic_fetch_and_u64(&existing_hdr->state, ~BM_VALID);
2877 } while (!StartBufferIO(existing_hdr, true, false));
2878 }
2879 else
2880 {
2881 uint64 buf_state;
2882 uint64 set_bits = 0;
2883
2884 buf_state = LockBufHdr(victim_buf_hdr);
2885
2886 /* some sanity checks while we hold the buffer header lock */
2889
2890 victim_buf_hdr->tag = tag;
2891
2893 if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2894 set_bits |= BM_PERMANENT;
2895
2896 UnlockBufHdrExt(victim_buf_hdr, buf_state,
2897 set_bits, 0,
2898 0);
2899
2900 LWLockRelease(partition_lock);
2901
2902 /* XXX: could combine the locked operations in it with the above */
2903 StartBufferIO(victim_buf_hdr, true, false);
2904 }
2905 }
2906
2907 io_start = pgstat_prepare_io_time(track_io_timing);
2908
2909 /*
2910 * Note: if smgrzeroextend fails, we will end up with buffers that are
2911 * allocated but not marked BM_VALID. The next relation extension will
2912 * still select the same block number (because the relation didn't get any
2913 * longer on disk) and so future attempts to extend the relation will find
2914 * the same buffers (if they have not been recycled) but come right back
2915 * here to try smgrzeroextend again.
2916 *
2917 * We don't need to set checksum for all-zero pages.
2918 */
2919 smgrzeroextend(BMR_GET_SMGR(bmr), fork, first_block, extend_by, false);
2920
2921 /*
2922 * Release the file-extension lock; it's now OK for someone else to extend
2923 * the relation some more.
2924 *
2925 * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2926 * take noticeable time.
2927 */
2928 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2929 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2930
2931 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2932 io_start, 1, extend_by * BLCKSZ);
2933
2934 /* Set BM_VALID, terminate IO, and wake up any waiters */
2935 for (uint32 i = 0; i < extend_by; i++)
2936 {
2937 Buffer buf = buffers[i];
2938 BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2939 bool lock = false;
2940
2941 if (flags & EB_LOCK_FIRST && i == 0)
2942 lock = true;
2943 else if (flags & EB_LOCK_TARGET)
2944 {
2945 Assert(extend_upto != InvalidBlockNumber);
2946 if (first_block + i + 1 == extend_upto)
2947 lock = true;
2948 }
2949
2950 if (lock)
2951 LockBuffer(BufferDescriptorGetBuffer(buf_hdr), BUFFER_LOCK_EXCLUSIVE);
2952
2953 TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
2954 }
2955
2956 pgBufferUsage.shared_blks_written += extend_by;
2957
2958 *extended_by = extend_by;
2959
2960 return first_block;
2961}
#define MaxBlockNumber
Definition block.h:35
#define BM_JUST_DIRTIED
#define BufHdrGetBlock(bufHdr)
Definition bufmgr.c:73
void LimitAdditionalPins(uint32 *additional_pins)
Definition bufmgr.c:2641
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition bufmgr.c:6879
void * Block
Definition bufmgr.h:26
@ EB_LOCK_TARGET
Definition bufmgr.h:93
@ EB_CLEAR_SIZE_CACHE
Definition bufmgr.h:90
@ EB_SKIP_EXTENSION_LOCK
Definition bufmgr.h:75
@ EB_LOCK_FIRST
Definition bufmgr.h:87
static bool PageIsNew(const PageData *page)
Definition bufpage.h:233
#define MemSet(start, val, len)
Definition c.h:1023
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:474
#define ExclusiveLock
Definition lockdefs.h:42
@ IOOP_EXTEND
Definition pgstat.h:314
static unsigned hash(unsigned *uv, int n)
Definition rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition relpath.h:150
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition smgr.c:649
int64 shared_blks_written
Definition instrument.h:29

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, BMR_GET_SMGR, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BUFFER_LOCK_EXCLUSIVE, BufferDescriptorGetBuffer(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, fb(), GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), LockBuffer(), LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pg_atomic_fetch_and_u64(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), relpath, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, smgrnblocks(), smgrzeroextend(), StartBufferIO(), str, TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 1025 of file bufmgr.c.

1031{
1032 BlockNumber current_size;
1033 uint32 extended_by = 0;
1034 Buffer buffer = InvalidBuffer;
1035 Buffer buffers[64];
1036
1037 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1038 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1039 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
1040
1041 if (bmr.relpersistence == '\0')
1042 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1043
1044 /*
1045 * If desired, create the file if it doesn't exist. If
1046 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
1047 * an smgrexists call.
1048 */
1049 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
1050 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
1051 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
1052 !smgrexists(BMR_GET_SMGR(bmr), fork))
1053 {
1054 LockRelationForExtension(bmr.rel, ExclusiveLock);
1055
1056 /* recheck, fork might have been created concurrently */
1057 if (!smgrexists(BMR_GET_SMGR(bmr), fork))
1058 smgrcreate(BMR_GET_SMGR(bmr), fork, false);
1059
1060 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
1061 }
1062
1063 /*
1064 * If requested, invalidate size cache, so that smgrnblocks asks the
1065 * kernel.
1066 */
1067 if (flags & EB_CLEAR_SIZE_CACHE)
1068 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1069
1070 /*
1071 * Estimate how many pages we'll need to extend by. This avoids acquiring
1072 * unnecessarily many victim buffers.
1073 */
1074 current_size = smgrnblocks(BMR_GET_SMGR(bmr), fork);
1075
1076 /*
1077 * Since no-one else can be looking at the page contents yet, there is no
1078 * difference between an exclusive lock and a cleanup-strength lock. Note
1079 * that we pass the original mode to ReadBuffer_common() below, when
1080 * falling back to reading the buffer to a concurrent relation extension.
1081 */
1082 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1083 flags |= EB_LOCK_TARGET;
1084
1085 while (current_size < extend_to)
1086 {
1087 uint32 num_pages = lengthof(buffers);
1088 BlockNumber first_block;
1089
1090 if ((uint64) current_size + num_pages > extend_to)
1091 num_pages = extend_to - current_size;
1092
1093 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1094 num_pages, extend_to,
1095 buffers, &extended_by);
1096
1097 current_size = first_block + extended_by;
1098 Assert(num_pages != 0 || current_size >= extend_to);
1099
1100 for (uint32 i = 0; i < extended_by; i++)
1101 {
1102 if (first_block + i != extend_to - 1)
1103 ReleaseBuffer(buffers[i]);
1104 else
1105 buffer = buffers[i];
1106 }
1107 }
1108
1109 /*
1110 * It's possible that another backend concurrently extended the relation.
1111 * In that case read the buffer.
1112 *
1113 * XXX: Should we control this via a flag?
1114 */
1115 if (buffer == InvalidBuffer)
1116 {
1117 Assert(extended_by == 0);
1118 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1119 fork, extend_to - 1, mode, strategy);
1120 }
1121
1122 return buffer;
1123}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:1293
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5501
@ EB_PERFORMING_RECOVERY
Definition bufmgr.h:78
@ EB_CREATE_FORK_IF_NEEDED
Definition bufmgr.h:84
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition bufmgr.h:47
#define lengthof(array)
Definition c.h:813

References Assert, BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), fb(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBuffer_common(), ReleaseBuffer(), smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
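
A typical caller wants "make sure block N exists, creating the fork on demand", which is how fsm_extend() and vm_extend() use it. Sketch (the fork choice is illustrative):

    /* Sketch: ensure blocks [0, blkno] exist in the FSM fork. */
    Buffer  buf = ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL,
                                      EB_CREATE_FORK_IF_NEEDED |
                                      EB_CLEAR_SIZE_CACHE,
                                      blkno + 1, RBM_ZERO_ON_ERROR);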

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4970 of file bufmgr.c.

4973{
4974 BlockNumber curBlock;
4975
4976 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4977 {
4978 uint32 bufHash; /* hash value for tag */
4979 BufferTag bufTag; /* identity of requested block */
4980 LWLock *bufPartitionLock; /* buffer partition lock for it */
4981 int buf_id;
4982 BufferDesc *bufHdr;
4983
4984 /* create a tag so we can lookup the buffer */
4985 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4986
4987 /* determine its hash code and partition lock ID */
4988 bufHash = BufTableHashCode(&bufTag);
4989 bufPartitionLock = BufMappingPartitionLock(bufHash);
4990
4991 /* Check that it is in the buffer pool. If not, do nothing. */
4992 LWLockAcquire(bufPartitionLock, LW_SHARED);
4993 buf_id = BufTableLookup(&bufTag, bufHash);
4994 LWLockRelease(bufPartitionLock);
4995
4996 if (buf_id < 0)
4997 continue;
4998
4999 bufHdr = GetBufferDescriptor(buf_id);
5000
5001 /*
5002 * We need to lock the buffer header and recheck if the buffer is
5003 * still associated with the same block because the buffer could be
5004 * evicted by some other backend loading blocks for a different
5005 * relation after we release lock on the BufMapping table.
5006 */
5007 LockBufHdr(bufHdr);
5008
5009 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
5010 BufTagGetForkNum(&bufHdr->tag) == forkNum &&
5011 bufHdr->tag.blockNum >= firstDelBlock)
5012 InvalidateBuffer(bufHdr); /* releases spinlock */
5013 else
5014 UnlockBufHdr(bufHdr);
5015 }
5016}

References BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().
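
The heart of this function is the standard lookup-by-tag protocol: build a tag, hash it, take the matching BufMapping partition lock in shared mode, and probe the table. Distilled sketch of that protocol (declarations added for completeness):

    BufferTag   tag;
    uint32      hash;
    LWLock     *partitionLock;
    int         buf_id;

    InitBufferTag(&tag, &rlocator, forkNum, blockNum);
    hash = BufTableHashCode(&tag);
    partitionLock = BufMappingPartitionLock(hash);

    LWLockAcquire(partitionLock, LW_SHARED);
    buf_id = BufTableLookup(&tag, hash);    /* -1 if the block is not resident */
    LWLockRelease(partitionLock);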

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4416 of file bufmgr.c.

4418{
4419 XLogRecPtr recptr;
4420 ErrorContextCallback errcallback;
4421 instr_time io_start;
4422 Block bufBlock;
4423 char *bufToWrite;
4424 uint64 buf_state;
4425
4426 /*
4427 * Try to start an I/O operation. If StartBufferIO returns false, then
4428 * someone else flushed the buffer before we could, so we need not do
4429 * anything.
4430 */
4431 if (!StartBufferIO(buf, false, false))
4432 return;
4433
4434 /* Setup error traceback support for ereport() */
4435 errcallback.callback = shared_buffer_write_error_callback;
4436 errcallback.arg = buf;
4437 errcallback.previous = error_context_stack;
4438 error_context_stack = &errcallback;
4439
4440 /* Find smgr relation for buffer */
4441 if (reln == NULL)
4442 reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
4443
4444 TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
4445 buf->tag.blockNum,
4446 reln->smgr_rlocator.locator.spcOid,
4447 reln->smgr_rlocator.locator.dbOid,
4448 reln->smgr_rlocator.locator.relNumber);
4449
4450 buf_state = LockBufHdr(buf);
4451
4452 /*
4453 * Run PageGetLSN while holding header lock, since we don't have the
4454 * buffer locked exclusively in all cases.
4455 */
4456 recptr = BufferGetLSN(buf);
4457
4458 /* To check if block content changes while flushing. - vadim 01/17/97 */
4459 UnlockBufHdrExt(buf, buf_state,
4460 0, BM_JUST_DIRTIED,
4461 0);
4462
4463 /*
4464 * Force XLOG flush up to buffer's LSN. This implements the basic WAL
4465 * rule that log updates must hit disk before any of the data-file changes
4466 * they describe do.
4467 *
4468 * However, this rule does not apply to unlogged relations, which will be
4469 * lost after a crash anyway. Most unlogged relation pages do not bear
4470 * LSNs since we never emit WAL records for them, and therefore flushing
4471 * up through the buffer LSN would be useless, but harmless. However,
4472 * GiST indexes use LSNs internally to track page-splits, and therefore
4473 * unlogged GiST pages bear "fake" LSNs generated by
4474 * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
4475 * LSN counter could advance past the WAL insertion point; and if it did
4476 * happen, attempting to flush WAL through that location would fail, with
4477 * disastrous system-wide consequences. To make sure that can't happen,
4478 * skip the flush if the buffer isn't permanent.
4479 */
4480 if (buf_state & BM_PERMANENT)
4481 XLogFlush(recptr);
4482
4483 /*
4484 * Now it's safe to write the buffer to disk. Note that no one else should
4485 * have been able to write it, while we were busy with log flushing,
4486 * because we got the exclusive right to perform I/O by setting the
4487 * BM_IO_IN_PROGRESS bit.
4488 */
4489 bufBlock = BufHdrGetBlock(buf);
4490
4491 /*
4492 * Update page checksum if desired. Since we have only shared lock on the
4493 * buffer, other processes might be updating hint bits in it, so we must
4494 * copy the page to private storage if we do checksumming.
4495 */
4496 bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
4497
4498 io_start = pgstat_prepare_io_time(track_io_timing);
4499
4500 /*
4501 * bufToWrite is either the shared buffer or a copy, as appropriate.
4502 */
4503 smgrwrite(reln,
4504 BufTagGetForkNum(&buf->tag),
4505 buf->tag.blockNum,
4506 bufToWrite,
4507 false);
4508
4509 /*
4510 * When a strategy is in use, only flushes of dirty buffers already in the
4511 * strategy ring are counted as strategy writes (IOCONTEXT
4512 * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
4513 * statistics tracking.
4514 *
4515 * If a shared buffer initially added to the ring must be flushed before
4516 * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
4517 *
4518 * If a shared buffer which was added to the ring later because the
4519 * current strategy buffer is pinned or in use or because all strategy
4520 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
4521 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
4522 * (from_ring will be false).
4523 *
4524 * When a strategy is not in use, the write can only be a "regular" write
4525 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
4526 */
4527 pgstat_count_io_op_time(io_object, io_context, IOOP_WRITE, io_start, 1,
4528 BLCKSZ);
4529
4530 pgBufferUsage.shared_blks_written++;
4531
4532 /*
4533 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
4534 * end the BM_IO_IN_PROGRESS state.
4535 */
4536 TerminateBufferIO(buf, true, 0, true, false);
4537
4539 buf->tag.blockNum,
4540 reln->smgr_rlocator.locator.spcOid,
4541 reln->smgr_rlocator.locator.dbOid,
4542 reln->smgr_rlocator.locator.relNumber);
4543
4544 /* Pop the error context stack */
4545 error_context_stack = errcallback.previous;
4546}
#define BufferGetLSN(bufHdr)
Definition bufmgr.c:74
static void shared_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7038
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition bufpage.c:1509
ErrorContextCallback * error_context_stack
Definition elog.c:95
@ IOOP_WRITE
Definition pgstat.h:316
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.h:131
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2784

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, error_context_stack, fb(), INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), smgropen(), smgrwrite(), StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), and XLogFlush().

Referenced by FlushOneBuffer(), FlushUnlockedBuffer(), and GetVictimBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5441 of file bufmgr.c.

5442{
5443 int i;
5444 BufferDesc *bufHdr;
5445
5446 for (i = 0; i < NBuffers; i++)
5447 {
5448 uint64 buf_state;
5449
5450 bufHdr = GetBufferDescriptor(i);
5451
5452 /*
5453 * As in DropRelationBuffers, an unlocked precheck should be safe and
5454 * saves some cycles.
5455 */
5456 if (bufHdr->tag.dbOid != dbid)
5457 continue;
5458
5459 /* Make sure we can handle the pin */
5460 ResourceOwnerEnlarge(CurrentResourceOwner);
5461 ReservePrivateRefCountEntry();
5462
5463 buf_state = LockBufHdr(bufHdr);
5464 if (bufHdr->tag.dbOid == dbid &&
5465 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5466 {
5467 PinBuffer_Locked(bufHdr);
5468 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5469 UnpinBuffer(bufHdr);
5470 }
5471 else
5472 UnlockBufHdr(bufHdr);
5473 }
5474}

References BM_DIRTY, BM_VALID, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5481 of file bufmgr.c.

5482{
5483 BufferDesc *bufHdr;
5484
5485 /* currently not needed, but no fundamental reason not to support */
5486 Assert(!BufferIsLocal(buffer));
5487
5488 Assert(BufferIsPinned(buffer));
5489
5490 bufHdr = GetBufferDescriptor(buffer - 1);
5491
5492 Assert(BufferIsLockedByMe(buffer));
5493
5494 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5495}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4416
bool BufferIsLockedByMe(Buffer buffer)
Definition bufmgr.c:2971

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, fb(), FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().
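
Redo routines use this when a page must be durable before a following non-logged step; the caller already holds the pin and content lock. Sketch in the style of hash_xlog_init_meta_page():

    /* Sketch: force a just-initialized page to storage during redo. */
    MarkBufferDirty(buf);
    FlushOneBuffer(buf);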

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 5077 of file bufmgr.c.

5078{
5079 int i;
5080 BufferDesc *bufHdr;
5081 SMgrRelation srel = RelationGetSmgr(rel);
5082
5083 if (RelationUsesLocalBuffers(rel))
5084 {
5085 for (i = 0; i < NLocBuffer; i++)
5086 {
5087 uint64 buf_state;
5088
5089 bufHdr = GetLocalBufferDescriptor(i);
5090 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5091 ((buf_state = pg_atomic_read_u64(&bufHdr->state)) &
5092 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5093 {
5094 ErrorContextCallback errcallback;
5095
5096 /* Setup error traceback support for ereport() */
5097 errcallback.callback = local_buffer_write_error_callback;
5098 errcallback.arg = bufHdr;
5099 errcallback.previous = error_context_stack;
5100 error_context_stack = &errcallback;
5101
5102 /* Make sure we can handle the pin */
5103 ReservePrivateRefCountEntry();
5104 ResourceOwnerEnlarge(CurrentResourceOwner);
5105
5106 /*
5107 * Pin/unpin mostly to make valgrind work, but it also seems
5108 * like the right thing to do.
5109 */
5110 PinLocalBuffer(bufHdr, false);
5111
5112
5113 FlushLocalBuffer(bufHdr, srel);
5114
5116
5117 /* Pop the error context stack */
5118 error_context_stack = errcallback.previous;
5119 }
5120 }
5121
5122 return;
5123 }
5124
5125 for (i = 0; i < NBuffers; i++)
5126 {
5127 uint64 buf_state;
5128
5129 bufHdr = GetBufferDescriptor(i);
5130
5131 /*
5132 * As in DropRelationBuffers, an unlocked precheck should be safe and
5133 * saves some cycles.
5134 */
5135 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
5136 continue;
5137
5138 /* Make sure we can handle the pin */
5139 ResourceOwnerEnlarge(CurrentResourceOwner);
5140 ReservePrivateRefCountEntry();
5141
5142 buf_state = LockBufHdr(bufHdr);
5143 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5144 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5145 {
5146 PinBuffer_Locked(bufHdr);
5147 FlushUnlockedBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5148 UnpinBuffer(bufHdr);
5149 }
5150 else
5151 UnlockBufHdr(bufHdr);
5152 }
5153}
static void local_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7054
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition localbuf.c:805
int NLocBuffer
Definition localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition rel.h:576

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, fb(), FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u64(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 5165 of file bufmgr.c.

5166{
5167 int i;
5168 SMgrSortArray *srels;
5169 bool use_bsearch;
5170
5171 if (nrels == 0)
5172 return;
5173
5174 /* fill-in array for qsort */
5175 srels = palloc_array(SMgrSortArray, nrels);
5176
5177 for (i = 0; i < nrels; i++)
5178 {
5179 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5180
5181 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5182 srels[i].srel = smgrs[i];
5183 }
5184
5185 /*
5186 * Save the bsearch overhead for a low number of relations to sync. See
5187 * DropRelationsAllBuffers for details.
5188 */
5189 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
5190
5191 /* sort the list of SMgrRelations if necessary */
5192 if (use_bsearch)
5193 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5194
5195 for (i = 0; i < NBuffers; i++)
5196 {
5197 SMgrSortArray *srelent = NULL;
5198 BufferDesc *bufHdr = GetBufferDescriptor(i);
5199 uint64 buf_state;
5200
5201 /*
5202 * As in DropRelationBuffers, an unlocked precheck should be safe and
5203 * saves some cycles.
5204 */
5205
5206 if (!use_bsearch)
5207 {
5208 int j;
5209
5210 for (j = 0; j < nrels; j++)
5211 {
5212 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5213 {
5214 srelent = &srels[j];
5215 break;
5216 }
5217 }
5218 }
5219 else
5220 {
5221 RelFileLocator rlocator;
5222
5223 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5224 srelent = bsearch(&rlocator,
5225 srels, nrels, sizeof(SMgrSortArray),
5227 }
5228
5229 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5230 if (srelent == NULL)
5231 continue;
5232
5233 /* Make sure we can handle the pin */
5234 ResourceOwnerEnlarge(CurrentResourceOwner);
5235 ReservePrivateRefCountEntry();
5236
5237 buf_state = LockBufHdr(bufHdr);
5238 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5239 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5240 {
5241 PinBuffer_Locked(bufHdr);
5242 FlushUnlockedBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5243 UnpinBuffer(bufHdr);
5244 }
5245 else
5246 UnlockBufHdr(bufHdr);
5247 }
5248
5249 pfree(srels);
5250}

References Assert, BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, LockBufHdr(), NBuffers, palloc_array, pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), rlocator_comparator(), UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FlushUnlockedBuffer()

static void FlushUnlockedBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4553 of file bufmgr.c.

4555{
4556 Buffer buffer = BufferDescriptorGetBuffer(buf);
4557
4558 BufferLockAcquire(buffer, buf, BUFFER_LOCK_SHARE);
4559 FlushBuffer(buf, reln, io_object, io_context);
4560 BufferLockUnlock(buffer, buf);
4561}
static void BufferLockAcquire(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5755
static void BufferLockUnlock(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:5871

References buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_SHARE, BufferDescriptorGetBuffer(), BufferLockAcquire(), BufferLockUnlock(), fb(), FlushBuffer(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 551 of file bufmgr.c.

552{
553 Assert(ref->data.refcount == 0);
554 Assert(ref->data.lockmode == BUFFER_LOCK_UNLOCK);
555
556 if (ref >= &PrivateRefCountArray[0] &&
557 ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
558 {
559 ref->buffer = InvalidBuffer;
560 PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer;
561
562
563 /*
564 * Mark the just used entry as reserved - in many scenarios that
565 * allows us to avoid ever having to search the array/hash for free
566 * entries.
567 */
568 ReservedRefCountSlot = ref - PrivateRefCountArray;
569 }
570 else
571 {
572 bool found;
573 Buffer buffer = ref->buffer;
574
575 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
576 Assert(found);
577 Assert(PrivateRefCountOverflowed > 0);
578 PrivateRefCountOverflowed--;
579 }
580}
static int ReservedRefCountSlot
Definition bufmgr.c:252
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_REMOVE
Definition hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by UnpinBufferNoOwner().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2615 of file bufmgr.c.

2616{
2617 uint32 estimated_pins_held;
2618
2619 /*
2620 * We get the number of "overflowed" pins for free, but don't know the
2621 * number of pins in PrivateRefCountArray. The cost of calculating that
2622 * exactly doesn't seem worth it, so just assume the max.
2623 */
2624 estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2625
2626 /* Is this backend already holding more than its fair share? */
2627 if (estimated_pins_held > MaxProportionalPins)
2628 return 0;
2629
2630 return MaxProportionalPins - estimated_pins_held;
2631}
static uint32 MaxProportionalPins
Definition bufmgr.c:255

References fb(), MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().
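
Callers use the returned headroom to clamp batch sizes, always allowing at least one more pin, mirroring the clamp in LimitAdditionalPins(). Sketch:

    /* Sketch: cap the number of extra pins a batched read will take. */
    uint32  want = 16;              /* hypothetical batch size */
    uint32  limit = GetAdditionalPinLimit();

    limit = Max(limit, 1);          /* one additional pin is always permitted */
    if (want > limit)
        want = limit;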

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2603 of file bufmgr.c.

2604{
2605 return MaxProportionalPins;
2606}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 528 of file bufmgr.c.

529{
530 PrivateRefCountEntry *ref;
531
532 Assert(BufferIsValid(buffer));
533 Assert(!BufferIsLocal(buffer));
534
535 /*
536 * Not moving the entry - that's ok for the current users, but we might
537 * want to change this one day.
538 */
539 ref = GetPrivateRefCountEntry(buffer, false);
540
541 if (ref == NULL)
542 return 0;
543 return ref->data.refcount;
544}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), and GetPrivateRefCountEntry().

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), and MarkBufferDirtyHint().
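
Because the function reports only this backend's pins, it is the natural tool for "am I the sole local pinner?" checks inside bufmgr.c. Sketch:

    /* Sketch: proceed only if this backend holds exactly one pin. */
    if (GetPrivateRefCount(buffer) != 1)
        return false;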

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
inlinestatic

Definition at line 493 of file bufmgr.c.

494{
495 Assert(BufferIsValid(buffer));
496 Assert(!BufferIsLocal(buffer));
497
498 /*
499 * It's very common to look up the same buffer repeatedly. To make that
500 * fast, we have a one-entry cache.
501 *
502 * In contrast to the loop in GetPrivateRefCountEntrySlow(), here it is
503 * faster to check PrivateRefCountArray[].buffer, as in the case of a hit
504 * fewer addresses are computed and fewer cachelines are accessed. Whereas
505 * in GetPrivateRefCountEntrySlow()'s case, checking
506 * PrivateRefCountArrayKeys saves a lot of memory accesses.
507 */
508 if (likely(PrivateRefCountEntryLast != -1) &&
509 PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)
510 {
511 return &PrivateRefCountArray[PrivateRefCountEntryLast];
512 }
513
514 /*
515 * The code for the cached lookup is small enough to be worth inlining
516 * into the caller. In the miss case however, that empirically doesn't
517 * seem worth it.
518 */
519 return GetPrivateRefCountEntrySlow(buffer, do_move);
520}
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move)
Definition bufmgr.c:404
static int PrivateRefCountEntryLast
Definition bufmgr.c:253

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), GetPrivateRefCountEntrySlow(), likely, PrivateRefCountArray, and PrivateRefCountEntryLast.

Referenced by BufferLockAcquire(), BufferLockConditional(), BufferLockDisownInternal(), BufferLockHeldByMe(), BufferLockHeldByMeInMode(), GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), ResOwnerReleaseBuffer(), and UnpinBufferNoOwner().

◆ GetPrivateRefCountEntrySlow()

static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 404 of file bufmgr.c.

405{
406 PrivateRefCountEntry *res;
407 int match = -1;
408 int i;
409
410 /*
411 * First search for references in the array, that'll be sufficient in the
412 * majority of cases.
413 */
414 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
415 {
416 if (PrivateRefCountArrayKeys[i] == buffer)
417 {
418 match = i;
419 /* see ReservePrivateRefCountEntry() for why we don't return */
420 }
421 }
422
423 if (likely(match != -1))
424 {
425 /* update cache for the next lookup */
426 PrivateRefCountEntryLast = match;
427
428 return &PrivateRefCountArray[match];
429 }
430
431 /*
432 * By here we know that the buffer, if already pinned, isn't residing in
433 * the array.
434 *
435 * Only look up the buffer in the hashtable if we've previously overflowed
436 * into it.
437 */
439 return NULL;
440
442
443 if (res == NULL)
444 return NULL;
445 else if (!do_move)
446 {
447 /* caller doesn't want us to move the hash entry into the array */
448 return res;
449 }
450 else
451 {
452 /* move buffer from hashtable into the free array slot */
453 bool found;
455
456 /* Ensure there's a free array slot */
458
459 /* Use up the reserved slot */
463 Assert(free->buffer == InvalidBuffer);
464
465 /* and fill it */
466 free->buffer = buffer;
467 free->data = res->data;
469 /* update cache for the next lookup */
471
473
474
475 /* delete from hashtable */
477 Assert(found);
480
481 return free;
482 }
483}
@ HASH_FIND
Definition hsearch.h:113
#define free(a)

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, fb(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, likely, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountSlot, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCountEntry().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 2451 of file bufmgr.c.

2452{
2453 BufferDesc *buf_hdr;
2454 Buffer buf;
2455 uint64 buf_state;
2456 bool from_ring;
2457
2458 /*
2459 * Ensure, before we pin a victim buffer, that there's a free refcount
2460 * entry and resource owner slot for the pin.
2461 */
2464
2465 /* we return here if a prospective victim buffer gets used concurrently */
2466again:
2467
2468 /*
2469 * Select a victim buffer. The buffer is returned pinned and owned by
2470 * this backend.
2471 */
2474
2475 /*
2476 * We shouldn't have any other pins for this buffer.
2477 */
2478 CheckBufferIsPinnedOnce(buf);
2479
2480 /*
2481 * If the buffer was dirty, try to write it out. There is a race
2482 * condition here, in that someone might dirty it after we released the
2483 * buffer header lock above, or even while we are writing it out (since
2484 * our share-lock won't prevent hint-bit updates). We will recheck the
2485 * dirty bit after re-locking the buffer header.
2486 */
2487 if (buf_state & BM_DIRTY)
2488 {
2491
2492 /*
2493 * We need a share-lock on the buffer contents to write it out (else
2494 * we might write invalid data, eg because someone else is compacting
2495 * the page contents while we write). We must use a conditional lock
2496 * acquisition here to avoid deadlock. Even though the buffer was not
2497 * pinned (and therefore surely not locked) when StrategyGetBuffer
2498 * returned it, someone else could have pinned and exclusive-locked it
2499 * by the time we get here. If we try to get the lock unconditionally,
2500 * we'd block waiting for them; if they later block waiting for us,
2501 * deadlock ensues. (This has been observed to happen when two
2502 * backends are both trying to split btree index pages, and the second
2503 * one just happens to be trying to split the page the first one got
2504 * from StrategyGetBuffer.)
2505 */
2507 {
2508 /*
2509 * Someone else has locked the buffer, so give it up and loop back
2510 * to get another one.
2511 */
2513 goto again;
2514 }
2515
2516 /*
2517 * If using a nondefault strategy, and writing the buffer would
2518 * require a WAL flush, let the strategy decide whether to go ahead
2519 * and write/reuse the buffer or to choose another victim. We need a
2520 * lock to inspect the page LSN, so this can't be done inside
2521 * StrategyGetBuffer.
2522 */
2523 if (strategy != NULL)
2524 {
2525 XLogRecPtr lsn;
2526
2527 /* Read the LSN while holding buffer header lock */
2529 lsn = BufferGetLSN(buf_hdr);
2531
2532 if (XLogNeedsFlush(lsn)
2533 && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2534 {
2537 goto again;
2538 }
2539 }
2540
2541 /* OK, do the I/O */
2544
2546 &buf_hdr->tag);
2547 }
2548
2549
2550 if (buf_state & BM_VALID)
2551 {
2552 /*
2553 * When a BufferAccessStrategy is in use, blocks evicted from shared
2554 * buffers are counted as IOOP_EVICT in the corresponding context
2555 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2556 * strategy in two cases: 1) while initially claiming buffers for the
2557 * strategy ring 2) to replace an existing strategy ring buffer
2558 * because it is pinned or in use and cannot be reused.
2559 *
2560 * Blocks evicted from buffers already in the strategy ring are
2561 * counted as IOOP_REUSE in the corresponding strategy context.
2562 *
2563 * At this point, we can accurately count evictions and reuses,
2564 * because we have successfully claimed the valid buffer. Previously,
2565 * we may have been forced to release the buffer due to concurrent
2566 * pinners or erroring out.
2567 */
2569 from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
2570 }
2571
2572 /*
2573 * If the buffer has an entry in the buffer mapping table, delete it. This
2574 * can fail because another backend could have pinned or dirtied the
2575 * buffer.
2576 */
2578 {
2580 goto again;
2581 }
2582
2583 /* a final set of sanity checks */
2584#ifdef USE_ASSERT_CHECKING
2586
2589
2591#endif
2592
2593 return buf;
2594}
WritebackContext BackendWritebackContext
Definition buf_init.c:25
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6484
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition bufmgr.c:7269
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
Definition freelist.c:174
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition freelist.c:787
@ IOOP_EVICT
Definition pgstat.h:307
@ IOOP_REUSE
Definition pgstat.h:310
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3146

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), BufferGetLSN, BufferLockConditional(), CheckBufferIsPinnedOnce(), CurrentResourceOwner, fb(), FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBuffer(), LockBufHdr(), pg_atomic_read_u64(), pgstat_count_io_op(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), StrategyGetBuffer(), StrategyRejectBuffer(), UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().
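
The conditional lock acquisition described above is also available to ordinary callers through the public API as ConditionalLockBuffer(), which takes the content lock in exclusive mode without ever blocking. A minimal sketch of the deadlock-avoiding idiom (other_buf is a hypothetical pinned buffer):

    if (ConditionalLockBuffer(other_buf))
    {
        /* ... do the work that needed the lock ... */
        LockBuffer(other_buf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
        /* Someone else holds the lock: back off and retry later rather
         * than blocking while holding resources they might wait for. */
    }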

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 6664 of file bufmgr.c.

6665{
6667
6668 /*
6669 * If we get woken slowly then it's possible that the Startup process was
6670 * already woken by other backends before we got here. It's also possible
6671 * that we got here via multiple interrupts or interrupts at inappropriate
6672 * times, so make sure we do nothing if the bufid is not set.
6673 */
6674 if (bufid < 0)
6675 return false;
6676
6677 if (GetPrivateRefCount(bufid + 1) > 0)
6678 return true;
6679
6680 return false;
6681}
int GetStartupBufferPinWaitBufId(void)
Definition proc.c:771

References fb(), GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().
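
The "bufid + 1" reflects that shared buffer ids are zero-based while Buffer numbers are one-based (zero is InvalidBuffer). A sketch of the conversion in both directions, as it appears throughout bufmgr.c:

    int         bufid = GetStartupBufferPinWaitBufId();    /* 0-based id */
    Buffer      buffer = bufid + 1;                        /* 1-based Buffer */
    BufferDesc *desc = GetBufferDescriptor(buffer - 1);    /* and back */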

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 5533 of file bufmgr.c.

5534{
5535 Assert(BufferIsPinned(buffer));
5537 if (BufferIsLocal(buffer))
5538 LocalRefCount[-buffer - 1]++;
5539 else
5540 {
5542
5543 ref = GetPrivateRefCountEntry(buffer, true);
5544 Assert(ref != NULL);
5545 ref->data.refcount++;
5546 }
5548}
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), LocalRefCount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().
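
A typical use is handing out a second, independently released reference to an already-pinned buffer, as tts_buffer_heap_store_tuple() does; a simplified sketch (slot_buffer is hypothetical):

    /* "buf" is already pinned by the scan; take a second reference so
     * another owner can keep the page pinned on its own. */
    IncrBufferRefCount(buf);
    slot_buffer = buf;

    /* ... later, each reference is dropped separately ... */
    ReleaseBuffer(slot_buffer);
    ReleaseBuffer(buf);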

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4120 of file bufmgr.c.

4121{
4123
4124 /*
4125 * An advisory limit on the number of pins each backend should hold, based
4126 * on shared_buffers and the maximum number of connections possible.
4127 * That's very pessimistic, but outside toy-sized shared_buffers it should
4128 * allow plenty of pins. LimitAdditionalPins() and
4129 * GetAdditionalPinLimit() can be used to check the remaining balance.
4130 */
4132
4135
4136 hash_ctl.keysize = sizeof(Buffer);
4137 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4138
4139 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
4141
4142 /*
4143 * AtProcExit_Buffers needs LWLock access, and therefore has to be called at
4144 * the corresponding phase of backend shutdown.
4145 */
4146 Assert(MyProc != NULL);
4148}
static void AtProcExit_Buffers(int code, Datum arg)
Definition bufmgr.c:4155
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
int MaxBackends
Definition globals.c:146
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
#define NUM_AUXILIARY_PROCS
Definition proc.h:469

References Assert, AtProcExit_Buffers(), fb(), HASH_BLOBS, hash_create(), HASH_ELEM, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, PrivateRefCountArrayKeys, and PrivateRefCountHash.

Referenced by BaseInit().
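
The HASH_ELEM | HASH_BLOBS combination above is the standard dynahash recipe for fixed-size binary keys. A generic sketch for any backend-local table (MyEntry is hypothetical; the key must be the first field of the entry):

    typedef struct MyEntry
    {
        int         key;            /* hash key: must come first */
        int         payload;
    } MyEntry;

    HASHCTL     hash_ctl;
    HTAB       *htab;

    hash_ctl.keysize = sizeof(int);
    hash_ctl.entrysize = sizeof(MyEntry);

    htab = hash_create("my table", 128, &hash_ctl,
                       HASH_ELEM | HASH_BLOBS);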

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 2273 of file bufmgr.c.

2274{
2276 uint32 oldHash; /* hash value for oldTag */
2277 LWLock *oldPartitionLock; /* buffer partition lock for it */
2280
2281 /* Save the original buffer tag before dropping the spinlock */
2282 oldTag = buf->tag;
2283
2285
2286 /*
2287 * Need to compute the old tag's hashcode and partition lock ID. XXX is it
2288 * worth storing the hashcode in BufferDesc so we need not recompute it
2289 * here? Probably not.
2290 */
2293
2294retry:
2295
2296 /*
2297 * Acquire exclusive mapping lock in preparation for changing the buffer's
2298 * association.
2299 */
2301
2302 /* Re-lock the buffer header */
2304
2305 /* If it's changed while we were waiting for lock, do nothing */
2306 if (!BufferTagsEqual(&buf->tag, &oldTag))
2307 {
2310 return;
2311 }
2312
2313 /*
2314 * We assume the reason for it to be pinned is that either we were
2315 * asynchronously reading the page in before erroring out or someone else
2316 * is flushing the page out. Wait for the IO to finish. (This could be
2317 * an infinite loop if the refcount is messed up... it would be nice to
2318 * time out after a while, but there seems no way to be sure how many loops
2319 * may be needed. Note that if the other guy has pinned the buffer but
2320 * not yet done StartBufferIO, WaitIO will fall through and we'll
2321 * effectively be busy-looping here.)
2322 */
2324 {
2327 /* safety check: should definitely not be our *own* pin */
2329 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2330 WaitIO(buf);
2331 goto retry;
2332 }
2333
2334 /*
2335 * An invalidated buffer should not have any backends waiting to lock the
2336 * buffer; therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2337 */
2339
2340 /*
2341 * Clear out the buffer's tag and flags. We must do this to ensure that
2342 * linear scans of the buffer array don't think the buffer is valid.
2343 */
2345 ClearBufferTag(&buf->tag);
2346
2348 0,
2350 0);
2351
2352 /*
2353 * Remove the buffer from the lookup hashtable, if it was in there.
2354 */
2355 if (oldFlags & BM_TAG_VALID)
2357
2358 /*
2359 * Done with mapping lock.
2360 */
2362}
#define BUF_USAGECOUNT_MASK
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition bufmgr.c:6800

References Assert, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, fb(), GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().
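
The hashcode/partition-lock sequence above is the standard way bufmgr.c serialises changes to the buffer mapping table (BufTableHashCode() and BufMappingPartitionLock() are internal to the buffer manager); a condensed sketch:

    uint32      hash = BufTableHashCode(&tag);
    LWLock     *partition_lock = BufMappingPartitionLock(hash);

    LWLockAcquire(partition_lock, LW_EXCLUSIVE);
    /* the tag's mapping entry may only be changed while this lock is held */
    BufTableDelete(&tag, hash);
    LWLockRelease(partition_lock);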

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 2374 of file bufmgr.c.

2375{
2377 uint32 hash;
2379 BufferTag tag;
2380
2382
2383 /* have buffer pinned, so it's safe to read tag without lock */
2384 tag = buf_hdr->tag;
2385
2386 hash = BufTableHashCode(&tag);
2388
2390
2391 /* lock the buffer header */
2393
2394 /*
2395 * We have the buffer pinned, so nobody else should have been able to
2396 * unset this concurrently.
2397 */
2400 Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
2401
2402 /*
2403 * If somebody else pinned the buffer since, or even worse, dirtied it,
2404 * give up on this buffer: It's clearly in use.
2405 */
2407 {
2409
2412
2413 return false;
2414 }
2415
2416 /*
2417 * An invalidated buffer should not have any backends waiting to lock the
2418 * buffer; therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2419 */
2421
2422 /*
2423 * Clear out the buffer's tag and flags and usagecount. This is not
2424 * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
2425 * doing anything with the buffer. But currently it's beneficial, as the
2426 * cheaper pre-check used by several linear scans of shared buffers relies
2427 * on the tag (see e.g. FlushDatabaseBuffers()).
2428 */
2429 ClearBufferTag(&buf_hdr->tag);
2431 0,
2433 0);
2434
2436
2437 /* finally delete buffer from the buffer mapping table */
2438 BufTableDelete(&tag, hash);
2439
2441
2446
2447 return true;
2448}

References Assert, BM_DIRTY, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), fb(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u64(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 6748 of file bufmgr.c.

6749{
6752
6753 Assert(BufferIsValid(buffer));
6754
6755 /* see AIO related comment in LockBufferForCleanup() */
6756
6757 if (BufferIsLocal(buffer))
6758 {
6759 /* There should be exactly one pin */
6760 if (LocalRefCount[-buffer - 1] != 1)
6761 return false;
6762 /* Nobody else to wait for */
6763 return true;
6764 }
6765
6766 /* There should be exactly one local pin */
6767 if (GetPrivateRefCount(buffer) != 1)
6768 return false;
6769
6770 bufHdr = GetBufferDescriptor(buffer - 1);
6771
6772 /* caller must hold exclusive lock on buffer */
6774
6776
6779 {
6780 /* pincount is OK. */
6782 return true;
6783 }
6784
6786 return false;
6787}

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
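
Callers such as hashbucketcleanup() use this to opportunistically upgrade an exclusive lock to cleanup strength; a sketch:

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    if (IsBufferCleanupOK(buf))
    {
        /* we hold the only pin: tuples may be physically moved or removed */
    }
    else
    {
        /* other backends hold pins: stick to changes that are safe under
         * a plain exclusive lock */
    }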

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 7319 of file bufmgr.c.

7320{
7322 int i;
7323
7324 if (wb_context->nr_pending == 0)
7325 return;
7326
7327 /*
7328 * Executing the writes in order can make them a lot faster, and allows
7329 * merging writeback requests for consecutive blocks into larger writebacks.
7330 */
7331 sort_pending_writebacks(wb_context->pending_writebacks,
7332 wb_context->nr_pending);
7333
7335
7336 /*
7337 * Coalesce neighbouring writes, but nothing else. For that we iterate
7338 * through the now-sorted array of pending flushes, and look ahead to
7339 * find all neighbouring (or identical) writes.
7340 */
7341 for (i = 0; i < wb_context->nr_pending; i++)
7342 {
7346 int ahead;
7347 BufferTag tag;
7349 Size nblocks = 1;
7350
7351 cur = &wb_context->pending_writebacks[i];
7352 tag = cur->tag;
7354
7355 /*
7356 * Peek ahead into the following writeback requests to see if they can
7357 * be combined with the current one.
7358 */
7359 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
7360 {
7361
7362 next = &wb_context->pending_writebacks[i + ahead + 1];
7363
7364 /* different file, stop */
7366 BufTagGetRelFileLocator(&next->tag)) ||
7367 BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
7368 break;
7369
7370 /* ok, block queued twice, skip */
7371 if (cur->tag.blockNum == next->tag.blockNum)
7372 continue;
7373
7374 /* only merge consecutive writes */
7375 if (cur->tag.blockNum + 1 != next->tag.blockNum)
7376 break;
7377
7378 nblocks++;
7379 cur = next;
7380 }
7381
7382 i += ahead;
7383
7384 /* and finally tell the kernel to write the data to storage */
7386 smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
7387 }
7388
7389 /*
7390 * Assume that writeback requests are only issued for buffers containing
7391 * blocks of permanent relations.
7392 */
7394 IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
7395
7396 wb_context->nr_pending = 0;
7397}
static int32 next
Definition blutils.c:225
struct cursor * cur
Definition ecpg.c:29
@ IOOP_WRITEBACK
Definition pgstat.h:311
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition smgr.c:805

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, fb(), i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
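
For example, if the sorted queue contains blocks 10, 11, 11, and 12 of one relation fork followed by block 40, the loop issues a single smgrwriteback() call covering blocks 10-12 (nblocks = 3; the duplicate request for block 11 is skipped) and then a separate single-block call for block 40.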

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2641 of file bufmgr.c.

2642{
2643 uint32 limit;
2644
2645 if (*additional_pins <= 1)
2646 return;
2647
2648 limit = GetAdditionalPinLimit();
2649 limit = Max(limit, 1);
2650 if (limit < *additional_pins)
2651 *additional_pins = limit;
2652}
uint32 GetAdditionalPinLimit(void)
Definition bufmgr.c:2615
#define Max(x, y)
Definition c.h:1001

References fb(), GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().
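
A sketch of a caller clamping a bulk-extension request to the backend's fair share of pins, which is essentially what ExtendBufferedRelShared() does:

    uint32      extend_by = 64;     /* blocks we would like to add at once */

    /* may shrink the request to the remaining pin budget, but always
     * leaves at least one, so progress is guaranteed */
    LimitAdditionalPins(&extend_by);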

◆ local_buffer_readv_complete()

static PgAioResult local_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8503 of file bufmgr.c.

8505{
8507}
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
Definition bufmgr.c:8248

References buffer_readv_complete(), and fb().

◆ local_buffer_readv_stage()

static void local_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 8497 of file bufmgr.c.

8498{
8499 buffer_stage_common(ioh, false, true);
8500}
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
Definition bufmgr.c:7860

References buffer_stage_common(), and fb().

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void arg)
static

Definition at line 7054 of file bufmgr.c.

7055{
7057
7058 if (bufHdr != NULL)
7059 errcontext("writing block %u of relation \"%s\"",
7060 bufHdr->tag.blockNum,
7063 BufTagGetForkNum(&bufHdr->tag)).str);
7064}
#define errcontext
Definition elog.h:198
void * arg

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), MyProcNumber, and relpathbackend.

Referenced by FlushRelationBuffers().

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 6517 of file bufmgr.c.

6518{
6520 TimestampTz waitStart = 0;
6521 bool waiting = false;
6522 bool logged_recovery_conflict = false;
6523
6524 Assert(BufferIsPinned(buffer));
6526
6528
6529 /*
6530 * We do not yet need to be worried about in-progress AIOs holding a pin,
6531 * as we, so far, only support doing reads via AIO and this function can
6532 * only be called once the buffer is valid (i.e. no read can be in
6533 * flight).
6534 */
6535
6536 /* Nobody else to wait for */
6537 if (BufferIsLocal(buffer))
6538 return;
6539
6540 bufHdr = GetBufferDescriptor(buffer - 1);
6541
6542 for (;;)
6543 {
6545 uint64 unset_bits = 0;
6546
6547 /* Try to acquire lock */
6550
6553 {
6554 /* Successfully acquired exclusive lock with pincount 1 */
6556
6557 /*
6558 * Emit the log message if recovery conflict on buffer pin was
6559 * resolved but the startup process waited longer than
6560 * deadlock_timeout for it.
6561 */
6564 waitStart, GetCurrentTimestamp(),
6565 NULL, false);
6566
6567 if (waiting)
6568 {
6569 /* reset ps display to remove the suffix if we added one */
6571 waiting = false;
6572 }
6573 return;
6574 }
6575 /* Failed, so mark myself as waiting for pincount 1 */
6577 {
6580 elog(ERROR, "multiple backends attempting to wait for pincount 1");
6581 }
6582 bufHdr->wait_backend_pgprocno = MyProcNumber;
6586 0);
6588
6589 /* Wait to be signaled by UnpinBuffer() */
6590 if (InHotStandby)
6591 {
6592 if (!waiting)
6593 {
6594 /* adjust the process title to indicate that it's waiting */
6595 set_ps_display_suffix("waiting");
6596 waiting = true;
6597 }
6598
6599 /*
6600 * Emit the log message if the startup process is waiting longer
6601 * than deadlock_timeout for recovery conflict on buffer pin.
6602 *
6603 * Skip this the first time through, because the startup process
6604 * has not started waiting yet in that case; the wait start
6605 * timestamp is set after this logic.
6606 */
6607 if (waitStart != 0 && !logged_recovery_conflict)
6608 {
6610
6611 if (TimestampDifferenceExceeds(waitStart, now,
6613 {
6615 waitStart, now, NULL, true);
6617 }
6618 }
6619
6620 /*
6621 * Set the wait start timestamp if logging is enabled and it's the
6622 * first time through.
6623 */
6624 if (log_recovery_conflict_waits && waitStart == 0)
6625 waitStart = GetCurrentTimestamp();
6626
6627 /* Publish the bufid that Startup process waits on */
6628 SetStartupBufferPinWaitBufId(buffer - 1);
6629 /* Set alarm and then wait to be signaled by UnpinBuffer() */
6631 /* Reset the published bufid */
6633 }
6634 else
6636
6637 /*
6638 * Remove the flag marking us as a waiter. Normally this will not be
6639 * set anymore, but ProcWaitForSignal() can return for other signals
6640 * as well. We take care to only reset the flag if we're the waiter,
6641 * as theoretically another backend could have started waiting. That's
6642 * impossible with the current usages due to table-level locking, but
6643 * better to be safe.
6644 */
6646 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
6647 bufHdr->wait_backend_pgprocno == MyProcNumber)
6649
6651 0, unset_bits,
6652 0);
6653
6655 /* Loop back and try again */
6656 }
6657}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
#define BM_PIN_COUNT_WAITER
static BufferDesc * PinCountWaitBuf
Definition bufmgr.c:212
int64 TimestampTz
Definition timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
int DeadlockTimeout
Definition proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition proc.c:759
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:1980
void ResolveRecoveryConflictWithBufferPin(void)
Definition standby.c:793
bool log_recovery_conflict_waits
Definition standby.c:42
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition standby.c:274
static volatile sig_atomic_t waiting
#define InHotStandby
Definition xlogutils.h:60

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, fb(), GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
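
A sketch of the usual vacuum-style caller (vac_strategy is a hypothetical BufferAccessStrategy):

    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, vac_strategy);

    LockBufferForCleanup(buf);      /* blocks until we hold the only pin */
    /* ... prune/defragment: no other backend can hold stale references ... */
    UnlockReleaseBuffer(buf);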

◆ LockBufferInternal()

void LockBufferInternal ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 6421 of file bufmgr.c.

6422{
6424
6425 /*
6426 * We can't wait if we haven't got a PGPROC. This should only occur
6427 * during bootstrap or shared memory initialization. Put an Assert here
6428 * to catch unsafe coding practices.
6429 */
6431
6432 /* handled in LockBuffer() wrapper */
6434
6435 Assert(BufferIsPinned(buffer));
6436 if (BufferIsLocal(buffer))
6437 return; /* local buffers need no lock */
6438
6439 buf_hdr = GetBufferDescriptor(buffer - 1);
6440
6441 /*
6442 * Test the most frequent lock modes first. While a switch (mode) would be
6443 * nice, at least gcc generates considerably worse code for it.
6444 *
6445 * Call BufferLockAcquire() with a constant argument for mode, to generate
6446 * more efficient code for the different lock modes.
6447 */
6448 if (mode == BUFFER_LOCK_SHARE)
6450 else if (mode == BUFFER_LOCK_EXCLUSIVE)
6454 else
6455 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
6456}
bool IsUnderPostmaster
Definition globals.c:120

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, BufferLockAcquire(), elog, ERROR, fb(), GetBufferDescriptor(), IsUnderPostmaster, mode, and MyProc.

Referenced by LockBuffer().

◆ LockBufHdr()

uint64 LockBufHdr ( BufferDesc desc)

Definition at line 7097 of file bufmgr.c.

7098{
7100
7102
7103 while (true)
7104 {
7105 /*
7106 * Always try once to acquire the lock directly, without setting up
7107 * the spin-delay infrastructure. That setup work shows up in
7108 * profiles and is rarely needed.
7109 */
7111 if (likely(!(old_buf_state & BM_LOCKED)))
7112 break; /* got lock */
7113
7114 /* and then spin without atomic operations until lock is released */
7115 {
7117
7119
7120 while (old_buf_state & BM_LOCKED)
7121 {
7124 }
7126 }
7127
7128 /*
7129 * Retry. The lock might, of course, already have been re-acquired by
7130 * the time we attempt to get it again.
7131 */
7132 }
7133
7134 return old_buf_state | BM_LOCKED;
7135}
void perform_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:186
#define init_local_spin_delay(status)
Definition s_lock.h:753

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, fb(), finish_spin_delay(), init_local_spin_delay, likely, perform_spin_delay(), pg_atomic_fetch_or_u64(), pg_atomic_read_u64(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), buffer_stage_common(), BufferAlloc(), BufferGetLSNAtomic(), BufferLockDequeueSelf(), BufferLockQueueSelf(), BufferLockWakeup(), BufferSync(), ConditionalLockBufferForCleanup(), create_toy_buffer(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), pg_buffercache_os_pages_internal(), pg_buffercache_pages(), StartBufferIO(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), WaitIO(), and WakePinCountWaiter().
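
The spinlock protects only the header fields, so critical sections must stay tiny; the canonical pattern used by the callers listed above looks like:

    uint64      buf_state = LockBufHdr(desc);

    if (buf_state & BM_DIRTY)
    {
        /* inspect or adjust header state; no I/O or other locking here */
    }
    UnlockBufHdr(desc);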

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 3056 of file bufmgr.c.

3057{
3061
3062 if (!BufferIsValid(buffer))
3063 elog(ERROR, "bad buffer ID: %d", buffer);
3064
3065 if (BufferIsLocal(buffer))
3066 {
3067 MarkLocalBufferDirty(buffer);
3068 return;
3069 }
3070
3071 bufHdr = GetBufferDescriptor(buffer - 1);
3072
3073 Assert(BufferIsPinned(buffer));
3075
3076 /*
3077 * NB: We have to wait for the buffer header spinlock to be released, as
3078 * TerminateBufferIO() relies on the spinlock.
3079 */
3081 for (;;)
3082 {
3085
3087
3090
3092 buf_state))
3093 break;
3094 }
3095
3096 /*
3097 * If the buffer was not dirty already, do vacuum accounting.
3098 */
3099 if (!(old_buf_state & BM_DIRTY))
3100 {
3102 if (VacuumCostActive)
3104 }
3105}
pg_noinline uint64 WaitBufHdrUnlocked(BufferDesc *buf)
Definition bufmgr.c:7145
int VacuumCostPageDirty
Definition globals.c:153
void MarkLocalBufferDirty(Buffer buffer)
Definition localbuf.c:491
int64 shared_blks_dirtied
Definition instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, fb(), GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pgBufferUsage, BufferUsage::shared_blks_dirtied, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().
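
The canonical modify-a-page sequence looks like this (my_apply_change() is a hypothetical page edit; the WAL logging that a permanent relation requires is reduced to a comment):

    Buffer      buf = ReadBuffer(rel, blkno);

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    START_CRIT_SECTION();
    my_apply_change(BufferGetPage(buf));    /* hypothetical page edit */
    MarkBufferDirty(buf);
    /* ... XLogInsert() of the change would go here ... */
    END_CRIT_SECTION();

    UnlockReleaseBuffer(buf);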

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5565 of file bufmgr.c.

5566{
5568 Page page = BufferGetPage(buffer);
5569
5570 if (!BufferIsValid(buffer))
5571 elog(ERROR, "bad buffer ID: %d", buffer);
5572
5573 if (BufferIsLocal(buffer))
5574 {
5575 MarkLocalBufferDirty(buffer);
5576 return;
5577 }
5578
5579 bufHdr = GetBufferDescriptor(buffer - 1);
5580
5581 Assert(GetPrivateRefCount(buffer) > 0);
5582 /* here, either share or exclusive lock is OK */
5583 Assert(BufferIsLockedByMe(buffer));
5584
5585 /*
5586 * This routine might get called many times on the same page, if we are
5587 * making the first scan after commit of an xact that added/deleted many
5588 * tuples. So, be as quick as we can if the buffer is already dirty. We
5589 * do this by not acquiring the spinlock if it looks like the status bits
5590 * are already set. Since we make this test unlocked, there's a chance we
5591 * might fail to notice that the flags have just been cleared, and fail
5592 * to reset them, due to memory-ordering issues.
5593 * is only intended to be used in cases where failing to write out the
5594 * data would be harmless anyway, it doesn't really matter.
5595 */
5596 if ((pg_atomic_read_u64(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5598 {
5600 bool dirtied = false;
5601 bool delayChkptFlags = false;
5603
5604 /*
5605 * If we need to protect hint bit updates from torn writes, WAL-log a
5606 * full page image of the page. This full page image is only necessary
5607 * if the hint bit update is the first change to the page since the
5608 * last checkpoint.
5609 *
5610 * We don't check full_page_writes here because that logic is included
5611 * when we call XLogInsert() since the value changes dynamically.
5612 */
5613 if (XLogHintBitIsNeeded() &&
5615 {
5616 /*
5617 * If we must not write WAL, due to a relfilelocator-specific
5618 * condition or being in recovery, don't dirty the page. We can
5619 * still set the hint, just not dirty the page as a result, so the
5620 * hint is lost when we evict the page or shut down.
5621 *
5622 * See src/backend/storage/page/README for longer discussion.
5623 */
5624 if (RecoveryInProgress() ||
5626 return;
5627
5628 /*
5629 * If the block is already dirty because we either made a change
5630 * or set a hint already, then we don't need to write a full page
5631 * image. Note that aggressive cleaning of blocks dirtied by hint
5632 * bit setting would increase the call rate. Bulk setting of hint
5633 * bits would reduce the call rate...
5634 *
5635 * We must issue the WAL record before we mark the buffer dirty.
5636 * Otherwise we might write the page before we write the WAL. That
5637 * causes a race condition, since a checkpoint might occur between
5638 * writing the WAL record and marking the buffer dirty. We solve
5639 * that with a kluge, but one that is already in use during
5640 * transaction commit to prevent race conditions. Basically, we
5641 * simply prevent the checkpoint WAL record from being written
5642 * until we have marked the buffer dirty. We don't start the
5643 * checkpoint flush until we have marked dirty, so our checkpoint
5644 * must flush the change to disk successfully or the checkpoint
5645 * never gets written, so crash recovery will fix.
5646 *
5647 * It's possible we may enter here without an xid, so it is
5648 * essential that CreateCheckPoint waits for virtual transactions
5649 * rather than full transactionids.
5650 */
5653 delayChkptFlags = true;
5654 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5655 }
5656
5658
5660
5661 if (!(buf_state & BM_DIRTY))
5662 {
5663 dirtied = true; /* Means "will be dirtied by this action" */
5664
5665 /*
5666 * Set the page LSN if we wrote a backup block. We aren't supposed
5667 * to set this when only holding a share lock, but as long as we
5668 * serialise it somehow we're OK. We choose to set LSN while
5669 * holding the buffer header lock, which causes any reader of an
5670 * LSN who holds only a share lock to also obtain a buffer header
5671 * lock before using PageGetLSN(), which is enforced in
5672 * BufferGetLSNAtomic().
5673 *
5674 * If checksums are enabled, you might think we should reset the
5675 * checksum here. That will happen when the page is written
5676 * sometime later in this checkpoint cycle.
5677 */
5678 if (XLogRecPtrIsValid(lsn))
5679 PageSetLSN(page, lsn);
5680 }
5681
5684 0, 0);
5685
5686 if (delayChkptFlags)
5688
5689 if (dirtied)
5690 {
5692 if (VacuumCostActive)
5694 }
5695 }
5696}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
#define DELAY_CHKPT_START
Definition proc.h:135
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition storage.c:573
int delayChkptFlags
Definition proc.h:263
bool RecoveryInProgress(void)
Definition xlog.c:6461
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u64(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
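
Hint-bit setters such as SetHintBits() follow this pattern: the change is advisory and losable, so a share lock suffices and MarkBufferDirtyHint() is used in place of MarkBufferDirty(). A sketch (my_set_hint() is hypothetical):

    LockBuffer(buf, BUFFER_LOCK_SHARE);

    my_set_hint(BufferGetPage(buf));    /* hypothetical, losable change */
    MarkBufferDirtyHint(buf, true);     /* true: standard page layout */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);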

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers ( int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 7811 of file bufmgr.c.

7814{
7815 *buffers_dirtied = 0;
7817 *buffers_skipped = 0;
7818
7819 for (int buf = 1; buf <= NBuffers; buf++)
7820 {
7821 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7824
7826
7828 if (!(buf_state & BM_VALID))
7829 continue;
7830
7833
7834 LockBufHdr(desc);
7835
7837 (*buffers_dirtied)++;
7838 else if (buffer_already_dirty)
7839 (*buffers_already_dirty)++;
7840 else
7841 (*buffers_skipped)++;
7842 }
7843}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition bufmgr.c:7662

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers ( Relation  rel,
int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 7754 of file bufmgr.c.

7758{
7760
7761 *buffers_dirtied = 0;
7763 *buffers_skipped = 0;
7764
7765 for (int buf = 1; buf <= NBuffers; buf++)
7766 {
7767 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7770
7772
7773 /* An unlocked precheck should be safe and saves some cycles. */
7774 if ((buf_state & BM_VALID) == 0 ||
7776 continue;
7777
7778 /* Make sure we can pin the buffer. */
7781
7782 buf_state = LockBufHdr(desc);
7783
7784 /* recheck, could have changed without the lock */
7785 if ((buf_state & BM_VALID) == 0 ||
7787 {
7788 UnlockBufHdr(desc);
7789 continue;
7790 }
7791
7793 (*buffers_dirtied)++;
7794 else if (buffer_already_dirty)
7795 (*buffers_already_dirty)++;
7796 else
7797 (*buffers_skipped)++;
7798 }
7799}

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer ( Buffer  buf,
bool buffer_already_dirty 
)

Definition at line 7718 of file bufmgr.c.

7719{
7720 BufferDesc *desc;
7721 bool buffer_dirtied = false;
7722
7724
7725 /* Make sure we can pin the buffer. */
7728
7729 desc = GetBufferDescriptor(buf - 1);
7730 LockBufHdr(desc);
7731
7733 /* Both cannot be true at the same time */
7735
7736 return buffer_dirtied;
7737}

References Assert, buf, BufferIsLocal, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().
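
A sketch of a caller distinguishing the three possible outcomes, much as pg_buffercache_mark_dirty() does:

    bool        already_dirty;

    if (MarkDirtyUnpinnedBuffer(buf, &already_dirty))
        elog(INFO, "buffer %d newly dirtied", buf);
    else if (already_dirty)
        elog(INFO, "buffer %d was already dirty", buf);
    else
        elog(INFO, "buffer %d skipped (invalid or pinned)", buf);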

◆ MarkDirtyUnpinnedBufferInternal()

static bool MarkDirtyUnpinnedBufferInternal ( Buffer  buf,
BufferDesc desc,
bool buffer_already_dirty 
)
static

Definition at line 7662 of file bufmgr.c.

7664{
7666 bool result = false;
7667
7668 *buffer_already_dirty = false;
7669
7672
7673 if ((buf_state & BM_VALID) == 0)
7674 {
7675 UnlockBufHdr(desc);
7676 return false;
7677 }
7678
7679 /* Check that it's not pinned already. */
7681 {
7682 UnlockBufHdr(desc);
7683 return false;
7684 }
7685
7686 /* Pin the buffer and then release the buffer spinlock */
7687 PinBuffer_Locked(desc);
7688
7689 /* If it was not already dirty, mark it as dirty. */
7690 if (!(buf_state & BM_DIRTY))
7691 {
7694 result = true;
7695 BufferLockUnlock(buf, desc);
7696 }
7697 else
7698 *buffer_already_dirty = true;
7699
7700 UnpinBuffer(desc);
7701
7702 return result;
7703}
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3056

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BufferLockAcquire(), BufferLockUnlock(), fb(), MarkBufferDirty(), pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), and MarkDirtyUnpinnedBuffer().

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 373 of file bufmgr.c.

374{
376
377 /* only allowed to be called when a reservation has been made */
379
380 /* use up the reserved entry */
382
383 /* and fill it */
385 res->buffer = buffer;
386 res->data.refcount = 0;
388
389 /* update cache for the next lookup */
391
393
394 return res;
395}

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, PrivateRefCountEntry::data, PrivateRefCountData::lockmode, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountData::refcount, and ReservedRefCountSlot.

Referenced by TrackNewBufferPin().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy,
bool  skip_if_not_valid 
)
static

Definition at line 3181 of file bufmgr.c.

3183{
3185 bool result;
3187
3190
3191 ref = GetPrivateRefCountEntry(b, true);
3192
3193 if (ref == NULL)
3194 {
3197
3199 for (;;)
3200 {
3202 return false;
3203
3204 /*
3205 * We're not allowed to increase the refcount while the buffer
3206 * header spinlock is held. Wait for the lock to be released.
3207 */
3210
3212
3213 /* increase refcount */
3215
3216 if (strategy == NULL)
3217 {
3218 /* Default case: increase usagecount unless already max. */
3221 }
3222 else
3223 {
3224 /*
3225 * Ring buffers shouldn't evict others from the pool. Thus we
3226 * don't make usagecount more than 1.
3227 */
3230 }
3231
3233 buf_state))
3234 {
3235 result = (buf_state & BM_VALID) != 0;
3236
3238 break;
3239 }
3240 }
3241 }
3242 else
3243 {
3244 /*
3245 * If we previously pinned the buffer, it is likely to be valid, but
3246 * it may not be if StartReadBuffers() was called and
3247 * WaitReadBuffers() hasn't been called yet. We'll check by loading
3248 * the flags without locking. This is racy, but it's OK to return
3249 * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
3250 * it'll see that it's now valid.
3251 *
3252 * Note: We deliberately avoid a Valgrind client request here.
3253 * Individual access methods can optionally superimpose buffer page
3254 * client requests on top of our client requests to enforce that
3255 * buffers are only accessed while locked (and pinned). It's possible
3256 * that the buffer page is legitimately non-accessible here. We
3257 * cannot meddle with that.
3258 */
3259 result = (pg_atomic_read_u64(&buf->state) & BM_VALID) != 0;
3260
3261 Assert(ref->data.refcount > 0);
3262 ref->data.refcount++;
3264 }
3265
3266 return result;
3267}
#define BM_MAX_USAGE_COUNT
#define BUF_STATE_GET_USAGECOUNT(state)
void TrackNewBufferPin(Buffer buf)
Definition bufmgr.c:3416

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), ReservedRefCountSlot, ResourceOwnerRememberBuffer(), TrackNewBufferPin(), unlikely, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().
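
For example, a block repeatedly pinned through a VACUUM ring strategy keeps a usagecount of 1, while the same block pinned by ordinary reads climbs toward BM_MAX_USAGE_COUNT, making it correspondingly harder for the clock sweep to evict.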

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 3292 of file bufmgr.c.

3293{
3295
3296 /*
3297 * As explained, we don't expect any preexisting pins. That allows us to
3298 * manipulate the PrivateRefCount after releasing the spinlock.
3299 */
3301
3302 /*
3303 * Since we hold the buffer spinlock, we can update the buffer state and
3304 * release the lock in one operation.
3305 */
3307
3309 0, 0, 1);
3310
3312}

References Assert, buf, BufferDescriptorGetBuffer(), fb(), GetPrivateRefCountEntry(), pg_atomic_read_u64(), TrackNewBufferPin(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), MarkDirtyUnpinnedBufferInternal(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr 
)
static

Definition at line 1210 of file bufmgr.c.

1217{
1221
1222 Assert(blockNum != P_NEW);
1223
1224 /* Persistence should be set before */
1225 Assert((persistence == RELPERSISTENCE_TEMP ||
1226 persistence == RELPERSISTENCE_PERMANENT ||
1227 persistence == RELPERSISTENCE_UNLOGGED));
1228
1229 if (persistence == RELPERSISTENCE_TEMP)
1230 {
1233 }
1234 else
1235 {
1236 io_context = IOContextForStrategy(strategy);
1238 }
1239
1240 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1244 smgr->smgr_rlocator.backend);
1245
1246 if (persistence == RELPERSISTENCE_TEMP)
1247 {
1248 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1249 if (*foundPtr)
1251 }
1252 else
1253 {
1254 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1255 strategy, foundPtr, io_context);
1256 if (*foundPtr)
1258 }
1259 if (rel)
1260 {
1261 /*
1262 * While pgBufferUsage's "read" counter isn't bumped unless we reach
1263 * WaitReadBuffers() (so, not for hits, and not for buffers that are
1264 * zeroed instead), the per-relation stats always count them.
1265 */
1267 if (*foundPtr)
1269 }
1270 if (*foundPtr)
1271 {
1273 if (VacuumCostActive)
1275
1276 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1280 smgr->smgr_rlocator.backend,
1281 true);
1282 }
1283
1285}
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition bufmgr.c:2100
#define P_NEW
Definition bufmgr.h:198
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition localbuf.c:119
#define pgstat_count_buffer_read(rel)
Definition pgstat.h:715

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, fb(), IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, and VacuumCostPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 772 of file bufmgr.c.

773{
775 Assert(BlockNumberIsValid(blockNum));
776
778 {
779 /* see comments in ReadBufferExtended */
783 errmsg("cannot access temporary tables of other sessions")));
784
785 /* pass it off to localbuf.c */
786 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
787 }
788 else
789 {
790 /* pass it to the shared buffer version */
791 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
792 }
793}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:682
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:667
#define RelationIsValid(relation)
Definition rel.h:489

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, fb(), PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().
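
Callers such as pg_prewarm() issue the hint well before the read; a sketch:

    /* hint: we'll want this block soon */
    (void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

    /* ... unrelated work, giving the kernel time to fetch the block ... */

    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL);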

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 682 of file bufmgr.c.

685{
686 PrefetchBufferResult result = {InvalidBuffer, false};
687 BufferTag newTag; /* identity of requested block */
688 uint32 newHash; /* hash value for newTag */
689 LWLock *newPartitionLock; /* buffer partition lock for it */
690 int buf_id;
691
692 Assert(BlockNumberIsValid(blockNum));
693
694 /* create a tag so we can lookup the buffer */
695 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
696 forkNum, blockNum);
697
698 /* determine its hash code and partition lock ID */
701
702 /* see if the block is in the buffer pool already */
704 buf_id = BufTableLookup(&newTag, newHash);
706
707 /* If not in buffers, initiate prefetch */
708 if (buf_id < 0)
709 {
710#ifdef USE_PREFETCH
711 /*
712 * Try to initiate an asynchronous read. This returns false in
713 * recovery if the relation file doesn't exist.
714 */
715 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
716 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
717 {
718 result.initiated_io = true;
719 }
720#endif /* USE_PREFETCH */
721 }
722 else
723 {
724 /*
725 * Report the buffer it was in at that time. The caller may be able
726 * to avoid a buffer table lookup, but it's not pinned and it must be
727 * rechecked!
728 */
729 result.recent_buffer = buf_id + 1;
730 }
731
732 /*
733 * If the block *is* in buffers, we do nothing. This is not really ideal:
734 * the block might be just about to be evicted, which would be stupid
735 * since we know we are going to need it soon. But the only easy answer
736 * is to bump the usage_count, which does not seem like a great solution:
737 * when the caller does ultimately touch the block, usage_count would get
738 * bumped again, resulting in too much favoritism for blocks that are
739 * involved in a prefetch sequence. A real fix would involve some
740 * additional per-buffer state, and it's not clear that there's enough of
741 * a problem to justify that.
742 */
743
744 return result;
745}
int io_direct_flags
Definition fd.c:168
#define IO_DIRECT_DATA
Definition fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition smgr.c:678
Buffer recent_buffer
Definition bufmgr.h:61

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), fb(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().
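
The recent_buffer result enables the optimization used by the WAL prefetcher: remember where the block was, then later try to pin that same buffer directly with ReadRecentBuffer(), falling back to a normal lookup if its identity has changed. A sketch:

    PrefetchBufferResult pr = PrefetchSharedBuffer(smgr, MAIN_FORKNUM, blkno);

    /* ... later, when the block is actually needed ... */
    if (BufferIsValid(pr.recent_buffer) &&
        ReadRecentBuffer(smgr->smgr_rlocator.locator, MAIN_FORKNUM,
                         blkno, pr.recent_buffer))
        buf = pr.recent_buffer;     /* fast path: still the same block */
    else
        buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                 RBM_NORMAL, NULL);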

◆ ProcessReadBuffersResult()

static void ProcessReadBuffersResult ( ReadBuffersOperation operation)
static

Definition at line 1693 of file bufmgr.c.

1694{
1695 PgAioReturn *aio_ret = &operation->io_return;
1697 int newly_read_blocks = 0;
1698
1699 Assert(pgaio_wref_valid(&operation->io_wref));
1700 Assert(aio_ret->result.status != PGAIO_RS_UNKNOWN);
1701
1702 /*
1703 * SMGR reports the number of blocks successfully read as the result of
1704 * the IO operation. Thus we can simply add that to ->nblocks_done.
1705 */
1706
1707 if (likely(rs != PGAIO_RS_ERROR))
1708 newly_read_blocks = aio_ret->result.result;
1709
1710 if (rs == PGAIO_RS_ERROR || rs == PGAIO_RS_WARNING)
1711 pgaio_result_report(aio_ret->result, &aio_ret->target_data,
1712 rs == PGAIO_RS_ERROR ? ERROR : WARNING);
1713 else if (aio_ret->result.status == PGAIO_RS_PARTIAL)
1714 {
1715 /*
1716 * We'll retry, so we just emit a debug message to the server log (or
1717 * not even that in production scenarios).
1718 */
1719 pgaio_result_report(aio_ret->result, &aio_ret->target_data, DEBUG1);
1720 elog(DEBUG3, "partial read, will retry");
1721 }
1722
1725
1726 operation->nblocks_done += newly_read_blocks;
1727
1728 Assert(operation->nblocks_done <= operation->nblocks);
1729}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition aio.c:971
PgAioResultStatus
Definition aio_types.h:79
@ PGAIO_RS_UNKNOWN
Definition aio_types.h:80
@ PGAIO_RS_PARTIAL
Definition aio_types.h:82
#define DEBUG3
Definition elog.h:28
PgAioResult result
Definition aio_types.h:132

References Assert, DEBUG1, DEBUG3, elog, ERROR, fb(), ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, pgaio_result_report(), PGAIO_RS_ERROR, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, PGAIO_RS_WARNING, pgaio_wref_valid(), PgAioReturn::result, PgAioResult::status, and WARNING.

Referenced by WaitReadBuffers().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 864 of file bufmgr.c.

865{
867}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:911
@ RBM_NORMAL
Definition bufmgr.h:46

References fb(), MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1293 of file bufmgr.c.

1297{
1298 ReadBuffersOperation operation;
1299 Buffer buffer;
1300 int flags;
1301 char persistence;
1302
1303 /*
1304 * Backward compatibility path; most code should use ExtendBufferedRel()
1305 * instead, as acquiring the extension lock inside ExtendBufferedRel()
1306 * scales a lot better.
1307 */
1308 if (unlikely(blockNum == P_NEW))
1309 {
1311
1312 /*
1313 * Since no-one else can be looking at the page contents yet, there is
1314 * no difference between an exclusive lock and a cleanup-strength
1315 * lock.
1316 */
1318 flags |= EB_LOCK_FIRST;
1319
1320 return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1321 }
1322
1323 if (rel)
1324 persistence = rel->rd_rel->relpersistence;
1325 else
1326 persistence = smgr_persistence;
1327
1330 {
1331 bool found;
1332
1333 buffer = PinBufferForBlock(rel, smgr, persistence,
1334 forkNum, blockNum, strategy, &found);
1335 ZeroAndLockBuffer(buffer, mode, found);
1336 return buffer;
1337 }
1338
1339 /*
1340 * Signal that we are going to immediately wait. If we're immediately
1341 * waiting, there is no benefit in actually executing the IO
1342 * asynchronously; it would just add dispatch overhead.
1343 */
1345 if (mode == RBM_ZERO_ON_ERROR)
1347 operation.smgr = smgr;
1348 operation.rel = rel;
1349 operation.persistence = persistence;
1350 operation.forknum = forkNum;
1351 operation.strategy = strategy;
1352 if (StartReadBuffer(&operation,
1353 &buffer,
1354 blockNum,
1355 flags))
1356 WaitReadBuffers(&operation);
1357
1358 return buffer;
1359}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:964
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition bufmgr.c:1131
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition bufmgr.c:1210
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition bufmgr.c:1732
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition bufmgr.c:1608
@ RBM_ZERO_ON_ERROR
Definition bufmgr.h:51
#define BMR_REL(p_rel)
Definition bufmgr.h:114
Form_pg_class rd_rel
Definition rel.h:111

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), fb(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 911 of file bufmgr.c.

913{
914 Buffer buf;
915
916 /*
917 * Reject attempts to read non-local temporary relations; we would be
918 * likely to get wrong data since we have no visibility into the owning
919 * session's local buffers.
920 */
921 if (RELATION_IS_OTHER_TEMP(reln))
922 ereport(ERROR,
923 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
924 errmsg("cannot access temporary tables of other sessions")));
925
926 /*
927 * Read the buffer, and update pgstat counters to reflect a cache hit or
928 * miss.
929 */
930 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
931 forkNum, blockNum, mode, strategy);
932
933 return buf;
934}

References buf, ereport, errcode(), errmsg(), ERROR, fb(), mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().
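
A hedged sketch (not part of bufmgr.c; rel and blkno are assumed) of the main reasons to call ReadBufferExtended() rather than ReadBuffer(): an explicit fork and a bulk-read strategy.

    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    Buffer      buf;

    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy);
    /* ... examine the page under a content lock ... */
    ReleaseBuffer(buf);
    FreeAccessStrategy(strategy);

The strategy confines a large sequential read to a small ring of buffers so it does not evict the whole shared buffer pool.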

◆ ReadBuffersCanStartIO()

static bool ReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1664 of file bufmgr.c.

1665{
1666 /*
1667 * If this backend currently has staged IO, we need to submit the pending
1668 * IO before waiting for the right to issue IO, to avoid the potential for
1669 * deadlocks (and, more commonly, unnecessary delays for other backends).
1670 */
1671 if (!nowait && pgaio_have_staged())
1672 {
1673 if (ReadBuffersCanStartIOOnce(buffer, true))
1674 return true;
1675
1676 /*
1677 * Unfortunately StartBufferIO() returning false doesn't let us
1678 * distinguish between the buffer already being valid and IO already
1679 * being in progress. Since IO already being in progress is quite
1680 * rare, this approach seems fine.
1681 */
1682 pgaio_submit_staged();
1683 }
1684
1685 return ReadBuffersCanStartIOOnce(buffer, nowait);
1686}
bool pgaio_have_staged(void)
Definition aio.c:1107
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
Definition bufmgr.c:1651

References PrivateRefCountEntry::buffer, pgaio_have_staged(), pgaio_submit_staged(), and ReadBuffersCanStartIOOnce().

Referenced by AsyncReadBuffers().

◆ ReadBuffersCanStartIOOnce()

static bool ReadBuffersCanStartIOOnce ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1651 of file bufmgr.c.

1652{
1653 if (BufferIsLocal(buffer))
1654 return StartLocalBufferIO(GetLocalBufferDescriptor(-buffer - 1),
1655 true, nowait);
1656 else
1657 return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1658}
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
Definition localbuf.c:523

References PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), StartBufferIO(), and StartLocalBufferIO().

Referenced by ReadBuffersCanStartIO().

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 948 of file bufmgr.c.

951{
952 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
953
954 return ReadBuffer_common(NULL, smgr,
955 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
956 forkNum, blockNum,
957 mode, strategy);
958}

References fb(), INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().
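
A sketch of a typical recovery-time call (assumptions: rlocator and blkno identify a block known to exist; compare XLogReadBufferExtended(), which wraps this in more error handling):

    Buffer buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                           RBM_NORMAL, NULL,
                                           true);   /* permanent relation */

Because no Relation is available, the persistence must be supplied by the caller via the permanent flag rather than read from rd_rel->relpersistence.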

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 803 of file bufmgr.c.

805{
806 BufferDesc *bufHdr;
807 BufferTag tag;
808 uint64 buf_state;
809
810 Assert(BufferIsValid(recent_buffer));
811
812 ResourceOwnerEnlarge(CurrentResourceOwner);
813 ReservePrivateRefCountEntry();
814 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
815
816 if (BufferIsLocal(recent_buffer))
817 {
818 int b = -recent_buffer - 1;
819
820 bufHdr = GetLocalBufferDescriptor(b);
821 buf_state = pg_atomic_read_u64(&bufHdr->state);
822
823 /* Is it still valid and holding the right tag? */
824 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
825 {
826 PinLocalBuffer(bufHdr, true);
827
828 pgBufferUsage.local_blks_hit++;
829
830 return true;
831 }
832 }
833 else
834 {
835 bufHdr = GetBufferDescriptor(recent_buffer - 1);
836
837 /*
838 * Is it still valid and holding the right tag? We do an unlocked tag
839 * comparison first, to make it unlikely that we'll increment the
840 * usage counter of the wrong buffer, if someone calls us with a very
841 * out of date recent_buffer. Then we'll check it again if we get the
842 * pin.
843 */
844 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
845 PinBuffer(bufHdr, NULL, true))
846 {
847 if (BufferTagsEqual(&tag, &bufHdr->tag))
848 {
849 pgBufferUsage.shared_blks_hit++;
850 return true;
851 }
852 UnpinBuffer(bufHdr);
853 }
854 }
855
856 return false;
857}

References Assert, b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u64(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().
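
A hedged sketch of the intended call pattern (recent is a hypothetical Buffer remembered from an earlier access, as XLogReadBufferExtended() does):

    if (!ReadRecentBuffer(rlocator, forkNum, blkno, recent))
    {
        /* miss: the buffer was recycled, fall back to a normal read */
        recent = ReadBufferWithoutRelcache(rlocator, forkNum, blkno,
                                           RBM_NORMAL, NULL, true);
    }

On success the buffer is already pinned and the buffer-mapping lookup is skipped entirely, which is the whole point of remembering recent buffers.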

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 5263 of file bufmgr.c.

5266{
5267 Buffer srcBuf;
5268 Buffer dstBuf;
5269 Page srcPage;
5270 Page dstPage;
5271 bool use_wal;
5272 BlockNumber nblocks;
5273 BlockNumber blkno;
5274 PGIOAlignedBlock buf;
5275 BufferAccessStrategy bstrategy_src;
5276 BufferAccessStrategy bstrategy_dst;
5277 BlockRangeReadStreamPrivate p;
5278 ReadStream *src_stream;
5279 SMgrRelation src_smgr;
5280
5281 /*
5282 * In general, we want to write WAL whenever wal_level > 'minimal', but we
5283 * can skip it when copying any fork of an unlogged relation other than
5284 * the init fork.
5285 */
5286 use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
5287
5288 /* Get number of blocks in the source relation. */
5289 nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
5290 forkNum);
5291
5292 /* Nothing to copy; just return. */
5293 if (nblocks == 0)
5294 return;
5295
5296 /*
5297 * Bulk extend the destination relation of the same size as the source
5298 * relation before starting to copy block by block.
5299 */
5300 memset(buf.data, 0, BLCKSZ);
5301 smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
5302 buf.data, true);
5303
5304 /* This is a bulk operation, so use buffer access strategies. */
5305 bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
5306 bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
5307
5308 /* Initialize streaming read */
5309 p.current_blocknum = 0;
5310 p.last_exclusive = nblocks;
5311 src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER);
5312
5313 /*
5314 * It is safe to use batchmode as block_range_read_stream_cb takes no
5315 * locks.
5316 */
5317 src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL |
5318 READ_STREAM_USE_BATCHING,
5319 bstrategy_src,
5320 src_smgr,
5321 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
5322 forkNum,
5323 block_range_read_stream_cb,
5324 &p,
5325 0);
5326
5327 /* Iterate over each block of the source relation file. */
5328 for (blkno = 0; blkno < nblocks; blkno++)
5329 {
5330 CHECK_FOR_INTERRUPTS();
5331
5332 /* Read block from source relation. */
5333 srcBuf = read_stream_next_buffer(src_stream, NULL);
5334 LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
5335 srcPage = BufferGetPage(srcBuf);
5336
5337 dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum,
5338 BufferGetBlockNumber(srcBuf),
5339 RBM_ZERO_AND_LOCK, bstrategy_dst,
5340 permanent);
5341 dstPage = BufferGetPage(dstBuf);
5342
5343 START_CRIT_SECTION();
5344
5345 /* Copy page data from the source to the destination. */
5346 memcpy(dstPage, srcPage, BLCKSZ);
5347 MarkBufferDirty(dstBuf);
5348
5349 /* WAL-log the copied page. */
5350 if (use_wal)
5351 log_newpage_buffer(dstBuf, true);
5352
5353 END_CRIT_SECTION();
5354
5355 UnlockReleaseBuffer(dstBuf);
5356 UnlockReleaseBuffer(srcBuf);
5357 }
5358 Assert(read_stream_next_buffer(src_stream, NULL) == InvalidBuffer);
5359 read_stream_end(src_stream);
5360
5361 FreeAccessStrategy(bstrategy_src);
5362 FreeAccessStrategy(bstrategy_dst);
5363}
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition bufmgr.c:948
@ BAS_BULKREAD
Definition bufmgr.h:37
@ BAS_BULKWRITE
Definition bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.c:620
#define XLogIsNeeded()
Definition xlog.h:111
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

References Assert, BAS_BULKREAD, BAS_BULKWRITE, block_range_read_stream_cb(), buf, BUFFER_LOCK_SHARE, BufferGetBlockNumber(), BufferGetPage(), CHECK_FOR_INTERRUPTS, BlockRangeReadStreamPrivate::current_blocknum, END_CRIT_SECTION, fb(), FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_ZERO_AND_LOCK, read_stream_begin_smgr_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4572 of file bufmgr.c.

4573{
4574 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4575 {
4576 /*
4577 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4578 * tableam returns the size in bytes - but for the purpose of this
4579 * routine, we want the number of blocks. Therefore divide, rounding
4580 * up.
4581 */
4582 uint64 szbytes;
4583
4584 szbytes = table_relation_size(relation, forkNum);
4585
4586 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4587 }
4588 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4589 {
4590 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4591 }
4592 else
4593 Assert(false);
4594
4595 return 0; /* keep compiler quiet */
4596}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition tableam.h:1847

References Assert, fb(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
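
Worked example of the rounding-up division above: with the default BLCKSZ of 8192, a table AM reporting szbytes = 16385 yields (16385 + 8191) / 8192 = 3 blocks, i.e. two full blocks plus a partial third block counted in full.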

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3121 of file bufmgr.c.

3124{
3125 ForkNumber forkNum = MAIN_FORKNUM;
3126 BufferDesc *bufHdr;
3127
3128 if (BufferIsValid(buffer))
3129 {
3130 Assert(BufferIsPinned(buffer));
3131 if (BufferIsLocal(buffer))
3132 {
3133 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3134 if (bufHdr->tag.blockNum == blockNum &&
3135 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3136 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3137 return buffer;
3138 UnpinLocalBuffer(buffer);
3139 }
3140 else
3141 {
3142 bufHdr = GetBufferDescriptor(buffer - 1);
3143 /* we have pin, so it's ok to examine tag without spinlock */
3144 if (bufHdr->tag.blockNum == blockNum &&
3145 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3146 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3147 return buffer;
3148 UnpinBuffer(bufHdr);
3149 }
3150 }
3151
3152 return ReadBuffer(relation, blockNum);
3153}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:864

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().
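
A hedged sketch of the lookup-loop pattern this function exists for (items, nitems, and rel are hypothetical; compare heapam_index_fetch_tuple()):

    Buffer      buf = InvalidBuffer;

    for (int i = 0; i < nitems; i++)
    {
        /* no-op if buf already holds the block; else unpin old, pin new */
        buf = ReleaseAndReadBuffer(buf, rel, items[i].blkno);
        /* ... lock the page, fetch the tuple, unlock ... */
    }
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);

When successive lookups land on the same page, as is common for ordered index fetches, the existing pin is simply retained.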

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5501 of file bufmgr.c.

5502{
5503 if (!BufferIsValid(buffer))
5504 elog(ERROR, "bad buffer ID: %d", buffer);
5505
5506 if (BufferIsLocal(buffer))
5507 UnpinLocalBuffer(buffer);
5508 else
5509 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5510}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 293 of file bufmgr.c.

294{
295 /* Already reserved (or freed), nothing to do */
296 if (ReservedRefCountSlot != -1)
297 return;
298
299 /*
300 * First search for a free entry in the array, that'll be sufficient in the
301 * majority of cases.
302 */
303 {
304 int i;
305
306 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
307 {
308 if (PrivateRefCountArrayKeys[i] == InvalidBuffer)
309 {
310 ReservedRefCountSlot = i;
311
312 /*
313 * We could return immediately, but iterating till the end of
314 * the array allows compiler-autovectorization.
315 */
316 }
317 }
318
319 if (ReservedRefCountSlot != -1)
320 return;
321 }
322
323 /*
324 * No luck. All array entries are full. Move one array entry into the hash
325 * table.
326 */
327 {
328 /*
329 * Move entry from the current clock position in the array into the
330 * hashtable. Use that slot.
331 */
332 int victim_slot;
333 PrivateRefCountEntry *victim_entry;
334 PrivateRefCountEntry *hashent;
335 bool found;
336
337 /* select victim slot */
338 victim_slot = PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES;
339 victim_entry = &PrivateRefCountArray[victim_slot];
340
341
342 /* Better be used, otherwise we shouldn't get here. */
346
347 /* enter victim array entry into hashtable */
348 hashent = hash_search(PrivateRefCountHash,
349 &PrivateRefCountArrayKeys[victim_slot],
350 HASH_ENTER,
351 &found);
352 Assert(!found);
353 /* move data from the entry in the array to the hash entry */
354 hashent->data = victim_entry->data;
355
356 /* clear the now free array slot */
357 PrivateRefCountArrayKeys[victim_slot] = InvalidBuffer;
358 victim_entry->buffer = InvalidBuffer;
359
360 /* clear the whole data member, just for future proofing */
361 memset(&victim_entry->data, 0, sizeof(victim_entry->data));
362 victim_entry->data.refcount = 0;
363 victim_entry->data.lockmode = BUFFER_LOCK_UNLOCK;
364
365 ReservedRefCountSlot = victim_slot;
366 }
367}
static uint32 PrivateRefCountClock
Definition bufmgr.c:251
@ HASH_ENTER
Definition hsearch.h:114

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by BufferAlloc(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntrySlow(), GetVictimBuffer(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), ReadRecentBuffer(), and SyncOneBuffer().
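
The reserve-before-pin protocol the callers above follow is, in sketch form (mirroring PinBuffer() call sites; no new API is assumed):

    ResourceOwnerEnlarge(CurrentResourceOwner);
    ReservePrivateRefCountEntry();
    /* From here on, creating the private refcount entry cannot fail, so
     * it is safe to do while holding the buffer header spinlock. */

Reserving up front means the later NewPrivateRefCountEntry() call never has to allocate at a point where failure would be unacceptable.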

◆ ResOwnerPrintBuffer()

static char * ResOwnerPrintBuffer ( Datum  res)
static

Definition at line 7460 of file bufmgr.c.

7461{
7462 return DebugPrintBufferRefcount(DatumGetInt32(res));
7463}
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212

References DatumGetInt32(), and DebugPrintBufferRefcount().

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 7410 of file bufmgr.c.

7411{
7412 Buffer buffer = DatumGetInt32(res);
7413
7414 return psprintf("lost track of buffer IO on buffer %d", buffer);
7415}

References PrivateRefCountEntry::buffer, DatumGetInt32(), and psprintf().

◆ ResOwnerReleaseBuffer()

static void ResOwnerReleaseBuffer ( Datum  res)
static

Definition at line 7424 of file bufmgr.c.

7425{
7426 Buffer buffer = DatumGetInt32(res);
7427
7428 /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
7429 if (!BufferIsValid(buffer))
7430 elog(ERROR, "bad buffer ID: %d", buffer);
7431
7432 if (BufferIsLocal(buffer))
7433 UnpinLocalBufferNoOwner(buffer);
7434 else
7435 {
7436 PrivateRefCountEntry *ref;
7437
7438 ref = GetPrivateRefCountEntry(buffer, false);
7439
7440 /* not having a private refcount would imply resowner corruption */
7441 Assert(ref != NULL);
7442
7443 /*
7444 * If the buffer was locked at the time of the resowner release,
7445 * release the lock now. This should only happen after errors.
7446 */
7447 if (ref->data.lockmode != BUFFER_LOCK_UNLOCK)
7448 {
7449 BufferDesc *buf = GetBufferDescriptor(buffer - 1);
7450
7451 HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
7452 BufferLockUnlock(buffer, buf);
7453 }
7454
7455 UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
7456 }
7457}
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition bufmgr.c:3369
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition localbuf.c:848

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), BufferLockUnlock(), DatumGetInt32(), elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 7402 of file bufmgr.c.

7403{
7404 Buffer buffer = DatumGetInt32(res);
7405
7406 AbortBufferIO(buffer);
7407}
static void AbortBufferIO(Buffer buffer)
Definition bufmgr.c:6999

References AbortBufferIO(), PrivateRefCountEntry::buffer, and DatumGetInt32().

◆ rlocator_comparator()

static int rlocator_comparator ( const void p1,
const void p2 
)
static

Definition at line 7070 of file bufmgr.c.

7071{
7072 RelFileLocator n1 = *(const RelFileLocator *) p1;
7073 RelFileLocator n2 = *(const RelFileLocator *) p2;
7074
7075 if (n1.relNumber < n2.relNumber)
7076 return -1;
7077 else if (n1.relNumber > n2.relNumber)
7078 return 1;
7079
7080 if (n1.dbOid < n2.dbOid)
7081 return -1;
7082 else if (n1.dbOid > n2.dbOid)
7083 return 1;
7084
7085 if (n1.spcOid < n2.spcOid)
7086 return -1;
7087 else if (n1.spcOid > n2.spcOid)
7088 return 1;
7089 else
7090 return 0;
7091}

References fb().

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().
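
Since rlocator_comparator() has the standard qsort() comparator signature, callers can sort locator arrays directly; a minimal sketch (locators and n are hypothetical):

    qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);

Sorting is what lets DropRelationsAllBuffers() and FlushRelationsAllBuffers() replace a per-buffer linear search with bsearch() once the relation count exceeds RELS_BSEARCH_THRESHOLD.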

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 7269 of file bufmgr.c.

7271{
7272 PendingWriteback *pending;
7273
7274 /*
7275 * As pg_flush_data() doesn't do anything with fsync disabled, there's no
7276 * point in tracking in that case.
7277 */
7278 if (io_direct_flags & IO_DIRECT_DATA ||
7279 !enableFsync)
7280 return;
7281
7282 /*
7283 * Add buffer to the pending writeback array, unless writeback control is
7284 * disabled.
7285 */
7286 if (*wb_context->max_pending > 0)
7287 {
7288 Assert(*wb_context->max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
7289
7290 pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
7291
7292 pending->tag = *tag;
7293 }
7294
7295 /*
7296 * Perform pending flushes if the writeback limit is exceeded. This
7297 * includes the case where previously an item has been added, but control
7298 * is now disabled.
7299 */
7300 if (wb_context->nr_pending >= *wb_context->max_pending)
7301 IssuePendingWritebacks(wb_context, io_context);
7302}
bool enableFsync
Definition globals.c:129
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert, enableFsync, fb(), IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().

◆ shared_buffer_readv_complete()

static PgAioResult shared_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8452 of file bufmgr.c.

8454{
8455 return buffer_readv_complete(ioh, prior_result, cb_data, false);
8456}

References buffer_readv_complete(), and fb().

◆ shared_buffer_readv_complete_local()

static PgAioResult shared_buffer_readv_complete_local ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

◆ shared_buffer_readv_stage()

static void shared_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 8446 of file bufmgr.c.

8447{
8448 buffer_stage_common(ioh, false, false);
8449}

References buffer_stage_common(), and fb().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void arg)
static

Definition at line 7038 of file bufmgr.c.

7039{
7040 BufferDesc *bufHdr = (BufferDesc *) arg;
7041
7042 /* Buffer is pinned, so we can read the tag without locking the spinlock */
7043 if (bufHdr != NULL)
7044 errcontext("writing block %u of relation \"%s\"",
7045 bufHdr->tag.blockNum,
7046 relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
7047 BufTagGetForkNum(&bufHdr->tag)).str);
7048}

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), and relpathperm.

Referenced by FlushBuffer().

◆ StartBufferIO()

bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)

Definition at line 6879 of file bufmgr.c.

6880{
6881 uint64 buf_state;
6882
6883 ResourceOwnerEnlarge(CurrentResourceOwner);
6884
6885 for (;;)
6886 {
6887 buf_state = LockBufHdr(buf);
6888
6889 if (!(buf_state & BM_IO_IN_PROGRESS))
6890 break;
6891 UnlockBufHdr(buf, buf_state);
6892 if (nowait)
6893 return false;
6894 WaitIO(buf);
6895 }
6896
6897 /* Once we get here, there is definitely no I/O active on this buffer */
6898
6899 /* Check if someone else already did the I/O */
6900 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6901 {
6902 UnlockBufHdr(buf, buf_state);
6903 return false;
6904 }
6905
6908 0);
6909
6912
6913 return true;
6914}
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, fb(), LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by buffer_call_start_io(), ExtendBufferedRelShared(), FlushBuffer(), read_rel_block_ll(), ReadBuffersCanStartIOOnce(), and ZeroAndLockBuffer().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1608 of file bufmgr.c.

1612{
1613 int nblocks = 1;
1614 bool result;
1615
1616 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1617 false /* single block, no forwarding */ );
1618 Assert(nblocks == 1); /* single block can't be short */
1619
1620 return result;
1621}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition bufmgr.c:1362

References Assert, PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags 
)

Definition at line 1589 of file bufmgr.c.

1594{
1595 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1596 true /* expect forwarded buffers */ );
1597}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags,
bool  allow_forwarding 
)
static

Definition at line 1362 of file bufmgr.c.

1368{
1369 int actual_nblocks = *nblocks;
1370 int maxcombine = 0;
1371 bool did_start_io;
1372
1373 Assert(*nblocks == 1 || allow_forwarding);
1374 Assert(*nblocks > 0);
1375 Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1376
1377 for (int i = 0; i < actual_nblocks; ++i)
1378 {
1379 bool found;
1380
1381 if (allow_forwarding && buffers[i] != InvalidBuffer)
1382 {
1383 BufferDesc *bufHdr;
1384
1385 /*
1386 * This is a buffer that was pinned by an earlier call to
1387 * StartReadBuffers(), but couldn't be handled in one operation at
1388 * that time. The operation was split, and the caller has passed
1389 * an already pinned buffer back to us to handle the rest of the
1390 * operation. It must continue at the expected block number.
1391 */
1392 Assert(BufferGetBlockNumber(buffers[i]) == blockNum + i);
1393
1394 /*
1395 * It might be an already valid buffer (a hit) that followed the
1396 * final contiguous block of an earlier I/O (a miss) marking the
1397 * end of it, or a buffer that some other backend has since made
1398 * valid by performing the I/O for us, in which case we can handle
1399 * it as a hit now. It is safe to check for a BM_VALID flag with
1400 * a relaxed load, because we got a fresh view of it while pinning
1401 * it in the previous call.
1402 *
1403 * On the other hand if we don't see BM_VALID yet, it must be an
1404 * I/O that was split by the previous call and we need to try to
1405 * start a new I/O from this block. We're also racing against any
1406 * other backend that might start the I/O or even manage to mark
1407 * it BM_VALID after this check, but StartBufferIO() will handle
1408 * those cases.
1409 */
1410 if (BufferIsLocal(buffers[i]))
1411 bufHdr = GetLocalBufferDescriptor(-buffers[i] - 1);
1412 else
1413 bufHdr = GetBufferDescriptor(buffers[i] - 1);
1414 Assert(pg_atomic_read_u64(&bufHdr->state) & BM_TAG_VALID);
1415 found = pg_atomic_read_u64(&bufHdr->state) & BM_VALID;
1416 }
1417 else
1418 {
1419 buffers[i] = PinBufferForBlock(operation->rel,
1420 operation->smgr,
1421 operation->persistence,
1422 operation->forknum,
1423 blockNum + i,
1424 operation->strategy,
1425 &found);
1426 }
1427
1428 if (found)
1429 {
1430 /*
1431 * We have a hit. If it's the first block in the requested range,
1432 * we can return it immediately and report that WaitReadBuffers()
1433 * does not need to be called. If the initial value of *nblocks
1434 * was larger, the caller will have to call again for the rest.
1435 */
1436 if (i == 0)
1437 {
1438 *nblocks = 1;
1439
1440#ifdef USE_ASSERT_CHECKING
1441
1442 /*
1443 * Initialize enough of ReadBuffersOperation to make
1444 * CheckReadBuffersOperation() work. Outside of assertions
1445 * that's not necessary when no IO is issued.
1446 */
1447 operation->buffers = buffers;
1448 operation->blocknum = blockNum;
1449 operation->nblocks = 1;
1450 operation->nblocks_done = 1;
1451 CheckReadBuffersOperation(operation, true);
1452#endif
1453 return false;
1454 }
1455
1456 /*
1457 * Otherwise we already have an I/O to perform, but this block
1458 * can't be included as it is already valid. Split the I/O here.
1459 * There may or may not be more blocks requiring I/O after this
1460 * one, we haven't checked, but they can't be contiguous with this
1461 * one in the way. We'll leave this buffer pinned, forwarding it
1462 * to the next call, avoiding the need to unpin it here and re-pin
1463 * it in the next call.
1464 */
1465 actual_nblocks = i;
1466 break;
1467 }
1468 else
1469 {
1470 /*
1471 * Check how many blocks we can cover with the same IO. The smgr
1472 * implementation might e.g. be limited due to a segment boundary.
1473 */
1474 if (i == 0 && actual_nblocks > 1)
1475 {
1476 maxcombine = smgrmaxcombine(operation->smgr,
1477 operation->forknum,
1478 blockNum);
1479 if (unlikely(maxcombine < actual_nblocks))
1480 {
1481 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1482 blockNum, actual_nblocks, maxcombine);
1483 actual_nblocks = maxcombine;
1484 }
1485 }
1486 }
1487 }
1488 *nblocks = actual_nblocks;
1489
1490 /* Populate information needed for I/O. */
1491 operation->buffers = buffers;
1492 operation->blocknum = blockNum;
1493 operation->flags = flags;
1494 operation->nblocks = actual_nblocks;
1495 operation->nblocks_done = 0;
1496 pgaio_wref_clear(&operation->io_wref);
1497
1498 /*
1499 * When using AIO, start the IO in the background. If not, issue prefetch
1500 * requests if desired by the caller.
1501 *
1502 * The reason we have a dedicated path for IOMETHOD_SYNC here is to
1503 * de-risk the introduction of AIO somewhat. It's a large architectural
1504 * change, with lots of chances for unanticipated performance effects.
1505 *
1506 * Use of IOMETHOD_SYNC already leads to not actually performing IO
1507 * asynchronously, but without the check here we'd execute IO earlier than
1508 * we used to. Eventually this IOMETHOD_SYNC specific path should go away.
1509 */
1510 if (io_method != IOMETHOD_SYNC)
1511 {
1512 /*
1513 * Try to start IO asynchronously. It's possible that no IO needs to
1514 * be started, if another backend already performed the IO.
1515 *
1516 * Note that if an IO is started, it might not cover the entire
1517 * requested range, e.g. because an intermediary block has been read
1518 * in by another backend. In that case any "trailing" buffers we
1519 * already pinned above will be "forwarded" by read_stream.c to the
1520 * next call to StartReadBuffers().
1521 *
1522 * This is signalled to the caller by decrementing *nblocks *and*
1523 * reducing operation->nblocks. The latter is done here, but not below
1524 * WaitReadBuffers(), as in WaitReadBuffers() we can't "shorten" the
1525 * overall read size anymore, we need to retry until done in its
1526 * entirety or until failed.
1527 */
1528 did_start_io = AsyncReadBuffers(operation, nblocks);
1529
1530 operation->nblocks = *nblocks;
1531 }
1532 else
1533 {
1534 operation->flags |= READ_BUFFERS_SYNCHRONOUSLY;
1535
1536 if (flags & READ_BUFFERS_ISSUE_ADVICE)
1537 {
1538 /*
1539 * In theory we should only do this if PinBufferForBlock() had to
1540 * allocate new buffers above. That way, if two calls to
1541 * StartReadBuffers() were made for the same blocks before
1542 * WaitReadBuffers(), only the first would issue the advice.
1543 * That'd be a better simulation of true asynchronous I/O, which
1544 * would only start the I/O once, but isn't done here for
1545 * simplicity.
1546 */
1547 smgrprefetch(operation->smgr,
1548 operation->forknum,
1549 blockNum,
1551 }
1552
1553 /*
1554 * Indicate that WaitReadBuffers() should be called. WaitReadBuffers()
1555 * will initiate the necessary IO.
1556 */
1557 did_start_io = true;
1558 }
1559
1560 CheckReadBuffersOperation(operation, false);
1561
1562 return did_start_io;
1563}
int io_method
Definition aio.c:74
@ IOMETHOD_SYNC
Definition aio.h:34
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition bufmgr.c:1627
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition bufmgr.c:1864
#define READ_BUFFERS_ISSUE_ADVICE
Definition bufmgr.h:124
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition smgr.c:697

References Assert, AsyncReadBuffers(), ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, CheckReadBuffersOperation(), DEBUG2, elog, fb(), ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, InvalidBuffer, io_method, ReadBuffersOperation::io_wref, IOMETHOD_SYNC, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, pg_atomic_read_u64(), pgaio_wref_clear(), PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, READ_BUFFERS_SYNCHRONOUSLY, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, smgrmaxcombine(), smgrprefetch(), ReadBuffersOperation::strategy, and unlikely.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 4033 of file bufmgr.c.

4034{
4035 BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
4036 int result = 0;
4037 uint64 buf_state;
4038 BufferTag tag;
4039
4040 /* Make sure we can handle the pin */
4041 ResourceOwnerEnlarge(CurrentResourceOwner);
4042 ReservePrivateRefCountEntry();
4043
4044 /*
4045 * Check whether buffer needs writing.
4046 *
4047 * We can make this check without taking the buffer content lock so long
4048 * as we mark pages dirty in access methods *before* logging changes with
4049 * XLogInsert(): if someone marks the buffer dirty just after our check we
4050 * don't worry because our checkpoint.redo points before log record for
4051 * upcoming changes and so we are not required to write such dirty buffer.
4052 */
4053 buf_state = LockBufHdr(bufHdr);
4054
4055 if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
4056 BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
4057 {
4058 result |= BUF_REUSABLE;
4059 }
4060 else if (skip_recently_used)
4061 {
4062 /* Caller told us not to write recently-used buffers */
4063 UnlockBufHdr(bufHdr, buf_state);
4064 return result;
4065 }
4066
4067 if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4068 {
4069 /* It's clean, so nothing to do */
4070 UnlockBufHdr(bufHdr, buf_state);
4071 return result;
4072 }
4073
4074 /*
4075 * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
4076 * buffer is clean by the time we've locked it.)
4077 */
4078 PinBuffer_Locked(bufHdr);
4079
4080 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4081
4082 tag = bufHdr->tag;
4083
4084 UnpinBuffer(bufHdr);
4085
4086 /*
4087 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
4088 * IOContext will always be IOCONTEXT_NORMAL.
4089 */
4090 ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
4091
4092 return result | BUF_WRITTEN;
4093}

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().

◆ TerminateBufferIO()

void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint64  set_flag_bits,
bool  forget_owner,
bool  release_aio 
)

Definition at line 6937 of file bufmgr.c.

6939{
6942 int refcount_change = 0;
6943
6945
6948
6949 /* Clear earlier errors, if this IO failed, it'll be marked again */
6951
6954
6955 if (release_aio)
6956 {
6957 /* release ownership by the AIO subsystem */
6959 refcount_change = -1;
6960 pgaio_wref_clear(&buf->io_wref);
6961 }
6962
6966
6967 if (forget_owner)
6970
6972
6973 /*
6974 * Support LockBufferForCleanup()
6975 *
6976 * We may have just released the last pin other than the waiter's. In most
6977 * cases, this backend holds another pin on the buffer. But, if, for
6978 * example, this backend is completing an IO issued by another backend, it
6979 * may be time to wake the waiter.
6980 */
6983}
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void WakePinCountWaiter(BufferDesc *buf)
Definition bufmgr.c:3324
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, fb(), LockBufHdr(), pgaio_wref_clear(), ResourceOwnerForgetBufferIO(), UnlockBufHdrExt(), and WakePinCountWaiter().

Referenced by AbortBufferIO(), buffer_call_terminate_io(), buffer_readv_complete_one(), ExtendBufferedRelShared(), FlushBuffer(), and ZeroAndLockBuffer().

◆ TrackNewBufferPin()

void TrackNewBufferPin ( Buffer  buf)
inline

Definition at line 3416 of file bufmgr.c.

3417{
3418 PrivateRefCountEntry *ref;
3419
3420 ref = NewPrivateRefCountEntry(buf);
3421 ref->data.refcount++;
3422
3423 ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
3424
3425 /*
3426 * This is the first pin for this page by this backend, mark its page as
3427 * defined to valgrind. While the page contents might not actually be
3428 * valid yet, we don't currently guarantee that such pages are marked
3429 * undefined or non-accessible.
3430 *
3431 * It's not necessarily the prettiest to do this here, but otherwise we'd
3432 * need this block of code in multiple places.
3433 */
3434 VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(GetBufferDescriptor(buf - 1)),
3435 BLCKSZ);
3436}
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition bufmgr.c:373

References buf, BufHdrGetBlock, CurrentResourceOwner, fb(), GetBufferDescriptor(), NewPrivateRefCountEntry(), ResourceOwnerRememberBuffer(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by GetBufferFromRing(), PinBuffer(), PinBuffer_Locked(), and StrategyGetBuffer().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void arg 
)
static

Definition at line 7234 of file bufmgr.c.

7235{
7236 CkptTsStatus *sa = (CkptTsStatus *) DatumGetPointer(a);
7237 CkptTsStatus *sb = (CkptTsStatus *) DatumGetPointer(b);
7238
7239 /* we want a min-heap, so return 1 for a < b */
7240 if (sa->progress < sb->progress)
7241 return 1;
7242 else if (sa->progress == sb->progress)
7243 return 0;
7244 else
7245 return -1;
7246}

References a, b, DatumGetPointer(), and fb().

Referenced by BufferSync().
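
BufferSync() pairs this comparator with the binaryheap module; in sketch form (per-tablespace CkptTsStatus entries and num_spaces are assumed, mirroring BufferSync()):

    binaryheap *ts_heap;

    ts_heap = binaryheap_allocate(num_spaces,
                                  ts_ckpt_progress_comparator,
                                  NULL);
    binaryheap_add_unordered(ts_heap, PointerGetDatum(per_ts_stat));
    binaryheap_build(ts_heap);

Inverting the usual comparison result turns binaryheap's max-heap behaviour into the min-heap needed to always pick the tablespace that is least far along in its checkpoint writes.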

◆ UnlockBuffer()

void UnlockBuffer ( Buffer  buffer)

Definition at line 6405 of file bufmgr.c.

6406{
6407 BufferDesc *buf_hdr;
6408
6409 Assert(BufferIsPinned(buffer));
6410 if (BufferIsLocal(buffer))
6411 return; /* local buffers need no lock */
6412
6413 buf_hdr = GetBufferDescriptor(buffer - 1);
6414 BufferLockUnlock(buffer, buf_hdr);
6415}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockUnlock(), fb(), and GetBufferDescriptor().

Referenced by LockBuffer().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5709 of file bufmgr.c.

5710{
5711 BufferDesc *buf = PinCountWaitBuf;
5712
5713 if (buf)
5714 {
5715 uint64 buf_state;
5716 uint64 unset_bits = 0;
5717
5718 buf_state = LockBufHdr(buf);
5719
5720 /*
5721 * Don't complain if flag bit not set; it could have been reset but we
5722 * got a cancel/die interrupt before getting the signal.
5723 */
5724 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5725 buf->wait_backend_pgprocno == MyProcNumber)
5726 unset_bits = BM_PIN_COUNT_WAITER;
5727
5728 UnlockBufHdrExt(buf, buf_state,
5729 0, unset_bits,
5730 0);
5731
5732 PinCountWaitBuf = NULL;
5733 }
5734}

References BM_PIN_COUNT_WAITER, buf, fb(), LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5518 of file bufmgr.c.

5519{
5520 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5521 ReleaseBuffer(buffer);
5522}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), 
spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 3369 of file bufmgr.c.

3370{
3371 PrivateRefCountEntry *ref;
3372 Buffer b = BufferDescriptorGetBuffer(buf);
3373
3374 Assert(!BufferIsLocal(b));
3375
3376 /* not moving as we're likely deleting it soon anyway */
3377 ref = GetPrivateRefCountEntry(b, false);
3378 Assert(ref != NULL);
3379 Assert(ref->data.refcount > 0);
3380 ref->data.refcount--;
3381 if (ref->data.refcount == 0)
3382 {
3383 uint64 buf_state;
3384
3385 /*
3386 * Mark buffer non-accessible to Valgrind.
3387 *
3388 * Note that the buffer may have already been marked non-accessible
3389 * within access method code that enforces that buffers are only
3390 * accessed while a buffer lock is held.
3391 */
3392 VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
3393
3394 /*
3395 * I'd better not still hold the buffer content lock. Can't use
3396 * BufferIsLockedByMe(), as that asserts the buffer is pinned.
3397 */
3398 Assert(!BufferLockHeldByMe(buf));
3399
3400 /* decrement the shared reference count */
3401 buf_state = pg_atomic_fetch_sub_u64(&buf->state, BUF_REFCOUNT_ONE);
3402
3403 /* Support LockBufferForCleanup() */
3404 if (buf_state & BM_PIN_COUNT_WAITER)
3405 WakePinCountWaiter(buf);
3406
3407 ForgetPrivateRefCountEntry(ref);
3408 }
3409}
static uint64 pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:541
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition bufmgr.c:551

References Assert, b, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufferLockHeldByMe(), BufHdrGetBlock, fb(), ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), pg_atomic_fetch_sub_u64(), VALGRIND_MAKE_MEM_NOACCESS, and WakePinCountWaiter().

Referenced by ResOwnerReleaseBuffer(), and UnpinBuffer().

◆ WaitBufHdrUnlocked()

pg_noinline uint64 WaitBufHdrUnlocked ( BufferDesc buf)

◆ WaitIO()

static void WaitIO ( BufferDesc buf)
static

Definition at line 6800 of file bufmgr.c.

6801{
6802 ConditionVariable *cv = BufferDescriptorGetIOCV(buf);
6803
6804 ConditionVariablePrepareToSleep(cv);
6805 for (;;)
6806 {
6807 uint64 buf_state;
6808 PgAioWaitRef iow;
6809
6810 /*
6811 * It may not be necessary to acquire the spinlock to check the flag
6812 * here, but since this test is essential for correctness, we'd better
6813 * play it safe.
6814 */
6815 buf_state = LockBufHdr(buf);
6816
6817 /*
6818 * Copy the wait reference while holding the spinlock. This protects
6819 * against a concurrent TerminateBufferIO() in another backend from
6820 * clearing the wref while it's being read.
6821 */
6822 iow = buf->io_wref;
6823 UnlockBufHdr(buf, buf_state);
6824
6825 /* no IO in progress, we don't need to wait */
6826 if (!(buf_state & BM_IO_IN_PROGRESS))
6827 break;
6828
6829 /*
6830 * The buffer has asynchronous IO in progress, wait for it to
6831 * complete.
6832 */
6833 if (pgaio_wref_valid(&iow))
6834 {
6835 pgaio_wref_wait(&iow);
6836
6837 /*
6838 * The AIO subsystem internally uses condition variables and thus
6839 * might remove this backend from the BufferDesc's CV. While that
6840 * wouldn't cause a correctness issue (the first CV sleep just
6841 * immediately returns if not already registered), it seems worth
6842 * avoiding unnecessary loop iterations, given that we take care
6843 * to do so at the start of the function.
6844 */
6845 ConditionVariablePrepareToSleep(cv);
6846 continue;
6847 }
6848
6849 /* wait on BufferDesc->cv, e.g. for concurrent synchronous IO */
6850 ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
6851 }
6852 ConditionVariableCancelSleep();
6853}
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition aio.c:991
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), fb(), LockBufHdr(), pgaio_wref_valid(), pgaio_wref_wait(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation operation)

Definition at line 1732 of file bufmgr.c.

1733{
1734 PgAioReturn *aio_ret = &operation->io_return;
1735 IOContext io_context;
1736 IOObject io_object;
1737
1738 if (operation->persistence == RELPERSISTENCE_TEMP)
1739 {
1740 io_context = IOCONTEXT_NORMAL;
1741 io_object = IOOBJECT_TEMP_RELATION;
1742 }
1743 else
1744 {
1745 io_context = IOContextForStrategy(operation->strategy);
1746 io_object = IOOBJECT_RELATION;
1747 }
1748
1749 /*
1750 * If we get here without an IO operation having been issued, the
1751 * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
1752 * caller should not have called WaitReadBuffers().
1753 *
1754 * In the case of IOMETHOD_SYNC, we start the IO in WaitReadBuffers(), as
1755 * we used to before the introduction of AIO. This is done as part
1756 * of the retry logic below, no extra code is required.
1757 *
1758 * This path is expected to eventually go away.
1759 */
1760 if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
1761 elog(ERROR, "waiting for read operation that didn't read");
1762
1763 /*
1764 * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
1765 * done. We may need multiple retries, not just because we could get
1766 * multiple partial reads, but also because some of the remaining
1767 * to-be-read buffers may have been read in by other backends, limiting
1768 * the IO size.
1769 */
1770 while (true)
1771 {
1772 int ignored_nblocks_progress;
1773
1774 CheckReadBuffersOperation(operation, false);
1775
1776 /*
1777 * If there is an IO associated with the operation, we may need to
1778 * wait for it.
1779 */
1780 if (pgaio_wref_valid(&operation->io_wref))
1781 {
1782 /*
1783 * Track the time spent waiting for the IO to complete. As
1784 * tracking a wait even if we don't actually need to wait
1785 *
1786 * a) is not cheap, due to the timestamping overhead
1787 *
1788 * b) reports some time as waiting, even if we never waited
1789 *
1790 * we first check if we already know the IO is complete.
1791 */
1792 if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
1793 !pgaio_wref_check_done(&operation->io_wref))
1794 {
1795 instr_time io_start = pgstat_prepare_io_time(track_io_timing);
1796
1797 pgaio_wref_wait(&operation->io_wref);
1798
1799 /*
1800 * The IO operation itself was already counted earlier, in
1801 * AsyncReadBuffers(), this just accounts for the wait time.
1802 */
1803 pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
1804 io_start, 0, 0);
1805 }
1806 else
1807 {
1808 Assert(pgaio_wref_check_done(&operation->io_wref));
1809 }
1810
1811 /*
1812 * We now are sure the IO completed. Check the results. This
1813 * includes reporting on errors if there were any.
1814 */
1815 ProcessReadBuffersResult(operation);
1816 }
1817
1818 /*
1819 * Most of the time, the one IO we already started, will read in
1820 * everything. But we need to deal with partial reads and buffers not
1821 * needing IO anymore.
1822 */
1823 if (operation->nblocks_done == operation->nblocks)
1824 break;
1825
1826 CHECK_FOR_INTERRUPTS();
1827
1828 /*
1829 * This may only complete the IO partially, either because some
1830 * buffers were already valid, or because of a partial read.
1831 *
1832 * NB: In contrast to after the AsyncReadBuffers() call in
1833 * StartReadBuffers(), we do *not* reduce
1834 * ReadBuffersOperation->nblocks here, callers expect the full
1835 * operation to be completed at this point (as more operations may
1836 * have been queued).
1837 */
1838 AsyncReadBuffers(operation, &ignored_nblocks_progress);
1839 }
1840
1841 CheckReadBuffersOperation(operation, true);
1842
1843 /* NB: READ_DONE tracepoint was already executed in completion callback */
1844}
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition aio.c:1005
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
Definition bufmgr.c:1693

References Assert, AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, fb(), io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ WakePinCountWaiter()

static void WakePinCountWaiter ( BufferDesc buf)
static

Definition at line 3324 of file bufmgr.c.

3325{
3326 /*
3327 * Acquire the buffer header lock, re-check that there's a waiter. Another
3328 * backend could have unpinned this buffer, and already woken up the
3329 * waiter.
3330 *
3331 * There's no danger of the buffer being replaced after we unpinned it
3332 * above, as it's pinned by the waiter. The waiter removes
3333 * BM_PIN_COUNT_WAITER if it stops waiting for a reason other than this
3334 * backend waking it up.
3335 */
3336 uint64 buf_state = LockBufHdr(buf);
3337
3338 if ((buf_state & BM_PIN_COUNT_WAITER) &&
3339 BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3340 {
3341 /* we just released the last pin other than the waiter's */
3342 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
3343
3346 0);
3347 ProcSendSignal(wait_backend_pgprocno);
3348 }
3349 else
3351}
void ProcSendSignal(ProcNumber procNumber)
Definition proc.c:1992

References BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, fb(), LockBufHdr(), ProcSendSignal(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by TerminateBufferIO(), and UnpinBufferNoOwner().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext context,
int max_pending 
)

Definition at line 7257 of file bufmgr.c.

7258{
7259 Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
7260
7261 context->max_pending = max_pending;
7262 context->nr_pending = 0;
7263}

References Assert, WritebackContext::max_pending, WritebackContext::nr_pending, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferManagerShmemInit(), and BufferSync().
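
A hedged usage sketch (mirroring BackgroundWriterMain(); a tag from a just-written buffer is assumed):

    WritebackContext wb_context;

    WritebackContextInit(&wb_context, &bgwriter_flush_after);
    /* ... after each buffer write: */
    ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL, &tag);

Passing a pointer to the GUC rather than its value lets the writeback limit track configuration reloads without re-initializing the context.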

◆ ZeroAndLockBuffer()

static void ZeroAndLockBuffer ( Buffer  buffer,
ReadBufferMode  mode,
bool  already_valid 
)
static

Definition at line 1131 of file bufmgr.c.

1132{
1133 BufferDesc *bufHdr;
1134 bool need_to_zero;
1135 bool isLocalBuf = BufferIsLocal(buffer);
1136
1137 Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
1138
1139 if (already_valid)
1140 {
1141 /*
1142 * If the caller already knew the buffer was valid, we can skip some
1143 * header interaction. The caller just wants to lock the buffer.
1144 */
1145 need_to_zero = false;
1146 }
1147 else if (isLocalBuf)
1148 {
1149 /* Simple case for non-shared buffers. */
1150 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1151 need_to_zero = StartLocalBufferIO(bufHdr, true, false);
1152 }
1153 else
1154 {
1155 /*
1156 * Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
1157 * concurrently. Even though we aren't doing I/O, that ensures that
1158 * we don't zero a page that someone else has pinned. An exclusive
1159 * content lock wouldn't be enough, because readers are allowed to
1160 * drop the content lock after determining that a tuple is visible
1161 * (see buffer access rules in README).
1162 */
1163 bufHdr = GetBufferDescriptor(buffer - 1);
1164 need_to_zero = StartBufferIO(bufHdr, true, false);
1165 }
1166
1167 if (need_to_zero)
1168 {
1169 memset(BufferGetPage(buffer), 0, BLCKSZ);
1170
1171 /*
1172 * Grab the buffer content lock before marking the page as valid, to
1173 * make sure that no other backend sees the zeroed page before the
1174 * caller has had a chance to initialize it.
1175 *
1176 * Since no-one else can be looking at the page contents yet, there is
1177 * no difference between an exclusive lock and a cleanup-strength
1178 * lock. (Note that we cannot use LockBuffer() or
1179 * LockBufferForCleanup() here, because they assert that the buffer is
1180 * already valid.)
1181 */
1182 if (!isLocalBuf)
1183 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1184
1185 /* Set BM_VALID, terminate IO, and wake up any waiters */
1186 if (isLocalBuf)
1187 TerminateLocalBufferIO(bufHdr, false, BM_VALID, false);
1188 else
1189 TerminateBufferIO(bufHdr, false, BM_VALID, true, false);
1190 }
1191 else if (!isLocalBuf)
1192 {
1193 /*
1194 * The buffer is valid, so we can't zero it. The caller still expects
1195 * the page to be locked on return.
1196 */
1197 if (mode == RBM_ZERO_AND_LOCK)
1198 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1199 else
1200 LockBufferForCleanup(buffer);
1201 }
1202}
void LockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6517

References Assert, BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsLocal, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, StartBufferIO(), StartLocalBufferIO(), TerminateBufferIO(), and TerminateLocalBufferIO().

Referenced by ReadBuffer_common().
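
From the caller's perspective this machinery is reached through the RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK read modes, which hand back a zero-filled page that is already locked, skipping the disk read. A hedged sketch of a typical call site (rel and blkno are placeholders):

/* Sketch: (re)initialize a page whose old contents don't matter. */
Buffer  buf;
Page    page;

buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                         RBM_ZERO_AND_LOCK, NULL);  /* returned locked */
page = BufferGetPage(buf);
PageInit(page, BufferGetPageSize(buf), 0);

/* ... initialize page contents ... */

MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);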

Variable Documentation

◆ aio_local_buffer_readv_cb

const PgAioHandleCallbacks aio_local_buffer_readv_cb

Definition at line 8519 of file bufmgr.c.

8519 {
8520 .stage = local_buffer_readv_stage,
8521
8522 /*
8523 * Note that this, in contrast to the shared_buffers case, uses
8524 * complete_local, as only the issuing backend has access to the required
8525 * datastructures. This is important in case the IO completion may be
8526 * consumed incidentally by another backend.
8527 */
8528 .complete_local = local_buffer_readv_complete,
8529 .report = buffer_readv_report,
8530};

◆ aio_shared_buffer_readv_cb

const PgAioHandleCallbacks aio_shared_buffer_readv_cb

Definition at line 8510 of file bufmgr.c.

8510 {
8511 .stage = shared_buffer_readv_stage,
8512 .complete_shared = shared_buffer_readv_complete,
8513 /* need a local callback to report checksum failures */
8514 .complete_local = shared_buffer_readv_complete_local,
8515 .report = buffer_readv_report,
8516};
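
The difference between the two callback sets is deliberate: shared-buffer reads can be completed by whichever backend consumes the IO (complete_shared, with a complete_local hook only for checksum-failure reporting), while local-buffer reads must complete in the issuing backend (complete_local only). A hedged sketch of assembling a callback set of this shape for some hypothetical backend-local IO; every my_* name is invented, only the hook signatures follow PgAioHandleCallbacks:

/* Sketch only; not part of bufmgr.c. */
static void
my_readv_stage(PgAioHandle *ioh, uint8 cb_data)
{
    /* prepare backend-local state before the IO is submitted */
}

static PgAioResult
my_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
{
    /* runs in the issuing backend once the IO finishes */
    return prior_result;
}

static void
my_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
{
    /* translate an error result into an ereport() at elevel */
}

static const PgAioHandleCallbacks my_readv_cb = {
    .stage = my_readv_stage,
    .complete_local = my_readv_complete,  /* issuing backend only */
    .report = my_readv_report,
};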

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 209 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 208 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 174 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 175 of file bufmgr.c.

Referenced by BgBufferSync().
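
In BgBufferSync() these two settings work together: the recent buffer-allocation rate is exponentially smoothed, scaled by bgwriter_lru_multiplier to estimate upcoming demand, and the number of buffers actually written per round is capped at bgwriter_lru_maxpages. A simplified sketch of that pacing, with illustrative variable names rather than the exact ones used in BgBufferSync():

/* Simplified pacing sketch, loosely following BgBufferSync(). */
static int
lru_write_target(double smoothed_alloc)
{
    /* expect demand to be a multiple of the smoothed allocation rate */
    int     upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);

    /* bgwriter_lru_maxpages <= 0 disables LRU writing entirely */
    if (bgwriter_lru_maxpages <= 0)
        return 0;

    /* never write more than the per-round cap */
    return Min(upcoming_alloc_est, bgwriter_lru_maxpages);
}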

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc

Definition at line 269 of file bufmgr.c.

270{
271 .name = "buffer io",
272 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
273 .release_priority = RELEASE_PRIO_BUFFER_IOS,
274 .ReleaseResource = ResOwnerReleaseBufferIO,
275 .DebugPrint = ResOwnerPrintBufferIO
276};

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_resowner_desc

const ResourceOwnerDesc buffer_resowner_desc

Definition at line 278 of file bufmgr.c.

279{
280 .name = "buffer",
281 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
282 .release_priority = RELEASE_PRIO_BUFFER_PINS,
283 .ReleaseResource = ResOwnerReleaseBuffer,
284 .DebugPrint = ResOwnerPrintBuffer
285};

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().
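
Both descriptors are instances of the generic resowner.c pattern: a constant ResourceOwnerDesc fixes the resource kind's name, release phase/priority, and callbacks, and per-resource bookkeeping then goes through ResourceOwnerRemember()/ResourceOwnerForget() with that descriptor. A hedged sketch for a hypothetical resource kind ("widget" and the ResOwner*Widget functions are invented):

/* Sketch of the resowner descriptor pattern; "widget" is hypothetical. */
static void
ResOwnerReleaseWidget(Datum res)
{
    /* release the widget identified by res */
}

static char *
ResOwnerPrintWidget(Datum res)
{
    return psprintf("widget %d", DatumGetInt32(res));
}

static const ResourceOwnerDesc widget_resowner_desc =
{
    .name = "widget",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_FIRST,
    .ReleaseResource = ResOwnerReleaseWidget,
    .DebugPrint = ResOwnerPrintWidget
};

/* Callers then use the generic API: */
/* ResourceOwnerRemember(CurrentResourceOwner, Int32GetDatum(id), &widget_resowner_desc); */
/* ResourceOwnerForget(CurrentResourceOwner, Int32GetDatum(id), &widget_resowner_desc); */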

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 207 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

◆ io_combine_limit

◆ io_combine_limit_guc

int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT

Definition at line 200 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ maintenance_io_concurrency

◆ MaxProportionalPins

uint32 MaxProportionalPins
static

Definition at line 255 of file bufmgr.c.

Referenced by GetAdditionalPinLimit(), GetPinLimit(), and InitBufferManagerAccess().
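
This value implements proportional pin limiting: shared buffers are divided evenly across all possible processes, so one backend's read stream cannot pin an outsized share. Schematically (a hedged sketch of the idea; see InitBufferManagerAccess() and GetAdditionalPinLimit() for the real code, and note that pins_already_held is an illustrative name):

/* Sketch: each process's fair share of pinnable shared buffers,
 * computed once at backend startup. */
MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);

/* Additional pins are then granted only up to the headroom left
 * under that share. */
additional = (pins_already_held > MaxProportionalPins)
    ? 0 : MaxProportionalPins - pins_already_held;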

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 212 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountArrayKeys

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 251 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountEntryLast

int PrivateRefCountEntryLast = -1
static

◆ PrivateRefCountHash

◆ PrivateRefCountOverflowed

◆ ReservedRefCountSlot

int ReservedRefCountSlot = -1
static

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 173 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().
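
When a read detects an invalid page, those call sites consult this flag to decide between failing hard and pressing on with a zeroed page. Schematically (a simplification, not the exact source; page_is_valid() and relpath are placeholders for the real verification and error-message details):

/* Schematic of how zero_damaged_pages is consulted (simplified). */
if (!page_is_valid(page))
{
    if (zero_damaged_pages)
        memset(page, 0, BLCKSZ);    /* warn and carry on with a zero page */
    else
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("invalid page in block %u of relation %s",
                        blkno, relpath)));
}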