PostgreSQL Source Code git master
Loading...
Searching...
No Matches
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/aio.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/proclist.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Include dependency graph for bufmgr.c:

Go to the source code of this file.

Data Structures

struct  PrivateRefCountData
 
struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define READV_COUNT_BITS   7
 
#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)
 

Typedefs

typedef struct PrivateRefCountData PrivateRefCountData
 
typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntryNewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntryGetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static charResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBuffer (Datum res)
 
static charResOwnerPrintBuffer (Datum res)
 
static pg_noinline PrivateRefCountEntryGetPrivateRefCountEntrySlow (Buffer buffer, bool do_move)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDescBufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static bool AsyncReadBuffers (ReadBuffersOperation *operation, int *nblocks_progress)
 
static void CheckReadBuffersOperation (ReadBuffersOperation *operation, bool is_complete)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushUnlockedBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
static void BufferLockAcquire (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockUnlock (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockConditional (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMeInMode (BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMe (BufferDesc *buf_hdr)
 
static void BufferLockDisown (Buffer buffer, BufferDesc *buf_hdr)
 
static int BufferLockDisownInternal (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockAttempt (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockQueueSelf (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockDequeueSelf (BufferDesc *buf_hdr)
 
static void BufferLockWakeup (BufferDesc *buf_hdr, bool unlocked)
 
static void BufferLockProcessRelease (BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
 
static uint64 BufferLockReleaseSub (BufferLockMode mode)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool ReadBuffersCanStartIOOnce (Buffer buffer, bool nowait)
 
static bool ReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
static void ProcessReadBuffersResult (ReadBuffersOperation *operation)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
uint32 GetPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
static void WakePinCountWaiter (BufferDesc *buf)
 
void TrackNewBufferPin (Buffer buf)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferManagerAccess (void)
 
charDebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void UnlockBuffer (Buffer buffer)
 
void LockBufferInternal (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits, bool forget_owner, bool release_aio)
 
uint64 LockBufHdr (BufferDesc *desc)
 
pg_noinline uint64 WaitBufHdrUnlocked (BufferDesc *buf)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
static bool EvictUnpinnedBufferInternal (BufferDesc *desc, bool *buffer_flushed)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
static bool MarkDirtyUnpinnedBufferInternal (Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
static pg_attribute_always_inline void buffer_stage_common (PgAioHandle *ioh, bool is_write, bool is_temp)
 
static void buffer_readv_decode_error (PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
 
static void buffer_readv_encode_error (PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
 
static pg_attribute_always_inline void buffer_readv_complete_one (PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
 
static pg_attribute_always_inline PgAioResult buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
 
static void buffer_readv_report (PgAioResult result, const PgAioTargetData *td, int elevel)
 
static void shared_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete_local (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static void local_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult local_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT
 
int io_max_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDescPinCountWaitBuf = NULL
 
static Buffer PrivateRefCountArrayKeys [REFCOUNT_ARRAY_ENTRIES]
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTABPrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static int ReservedRefCountSlot = -1
 
static int PrivateRefCountEntryLast = -1
 
static uint32 MaxProportionalPins
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_resowner_desc
 
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
const PgAioHandleCallbacks aio_local_buffer_readv_cb
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 92 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 82 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 81 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 74 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
false \
: \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition bufmgr.c:528
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
int32 * LocalRefCount
Definition localbuf.c:49
static int fb(int x)

Definition at line 589 of file bufmgr.c.

593 : \
595 (LocalRefCount[-(bufnum) - 1] > 0) \
596 : \
598)

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 73 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 77 of file bufmgr.c.

◆ READV_COUNT_BITS

#define READV_COUNT_BITS   7

◆ READV_COUNT_MASK

#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 129 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 84 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 3440 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 3440 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 3442 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 3442 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 3439 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 3439 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 3441 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 3441 of file bufmgr.c.

◆ ST_SORT [1/2]

Definition at line 3438 of file bufmgr.c.

◆ ST_SORT [2/2]

Definition at line 3438 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

◆ PrivateRefCountData

◆ PrivateRefCountEntry

◆ SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 7009 of file bufmgr.c.

7010{
7011 BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
7013
7016
7017 if (!(buf_state & BM_VALID))
7018 {
7021 }
7022 else
7023 {
7026
7027 /* Issue notice if this is not the first failure... */
7028 if (buf_state & BM_IO_ERROR)
7029 {
7030 /* Buffer is pinned, so we can read tag without spinlock */
7033 errmsg("could not write block %u of %s",
7034 buf_hdr->tag.blockNum,
7036 BufTagGetForkNum(&buf_hdr->tag)).str),
7037 errdetail("Multiple failures --- write error might be permanent.")));
7038 }
7039 }
7040
7041 TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false, false);
7042}
#define BM_TAG_VALID
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static void UnlockBufHdr(BufferDesc *desc)
#define BM_DIRTY
#define BM_IO_IN_PROGRESS
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
#define BM_IO_ERROR
static BufferDesc * GetBufferDescriptor(uint32 id)
uint64 LockBufHdr(BufferDesc *desc)
Definition bufmgr.c:7107
void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits, bool forget_owner, bool release_aio)
Definition bufmgr.c:6947
#define Assert(condition)
Definition c.h:873
uint64_t uint64
Definition c.h:547
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define WARNING
Definition elog.h:36
#define ereport(elevel,...)
Definition elog.h:150
#define relpathperm(rlocator, forknum)
Definition relpath.h:146

References Assert, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), fb(), GetBufferDescriptor(), LockBufHdr(), relpathperm, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AsyncReadBuffers()

static bool AsyncReadBuffers ( ReadBuffersOperation operation,
int nblocks_progress 
)
static

Definition at line 1864 of file bufmgr.c.

1865{
1866 Buffer *buffers = &operation->buffers[0];
1867 int flags = operation->flags;
1868 BlockNumber blocknum = operation->blocknum;
1869 ForkNumber forknum = operation->forknum;
1870 char persistence = operation->persistence;
1871 int16 nblocks_done = operation->nblocks_done;
1872 Buffer *io_buffers = &operation->buffers[nblocks_done];
1873 int io_buffers_len = 0;
1875 uint32 ioh_flags = 0;
1879 bool did_start_io;
1880
1881 /*
1882 * When this IO is executed synchronously, either because the caller will
1883 * immediately block waiting for the IO or because IOMETHOD_SYNC is used,
1884 * the AIO subsystem needs to know.
1885 */
1886 if (flags & READ_BUFFERS_SYNCHRONOUSLY)
1888
1889 if (persistence == RELPERSISTENCE_TEMP)
1890 {
1894 }
1895 else
1896 {
1899 }
1900
1901 /*
1902 * If zero_damaged_pages is enabled, add the READ_BUFFERS_ZERO_ON_ERROR
1903 * flag. The reason for that is that, hopefully, zero_damaged_pages isn't
1904 * set globally, but on a per-session basis. The completion callback,
1905 * which may be run in other processes, e.g. in IO workers, may have a
1906 * different value of the zero_damaged_pages GUC.
1907 *
1908 * XXX: We probably should eventually use a different flag for
1909 * zero_damaged_pages, so we can report different log levels / error codes
1910 * for zero_damaged_pages and ZERO_ON_ERROR.
1911 */
1914
1915 /*
1916 * For the same reason as with zero_damaged_pages we need to use this
1917 * backend's ignore_checksum_failure value.
1918 */
1921
1922
1923 /*
1924 * To be allowed to report stats in the local completion callback we need
1925 * to prepare to report stats now. This ensures we can safely report the
1926 * checksum failure even in a critical section.
1927 */
1929
1930 /*
1931 * Get IO handle before ReadBuffersCanStartIO(), as pgaio_io_acquire()
1932 * might block, which we don't want after setting IO_IN_PROGRESS.
1933 *
1934 * If we need to wait for IO before we can get a handle, submit
1935 * already-staged IO first, so that other backends don't need to wait.
1936 * There wouldn't be a deadlock risk, as pgaio_io_acquire() just needs to
1937 * wait for already submitted IO, which doesn't require additional locks,
1938 * but it could still cause undesirable waits.
1939 *
1940 * A secondary benefit is that this would allow us to measure the time in
1941 * pgaio_io_acquire() without causing undue timer overhead in the common,
1942 * non-blocking, case. However, currently the pgstats infrastructure
1943 * doesn't really allow that, as it a) asserts that an operation can't
1944 * have time without operations b) doesn't have an API to report
1945 * "accumulated" time.
1946 */
1948 if (unlikely(!ioh))
1949 {
1951
1953 }
1954
1955 /*
1956 * Check if we can start IO on the first to-be-read buffer.
1957 *
1958 * If an I/O is already in progress in another backend, we want to wait
1959 * for the outcome: either done, or something went wrong and we will
1960 * retry.
1961 */
1962 if (!ReadBuffersCanStartIO(buffers[nblocks_done], false))
1963 {
1964 /*
1965 * Someone else has already completed this block, we're done.
1966 *
1967 * When IO is necessary, ->nblocks_done is updated in
1968 * ProcessReadBuffersResult(), but that is not called if no IO is
1969 * necessary. Thus update here.
1970 */
1971 operation->nblocks_done += 1;
1972 *nblocks_progress = 1;
1973
1975 pgaio_wref_clear(&operation->io_wref);
1976 did_start_io = false;
1977
1978 /*
1979 * Report and track this as a 'hit' for this backend, even though it
1980 * must have started out as a miss in PinBufferForBlock(). The other
1981 * backend will track this as a 'read'.
1982 */
1983 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
1984 operation->smgr->smgr_rlocator.locator.spcOid,
1985 operation->smgr->smgr_rlocator.locator.dbOid,
1986 operation->smgr->smgr_rlocator.locator.relNumber,
1987 operation->smgr->smgr_rlocator.backend,
1988 true);
1989
1990 if (persistence == RELPERSISTENCE_TEMP)
1992 else
1994
1995 if (operation->rel)
1996 pgstat_count_buffer_hit(operation->rel);
1997
1999
2000 if (VacuumCostActive)
2002 }
2003 else
2004 {
2006
2007 /* We found a buffer that we need to read in. */
2008 Assert(io_buffers[0] == buffers[nblocks_done]);
2009 io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
2010 io_buffers_len = 1;
2011
2012 /*
2013 * How many neighboring-on-disk blocks can we scatter-read into other
2014 * buffers at the same time? In this case we don't wait if we see an
2015 * I/O already in progress. We already set BM_IO_IN_PROGRESS for the
2016 * head block, so we should get on with that I/O as soon as possible.
2017 */
2018 for (int i = nblocks_done + 1; i < operation->nblocks; i++)
2019 {
2020 if (!ReadBuffersCanStartIO(buffers[i], true))
2021 break;
2022 /* Must be consecutive block numbers. */
2023 Assert(BufferGetBlockNumber(buffers[i - 1]) ==
2024 BufferGetBlockNumber(buffers[i]) - 1);
2025 Assert(io_buffers[io_buffers_len] == buffers[i]);
2026
2027 io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
2028 }
2029
2030 /* get a reference to wait for in WaitReadBuffers() */
2031 pgaio_io_get_wref(ioh, &operation->io_wref);
2032
2033 /* provide the list of buffers to the completion callbacks */
2035
2037 persistence == RELPERSISTENCE_TEMP ?
2040 flags);
2041
2043
2044 /* ---
2045 * Even though we're trying to issue IO asynchronously, track the time
2046 * in smgrstartreadv():
2047 * - if io_method == IOMETHOD_SYNC, we will always perform the IO
2048 * immediately
2049 * - the io method might not support the IO (e.g. worker IO for a temp
2050 * table)
2051 * ---
2052 */
2054 smgrstartreadv(ioh, operation->smgr, forknum,
2055 blocknum + nblocks_done,
2059
2060 if (persistence == RELPERSISTENCE_TEMP)
2062 else
2064
2065 /*
2066 * Track vacuum cost when issuing IO, not after waiting for it.
2067 * Otherwise we could end up issuing a lot of IO in a short timespan,
2068 * despite a low cost limit.
2069 */
2070 if (VacuumCostActive)
2072
2074 did_start_io = true;
2075 }
2076
2077 return did_start_io;
2078}
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition aio.c:162
void pgaio_wref_clear(PgAioWaitRef *iow)
Definition aio.c:964
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
Definition aio.c:366
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition aio.c:330
void pgaio_submit_staged(void)
Definition aio.c:1123
void pgaio_io_release(PgAioHandle *ioh)
Definition aio.c:240
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition aio.c:188
@ PGAIO_HCB_LOCAL_BUFFER_READV
Definition aio.h:200
@ PGAIO_HCB_SHARED_BUFFER_READV
Definition aio.h:198
@ PGAIO_HF_SYNCHRONOUS
Definition aio.h:70
@ PGAIO_HF_REFERENCES_LOCAL
Definition aio.h:60
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
bool track_io_timing
Definition bufmgr.c:176
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4356
static bool ReadBuffersCanStartIO(Buffer buffer, bool nowait)
Definition bufmgr.c:1664
bool zero_damaged_pages
Definition bufmgr.c:173
#define READ_BUFFERS_ZERO_ON_ERROR
Definition bufmgr.h:122
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
#define MAX_IO_COMBINE_LIMIT
Definition bufmgr.h:173
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES
Definition bufmgr.h:126
#define READ_BUFFERS_SYNCHRONOUSLY
Definition bufmgr.h:128
bool ignore_checksum_failure
Definition bufpage.c:27
int16_t int16
Definition c.h:541
#define unlikely(x)
Definition c.h:412
uint32_t uint32
Definition c.h:546
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition freelist.c:747
int VacuumCostPageMiss
Definition globals.c:152
bool VacuumCostActive
Definition globals.c:158
int VacuumCostBalance
Definition globals.c:157
int VacuumCostPageHit
Definition globals.c:151
BufferUsage pgBufferUsage
Definition instrument.c:20
int i
Definition isn.c:77
IOObject
Definition pgstat.h:276
@ IOOBJECT_RELATION
Definition pgstat.h:277
@ IOOBJECT_TEMP_RELATION
Definition pgstat.h:278
IOContext
Definition pgstat.h:285
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOOP_READ
Definition pgstat.h:315
@ IOOP_HIT
Definition pgstat.h:309
#define pgstat_count_buffer_hit(rel)
Definition pgstat.h:720
void pgstat_prepare_report_checksum_failure(Oid dboid)
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:68
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
ForkNumber
Definition relpath.h:56
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition smgr.c:753
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 shared_blks_hit
Definition instrument.h:26
ForkNumber forknum
Definition bufmgr.h:137
PgAioWaitRef io_wref
Definition bufmgr.h:150
SMgrRelation smgr
Definition bufmgr.h:135
BufferAccessStrategy strategy
Definition bufmgr.h:138
BlockNumber blocknum
Definition bufmgr.h:146
PgAioReturn io_return
Definition bufmgr.h:151
RelFileLocator locator
RelFileNumber relNumber
RelFileLocatorBackend smgr_rlocator
Definition smgr.h:38

References Assert, RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, CurrentResourceOwner, RelFileLocator::dbOid, fb(), ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ignore_checksum_failure, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, IOOP_READ, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_HF_REFERENCES_LOCAL, PGAIO_HF_SYNCHRONOUS, pgaio_io_acquire(), pgaio_io_acquire_nb(), pgaio_io_get_wref(), pgaio_io_register_callbacks(), pgaio_io_release(), pgaio_io_set_flag(), pgaio_io_set_handle_data_32(), pgaio_submit_staged(), pgaio_wref_clear(), pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_io_op(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_prepare_report_checksum_failure(), READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersCanStartIO(), ReadBuffersOperation::rel, RelFileLocator::relNumber, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrstartreadv(), RelFileLocator::spcOid, ReadBuffersOperation::strategy, track_io_timing, unlikely, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, and zero_damaged_pages.

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 4103 of file bufmgr.c.

4104{
4106
4108
4110}
static void CheckForBufferLeaks(void)
Definition bufmgr.c:4173
static int32 PrivateRefCountOverflowed
Definition bufmgr.c:250
void AtEOXact_LocalBuffers(bool isCommit)
Definition localbuf.c:1003

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), fb(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 4155 of file bufmgr.c.

4156{
4157 UnlockBuffers();
4158
4160
4161 /* localbuf.c needs a chance too */
4163}
void UnlockBuffers(void)
Definition bufmgr.c:5709
void AtProcExit_LocalBuffers(void)
Definition localbuf.c:1014

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferManagerAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext wb_context)

Definition at line 3735 of file bufmgr.c.

3736{
3737 /* info obtained from freelist.c */
3738 int strategy_buf_id;
3741
3742 /*
3743 * Information saved between calls so we can determine the strategy
3744 * point's advance rate and avoid scanning already-cleaned buffers.
3745 */
3746 static bool saved_info_valid = false;
3747 static int prev_strategy_buf_id;
3749 static int next_to_clean;
3750 static uint32 next_passes;
3751
3752 /* Moving averages of allocation rate and clean-buffer density */
3753 static float smoothed_alloc = 0;
3754 static float smoothed_density = 10.0;
3755
3756 /* Potentially these could be tunables, but for now, not */
3757 float smoothing_samples = 16;
3758 float scan_whole_pool_milliseconds = 120000.0;
3759
3760 /* Used to compute how far we scan ahead */
3761 long strategy_delta;
3762 int bufs_to_lap;
3763 int bufs_ahead;
3764 float scans_per_alloc;
3767 int min_scan_buffers;
3768
3769 /* Variables for the scanning loop proper */
3770 int num_to_scan;
3771 int num_written;
3772 int reusable_buffers;
3773
3774 /* Variables for final smoothed_density update */
3775 long new_strategy_delta;
3777
3778 /*
3779 * Find out where the clock-sweep currently is, and how many buffer
3780 * allocations have happened since our last call.
3781 */
3783
3784 /* Report buffer alloc counts to pgstat */
3786
3787 /*
3788 * If we're not running the LRU scan, just stop after doing the stats
3789 * stuff. We mark the saved state invalid so that we can recover sanely
3790 * if LRU scan is turned back on later.
3791 */
3792 if (bgwriter_lru_maxpages <= 0)
3793 {
3794 saved_info_valid = false;
3795 return true;
3796 }
3797
3798 /*
3799 * Compute strategy_delta = how many buffers have been scanned by the
3800 * clock-sweep since last time. If first time through, assume none. Then
3801 * see if we are still ahead of the clock-sweep, and if so, how many
3802 * buffers we could scan before we'd catch up with it and "lap" it. Note:
3803 * weird-looking coding of xxx_passes comparisons are to avoid bogus
3804 * behavior when the passes counts wrap around.
3805 */
3806 if (saved_info_valid)
3807 {
3809
3812
3813 Assert(strategy_delta >= 0);
3814
3815 if ((int32) (next_passes - strategy_passes) > 0)
3816 {
3817 /* we're one pass ahead of the strategy point */
3819#ifdef BGW_DEBUG
3820 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3824#endif
3825 }
3826 else if (next_passes == strategy_passes &&
3828 {
3829 /* on same pass, but ahead or at least not behind */
3831#ifdef BGW_DEBUG
3832 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3836#endif
3837 }
3838 else
3839 {
3840 /*
3841 * We're behind, so skip forward to the strategy point and start
3842 * cleaning from there.
3843 */
3844#ifdef BGW_DEBUG
3845 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3849#endif
3853 }
3854 }
3855 else
3856 {
3857 /*
3858 * Initializing at startup or after LRU scanning had been off. Always
3859 * start at the strategy point.
3860 */
3861#ifdef BGW_DEBUG
3862 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3864#endif
3865 strategy_delta = 0;
3869 }
3870
3871 /* Update saved info for next time */
3874 saved_info_valid = true;
3875
3876 /*
3877 * Compute how many buffers had to be scanned for each new allocation, ie,
3878 * 1/density of reusable buffers, and track a moving average of that.
3879 *
3880 * If the strategy point didn't move, we don't update the density estimate
3881 */
3882 if (strategy_delta > 0 && recent_alloc > 0)
3883 {
3887 }
3888
3889 /*
3890 * Estimate how many reusable buffers there are between the current
3891 * strategy point and where we've scanned ahead to, based on the smoothed
3892 * density estimate.
3893 */
3896
3897 /*
3898 * Track a moving average of recent buffer allocations. Here, rather than
3899 * a true average we want a fast-attack, slow-decline behavior: we
3900 * immediately follow any increase.
3901 */
3902 if (smoothed_alloc <= (float) recent_alloc)
3904 else
3907
3908 /* Scale the estimate by a GUC to allow more aggressive tuning. */
3910
3911 /*
3912 * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3913 * eventually underflow to zero, and the underflows produce annoying
3914 * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3915 * zero, there's no point in tracking smaller and smaller values of
3916 * smoothed_alloc, so just reset it to exactly zero to avoid this
3917 * syndrome. It will pop back up as soon as recent_alloc increases.
3918 */
3919 if (upcoming_alloc_est == 0)
3920 smoothed_alloc = 0;
3921
3922 /*
3923 * Even in cases where there's been little or no buffer allocation
3924 * activity, we want to make a small amount of progress through the buffer
3925 * cache so that as many reusable buffers as possible are clean after an
3926 * idle period.
3927 *
3928 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3929 * the BGW will be called during the scan_whole_pool time; slice the
3930 * buffer pool into that many sections.
3931 */
3933
3935 {
3936#ifdef BGW_DEBUG
3937 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3939#endif
3941 }
3942
3943 /*
3944 * Now write out dirty reusable buffers, working forward from the
3945 * next_to_clean point, until we have lapped the strategy scan, or cleaned
3946 * enough buffers to match our estimate of the next cycle's allocation
3947 * requirements, or hit the bgwriter_lru_maxpages limit.
3948 */
3949
3950 num_to_scan = bufs_to_lap;
3951 num_written = 0;
3953
3954 /* Execute the LRU scan */
3955 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3956 {
3958 wb_context);
3959
3960 if (++next_to_clean >= NBuffers)
3961 {
3962 next_to_clean = 0;
3963 next_passes++;
3964 }
3965 num_to_scan--;
3966
3967 if (sync_state & BUF_WRITTEN)
3968 {
3971 {
3973 break;
3974 }
3975 }
3976 else if (sync_state & BUF_REUSABLE)
3978 }
3979
3981
3982#ifdef BGW_DEBUG
3983 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3986 bufs_to_lap - num_to_scan,
3989#endif
3990
3991 /*
3992 * Consider the above scan as being like a new allocation scan.
3993 * Characterize its density and update the smoothed one based on it. This
3994 * effectively halves the moving average period in cases where both the
3995 * strategy and the background writer are doing some useful scanning,
3996 * which is helpful because a long memory isn't as desirable on the
3997 * density estimates.
3998 */
3999 new_strategy_delta = bufs_to_lap - num_to_scan;
4001 if (new_strategy_delta > 0 && new_recent_alloc > 0)
4002 {
4006
4007#ifdef BGW_DEBUG
4008 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
4011#endif
4012 }
4013
4014 /* Return true if OK to hibernate */
4015 return (bufs_to_lap == 0 && recent_alloc == 0);
4016}
int BgWriterDelay
Definition bgwriter.c:58
#define BUF_REUSABLE
Definition bufmgr.c:82
double bgwriter_lru_multiplier
Definition bufmgr.c:175
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition bufmgr.c:4033
int bgwriter_lru_maxpages
Definition bufmgr.c:174
#define BUF_WRITTEN
Definition bufmgr.c:81
int32_t int32
Definition c.h:542
#define DEBUG2
Definition elog.h:29
#define DEBUG1
Definition elog.h:30
#define elog(elevel,...)
Definition elog.h:226
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition freelist.c:321
int NBuffers
Definition globals.c:142
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition pgstat.h:242
PgStat_Counter maxwritten_clean
Definition pgstat.h:243
PgStat_Counter buf_alloc
Definition pgstat.h:244

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, fb(), PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().

◆ buffer_readv_complete()

static pg_attribute_always_inline PgAioResult buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data,
bool  is_temp 
)
static

Definition at line 8258 of file bufmgr.c.

8260{
8261 PgAioResult result = prior_result;
8266 uint8 error_count = 0;
8267 uint8 zeroed_count = 0;
8268 uint8 ignored_count = 0;
8270 uint64 *io_data;
8271 uint8 handle_data_len;
8272
8273 if (is_temp)
8274 {
8275 Assert(td->smgr.is_temp);
8277 }
8278 else
8279 Assert(!td->smgr.is_temp);
8280
8281 /*
8282 * Iterate over all the buffers affected by this IO and call the
8283 * per-buffer completion function for each buffer.
8284 */
8285 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
8286 for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
8287 {
8289 bool failed;
8290 bool failed_verification = false;
8291 bool failed_checksum = false;
8292 bool zeroed_buffer = false;
8293 bool ignored_checksum = false;
8294
8296
8297 /*
8298 * If the entire I/O failed on a lower-level, each buffer needs to be
8299 * marked as failed. In case of a partial read, the first few buffers
8300 * may be ok.
8301 */
8302 failed =
8304 || prior_result.result <= buf_off;
8305
8306 buffer_readv_complete_one(td, buf_off, buf, cb_data, failed, is_temp,
8310 &zeroed_buffer);
8311
8312 /*
8313 * Track information about the number of different kinds of error
8314 * conditions across all pages, as there can be multiple pages failing
8315 * verification as part of one IO.
8316 */
8319 if (zeroed_buffer && zeroed_count++ == 0)
8321 if (ignored_checksum && ignored_count++ == 0)
8323 if (failed_checksum)
8325 }
8326
8327 /*
8328 * If the smgr read succeeded [partially] and page verification failed for
8329 * some of the pages, adjust the IO's result state appropriately.
8330 */
8331 if (prior_result.status != PGAIO_RS_ERROR &&
8332 (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
8333 {
8334 buffer_readv_encode_error(&result, is_temp,
8335 zeroed_count > 0, ignored_count > 0,
8339 pgaio_result_report(result, td, DEBUG1);
8340 }
8341
8342 /*
8343 * For shared relations this reporting is done in
8344 * shared_buffer_readv_complete_local().
8345 */
8346 if (is_temp && checkfail_count > 0)
8349
8350 return result;
8351}
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
Definition aio.c:355
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
Definition aio_target.c:73
@ PGAIO_RS_ERROR
Definition aio_types.h:84
static pg_attribute_always_inline void buffer_readv_complete_one(PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
Definition bufmgr.c:8114
static void buffer_readv_encode_error(PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
Definition bufmgr.c:8019
uint8_t uint8
Definition c.h:544
ProcNumber MyProcNumber
Definition globals.c:90
static char buf[DEFAULT_XLOG_SEG_SIZE]
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
RelFileLocator rlocator
Definition aio_types.h:65
struct PgAioTargetData::@126 smgr

References Assert, buf, buffer_readv_complete_one(), buffer_readv_encode_error(), BufferIsValid(), RelFileLocator::dbOid, DEBUG1, fb(), PgAioTargetData::is_temp, MyProcNumber, pgaio_io_get_handle_data(), pgaio_io_get_owner(), pgaio_io_get_target_data(), pgaio_result_report(), PGAIO_RS_ERROR, pgstat_report_checksum_failures_in_db(), PgAioTargetData::rlocator, and PgAioTargetData::smgr.

Referenced by local_buffer_readv_complete(), and shared_buffer_readv_complete().

◆ buffer_readv_complete_one()

static pg_attribute_always_inline void buffer_readv_complete_one ( PgAioTargetData td,
uint8  buf_off,
Buffer  buffer,
uint8  flags,
bool  failed,
bool  is_temp,
bool buffer_invalid,
bool failed_checksum,
bool ignored_checksum,
bool zeroed_buffer 
)
static

Definition at line 8114 of file bufmgr.c.

8120{
8121 BufferDesc *buf_hdr = is_temp ?
8122 GetLocalBufferDescriptor(-buffer - 1)
8123 : GetBufferDescriptor(buffer - 1);
8124 BufferTag tag = buf_hdr->tag;
8125 char *bufdata = BufferGetBlock(buffer);
8127 int piv_flags;
8128
8129 /* check that the buffer is in the expected state for a read */
8130#ifdef USE_ASSERT_CHECKING
8131 {
8133
8136 /* temp buffers don't use BM_IO_IN_PROGRESS */
8137 if (!is_temp)
8140 }
8141#endif
8142
8143 *buffer_invalid = false;
8144 *failed_checksum = false;
8145 *ignored_checksum = false;
8146 *zeroed_buffer = false;
8147
8148 /*
8149 * We ask PageIsVerified() to only log the message about checksum errors,
8150 * as the completion might be run in any backend (or IO workers). We will
8151 * report checksum errors in buffer_readv_report().
8152 */
8154
8155 /* the local zero_damaged_pages may differ from the definer's */
8158
8159 /* Check for garbage data. */
8160 if (!failed)
8161 {
8162 /*
8163 * If the buffer is not currently pinned by this backend, e.g. because
8164 * we're completing this IO after an error, the buffer data will have
8165 * been marked as inaccessible when the buffer was unpinned. The AIO
8166 * subsystem holds a pin, but that doesn't prevent the buffer from
8167 * having been marked as inaccessible. The completion might also be
8168 * executed in a different process.
8169 */
8170#ifdef USE_VALGRIND
8171 if (!BufferIsPinned(buffer))
8173#endif
8174
8175 if (!PageIsVerified((Page) bufdata, tag.blockNum, piv_flags,
8177 {
8178 if (flags & READ_BUFFERS_ZERO_ON_ERROR)
8179 {
8180 memset(bufdata, 0, BLCKSZ);
8181 *zeroed_buffer = true;
8182 }
8183 else
8184 {
8185 *buffer_invalid = true;
8186 /* mark buffer as having failed */
8187 failed = true;
8188 }
8189 }
8190 else if (*failed_checksum)
8191 *ignored_checksum = true;
8192
8193 /* undo what we did above */
8194#ifdef USE_VALGRIND
8195 if (!BufferIsPinned(buffer))
8197#endif
8198
8199 /*
8200 * Immediately log a message about the invalid page, but only to the
8201 * server log. The reason to do so immediately is that this may be
8202 * executed in a different backend than the one that originated the
8203 * request. The reason to do so immediately is that the originator
8204 * might not process the query result immediately (because it is busy
8205 * doing another part of query processing) or at all (e.g. if it was
8206 * cancelled or errored out due to another IO also failing). The
8207 * definer of the IO will emit an ERROR or WARNING when processing the
8208 * IO's results
8209 *
8210 * To avoid duplicating the code to emit these log messages, we reuse
8211 * buffer_readv_report().
8212 */
8214 {
8215 PgAioResult result_one = {0};
8216
8221 *zeroed_buffer ? 1 : 0,
8222 *failed_checksum ? 1 : 0,
8225 }
8226 }
8227
8228 /* Terminate I/O and set BM_VALID. */
8229 set_flag_bits = failed ? BM_IO_ERROR : BM_VALID;
8230 if (is_temp)
8232 else
8233 TerminateBufferIO(buf_hdr, false, set_flag_bits, false, true);
8234
8235 /*
8236 * Call the BUFFER_READ_DONE tracepoint in the callback, even though the
8237 * callback may not be executed in the same backend that called
8238 * BUFFER_READ_START. The alternative would be to defer calling the
8239 * tracepoint to a later point (e.g. the local completion callback for
8240 * shared buffer reads), which seems even less helpful.
8241 */
8243 tag.blockNum,
8244 tag.spcOid,
8245 tag.dbOid,
8246 tag.relNumber,
8248 false);
8249}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition bufmgr.c:589
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
Definition bufpage.c:94
#define PIV_LOG_LOG
Definition bufpage.h:468
PageData * Page
Definition bufpage.h:81
#define PIV_IGNORE_CHECKSUM_FAILURE
Definition bufpage.h:469
#define LOG_SERVER_ONLY
Definition elog.h:32
#define false
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint64 set_flag_bits, bool release_aio)
Definition localbuf.c:562
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition memdebug.h:26
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition memdebug.h:27
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
BlockNumber blockNum
RelFileNumber relNumber
ForkNumber forkNum

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, buffer_readv_encode_error(), BufferGetBlock(), BufferIsPinned, buftag::dbOid, fb(), buftag::forkNum, GetBufferDescriptor(), GetLocalBufferDescriptor(), INVALID_PROC_NUMBER, LOG_SERVER_ONLY, MyProcNumber, PageIsVerified(), pg_atomic_read_u64(), pgaio_result_report(), PIV_IGNORE_CHECKSUM_FAILURE, PIV_LOG_LOG, READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_ZERO_ON_ERROR, buftag::relNumber, buftag::spcOid, TerminateBufferIO(), TerminateLocalBufferIO(), VALGRIND_MAKE_MEM_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by buffer_readv_complete().

◆ buffer_readv_decode_error()

static void buffer_readv_decode_error ( PgAioResult  result,
bool zeroed_any,
bool ignored_any,
uint8 zeroed_or_error_count,
uint8 checkfail_count,
uint8 first_off 
)
inlinestatic

Definition at line 7977 of file bufmgr.c.

7983{
7984 uint32 rem_error = result.error_data;
7985
7986 /* see static asserts in buffer_readv_encode_error */
7987#define READV_COUNT_BITS 7
7988#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)
7989
7990 *zeroed_any = rem_error & 1;
7991 rem_error >>= 1;
7992
7993 *ignored_any = rem_error & 1;
7994 rem_error >>= 1;
7995
7998
8001
8004}
#define READV_COUNT_BITS
#define READV_COUNT_MASK
uint32 error_data
Definition aio_types.h:111

References PgAioResult::error_data, fb(), READV_COUNT_BITS, and READV_COUNT_MASK.

Referenced by buffer_readv_encode_error(), buffer_readv_report(), and shared_buffer_readv_complete_local().

◆ buffer_readv_encode_error()

static void buffer_readv_encode_error ( PgAioResult result,
bool  is_temp,
bool  zeroed_any,
bool  ignored_any,
uint8  error_count,
uint8  zeroed_count,
uint8  checkfail_count,
uint8  first_error_off,
uint8  first_zeroed_off,
uint8  first_ignored_off 
)
inlinestatic

Definition at line 8019 of file bufmgr.c.

8029{
8030
8031 uint8 shift = 0;
8035
8037 "PG_IOV_MAX is bigger than reserved space for error data");
8039 "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");
8040
8041 /*
8042 * We only have space to encode one offset - but luckily that's good
8043 * enough. If there is an error, the error is the interesting offset, same
8044 * with a zeroed buffer vs an ignored buffer.
8045 */
8046 if (error_count > 0)
8048 else if (zeroed_count > 0)
8050 else
8052
8053 Assert(!zeroed_any || error_count == 0);
8054
8055 result->error_data = 0;
8056
8057 result->error_data |= zeroed_any << shift;
8058 shift += 1;
8059
8060 result->error_data |= ignored_any << shift;
8061 shift += 1;
8062
8063 result->error_data |= ((uint32) zeroed_or_error_count) << shift;
8064 shift += READV_COUNT_BITS;
8065
8066 result->error_data |= ((uint32) checkfail_count) << shift;
8067 shift += READV_COUNT_BITS;
8068
8069 result->error_data |= ((uint32) first_off) << shift;
8070 shift += READV_COUNT_BITS;
8071
8072 result->id = is_temp ? PGAIO_HCB_LOCAL_BUFFER_READV :
8074
8075 if (error_count > 0)
8076 result->status = PGAIO_RS_ERROR;
8077 else
8078 result->status = PGAIO_RS_WARNING;
8079
8080 /*
8081 * The encoding is complicated enough to warrant cross-checking it against
8082 * the decode function.
8083 */
8084#ifdef USE_ASSERT_CHECKING
8085 {
8086 bool zeroed_any_2,
8091
8096 &first_off_2);
8102 }
8103#endif
8104
8105#undef READV_COUNT_BITS
8106#undef READV_COUNT_MASK
8107}
#define PGAIO_RESULT_ERROR_BITS
Definition aio_types.h:98
@ PGAIO_RS_WARNING
Definition aio_types.h:83
static void buffer_readv_decode_error(PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
Definition bufmgr.c:7977
#define StaticAssertDecl(condition, errmessage)
Definition c.h:942
#define PG_IOV_MAX
Definition pg_iovec.h:47
uint32 status
Definition aio_types.h:108
uint32 id
Definition aio_types.h:105

References Assert, buffer_readv_decode_error(), PgAioResult::error_data, fb(), PgAioResult::id, PG_IOV_MAX, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_RESULT_ERROR_BITS, PGAIO_RS_ERROR, PGAIO_RS_WARNING, READV_COUNT_BITS, StaticAssertDecl, and PgAioResult::status.

Referenced by buffer_readv_complete(), and buffer_readv_complete_one().

◆ buffer_readv_report()

static void buffer_readv_report ( PgAioResult  result,
const PgAioTargetData td,
int  elevel 
)
static

Definition at line 8361 of file bufmgr.c.

8363{
8364 int nblocks = td->smgr.nblocks;
8365 BlockNumber first = td->smgr.blockNum;
8366 BlockNumber last = first + nblocks - 1;
8369 RelPathStr rpath =
8371 bool zeroed_any,
8375 first_off;
8377 const char *msg_one,
8378 *msg_mult,
8379 *det_mult,
8380 *hint_mult;
8381
8385 &first_off);
8386
8387 /*
8388 * Treat a read that had both zeroed buffers *and* ignored checksums as a
8389 * special case, it's too irregular to be emitted the same way as the
8390 * other cases.
8391 */
8392 if (zeroed_any && ignored_any)
8393 {
8395 Assert(nblocks > 1); /* same block can't be both zeroed and ignored */
8396 Assert(result.status != PGAIO_RS_ERROR);
8398
8399 ereport(elevel,
8401 errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation \"%s\"",
8402 affected_count, checkfail_count, first, last, rpath.str),
8403 affected_count > 1 ?
8404 errdetail("Block %u held the first zeroed page.",
8405 first + first_off) : 0,
8406 errhint_plural("See server log for details about the other %d invalid block.",
8407 "See server log for details about the other %d invalid blocks.",
8410 return;
8411 }
8412
8413 /*
8414 * The other messages are highly repetitive. To avoid duplicating a long
8415 * and complicated ereport(), gather the translated format strings
8416 * separately and then do one common ereport.
8417 */
8418 if (result.status == PGAIO_RS_ERROR)
8419 {
8420 Assert(!zeroed_any); /* can't have invalid pages when zeroing them */
8422 msg_one = _("invalid page in block %u of relation \"%s\"");
8423 msg_mult = _("%u invalid pages among blocks %u..%u of relation \"%s\"");
8424 det_mult = _("Block %u held the first invalid page.");
8425 hint_mult = _("See server log for the other %u invalid block(s).");
8426 }
8427 else if (zeroed_any && !ignored_any)
8428 {
8430 msg_one = _("invalid page in block %u of relation \"%s\"; zeroing out page");
8431 msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation \"%s\"");
8432 det_mult = _("Block %u held the first zeroed page.");
8433 hint_mult = _("See server log for the other %u zeroed block(s).");
8434 }
8435 else if (!zeroed_any && ignored_any)
8436 {
8438 msg_one = _("ignoring checksum failure in block %u of relation \"%s\"");
8439 msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation \"%s\"");
8440 det_mult = _("Block %u held the first ignored page.");
8441 hint_mult = _("See server log for the other %u ignored block(s).");
8442 }
8443 else
8445
8446 ereport(elevel,
8448 affected_count == 1 ?
8449 errmsg_internal(msg_one, first + first_off, rpath.str) :
8450 errmsg_internal(msg_mult, affected_count, first, last, rpath.str),
8453}
#define pg_unreachable()
Definition c.h:341
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errdetail_internal(const char *fmt,...)
Definition elog.c:1243
int errhint_internal(const char *fmt,...)
Definition elog.c:1352
int errhint_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition elog.c:1373
#define _(x)
Definition elog.c:91
const char * str
#define ERRCODE_DATA_CORRUPTED
int ProcNumber
Definition procnumber.h:24
#define relpathbackend(rlocator, backend, forknum)
Definition relpath.h:141
char str[REL_PATH_STR_MAXLEN+1]
Definition relpath.h:123
BlockNumber blockNum
Definition aio_types.h:66
BlockNumber nblocks
Definition aio_types.h:67
ForkNumber forkNum
Definition aio_types.h:68

References _, Assert, PgAioTargetData::blockNum, buffer_readv_decode_error(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errdetail(), errdetail_internal(), errhint_internal(), errhint_plural(), errmsg(), errmsg_internal(), fb(), PgAioTargetData::forkNum, INVALID_PROC_NUMBER, PgAioTargetData::is_temp, MyProcNumber, PgAioTargetData::nblocks, pg_unreachable, PGAIO_RS_ERROR, relpathbackend, PgAioTargetData::rlocator, PgAioTargetData::smgr, PgAioResult::status, and RelPathStr::str.

◆ buffer_stage_common()

static pg_attribute_always_inline void buffer_stage_common ( PgAioHandle ioh,
bool  is_write,
bool  is_temp 
)
static

Definition at line 7870 of file bufmgr.c.

7871{
7872 uint64 *io_data;
7873 uint8 handle_data_len;
7876
7877 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
7878
7880
7881 /* iterate over all buffers affected by the vectored readv/writev */
7882 for (int i = 0; i < handle_data_len; i++)
7883 {
7884 Buffer buffer = (Buffer) io_data[i];
7885 BufferDesc *buf_hdr = is_temp ?
7886 GetLocalBufferDescriptor(-buffer - 1)
7887 : GetBufferDescriptor(buffer - 1);
7889
7890 /*
7891 * Check that all the buffers are actually ones that could conceivably
7892 * be done in one IO, i.e. are sequential. This is the last
7893 * buffer-aware code before IO is actually executed and confusion
7894 * about which buffers are targeted by IO can be hard to debug, making
7895 * it worth doing extra-paranoid checks.
7896 */
7897 if (i == 0)
7898 first = buf_hdr->tag;
7899 else
7900 {
7901 Assert(buf_hdr->tag.relNumber == first.relNumber);
7902 Assert(buf_hdr->tag.blockNum == first.blockNum + i);
7903 }
7904
7905 if (is_temp)
7907 else
7909
7910 /* verify the buffer is in the expected state */
7912 if (is_write)
7913 {
7916 }
7917 else
7918 {
7921 }
7922
7923 /* temp buffers don't use BM_IO_IN_PROGRESS */
7924 if (!is_temp)
7926
7928
7929 /*
7930 * Reflect that the buffer is now owned by the AIO subsystem.
7931 *
7932 * For local buffers: This can't be done just via LocalRefCount, as
7933 * one might initially think, as this backend could error out while
7934 * AIO is still in progress, releasing all the pins by the backend
7935 * itself.
7936 *
7937 * This pin is released again in TerminateBufferIO().
7938 */
7939 buf_hdr->io_wref = io_ref;
7940
7941 if (is_temp)
7942 {
7945 }
7946 else
7948
7949 /*
7950 * Ensure the content lock that prevents buffer modifications while
7951 * the buffer is being written out is not released early due to an
7952 * error.
7953 */
7954 if (is_write && !is_temp)
7955 {
7957
7958 /*
7959 * Lock is now owned by AIO subsystem.
7960 */
7961 BufferLockDisown(buffer, buf_hdr);
7962 }
7963
7964 /*
7965 * Stop tracking this buffer via the resowner - the AIO system now
7966 * keeps track.
7967 */
7968 if (!is_temp)
7970 }
7971}
static void pg_atomic_unlocked_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:494
#define BUF_REFCOUNT_ONE
static uint64 UnlockBufHdrExt(BufferDesc *desc, uint64 old_buf_state, uint64 set_bits, uint64 unset_bits, int refcount_change)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_REFCOUNT(state)
static void BufferLockDisown(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6128
static bool BufferLockHeldByMe(BufferDesc *buf_hdr)
Definition bufmgr.c:6400
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
BufferTag tag

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferLockDisown(), BufferLockHeldByMe(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, LockBufHdr(), pg_atomic_read_u64(), pg_atomic_unlocked_write_u64(), PG_USED_FOR_ASSERTS_ONLY, pgaio_io_get_handle_data(), pgaio_io_get_wref(), ResourceOwnerForgetBufferIO(), and UnlockBufHdrExt().

Referenced by local_buffer_readv_stage(), and shared_buffer_readv_stage().

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 2100 of file bufmgr.c.

2104{
2105 BufferTag newTag; /* identity of requested block */
2106 uint32 newHash; /* hash value for newTag */
2107 LWLock *newPartitionLock; /* buffer partition lock for it */
2108 int existing_buf_id;
2112 uint64 set_bits = 0;
2113
2114 /* Make sure we will have room to remember the buffer pin */
2117
2118 /* create a tag so we can lookup the buffer */
2119 InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
2120
2121 /* determine its hash code and partition lock ID */
2124
2125 /* see if the block is in the buffer pool already */
2128 if (existing_buf_id >= 0)
2129 {
2130 BufferDesc *buf;
2131 bool valid;
2132
2133 /*
2134 * Found it. Now, pin the buffer so no one can steal it from the
2135 * buffer pool, and check to see if the correct data has been loaded
2136 * into the buffer.
2137 */
2139
2140 valid = PinBuffer(buf, strategy, false);
2141
2142 /* Can release the mapping lock as soon as we've pinned it */
2144
2145 *foundPtr = true;
2146
2147 if (!valid)
2148 {
2149 /*
2150 * We can only get here if (a) someone else is still reading in
2151 * the page, (b) a previous read attempt failed, or (c) someone
2152 * called StartReadBuffers() but not yet WaitReadBuffers().
2153 */
2154 *foundPtr = false;
2155 }
2156
2157 return buf;
2158 }
2159
2160 /*
2161 * Didn't find it in the buffer pool. We'll have to initialize a new
2162 * buffer. Remember to unlock the mapping lock while doing the work.
2163 */
2165
2166 /*
2167 * Acquire a victim buffer. Somebody else might try to do the same, we
2168 * don't hold any conflicting locks. If so we'll have to undo our work
2169 * later.
2170 */
2173
2174 /*
2175 * Try to make a hashtable entry for the buffer under its new tag. If
2176 * somebody else inserted another buffer for the tag, we'll release the
2177 * victim buffer we acquired and use the already inserted one.
2178 */
2181 if (existing_buf_id >= 0)
2182 {
2184 bool valid;
2185
2186 /*
2187 * Got a collision. Someone has already done what we were about to do.
2188 * We'll just handle this as if it were found in the buffer pool in
2189 * the first place. First, give up the buffer we were planning to
2190 * use.
2191 *
2192 * We could do this after releasing the partition lock, but then we'd
2193 * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
2194 * before acquiring the lock, for the rare case of such a collision.
2195 */
2197
2198 /* remaining code should match code at top of routine */
2199
2201
2202 valid = PinBuffer(existing_buf_hdr, strategy, false);
2203
2204 /* Can release the mapping lock as soon as we've pinned it */
2206
2207 *foundPtr = true;
2208
2209 if (!valid)
2210 {
2211 /*
2212 * We can only get here if (a) someone else is still reading in
2213 * the page, (b) a previous read attempt failed, or (c) someone
2214 * called StartReadBuffers() but not yet WaitReadBuffers().
2215 */
2216 *foundPtr = false;
2217 }
2218
2219 return existing_buf_hdr;
2220 }
2221
2222 /*
2223 * Need to lock the buffer header too in order to change its tag.
2224 */
2226
2227 /* some sanity checks while we hold the buffer header lock */
2230
2231 victim_buf_hdr->tag = newTag;
2232
2233 /*
2234 * Make sure BM_PERMANENT is set for buffers that must be written at every
2235 * checkpoint. Unlogged buffers only need to be written at shutdown
2236 * checkpoints, except for their "init" forks, which need to be treated
2237 * just like permanent relations.
2238 */
2240 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2242
2244 set_bits, 0, 0);
2245
2247
2248 /*
2249 * Buffer contents are currently invalid.
2250 */
2251 *foundPtr = false;
2252
2253 return victim_buf_hdr;
2254}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
#define BUF_USAGECOUNT_ONE
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition buf_table.c:118
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition bufmgr.c:2451
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition bufmgr.c:3181
static void ReservePrivateRefCountEntry(void)
Definition bufmgr.c:293
static void UnpinBuffer(BufferDesc *buf)
Definition bufmgr.c:3360
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
@ INIT_FORKNUM
Definition relpath.h:61
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition resowner.c:449

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, UnlockBufHdrExt(), and UnpinBuffer().

Referenced by PinBufferForBlock().

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4356 of file bufmgr.c.

4357{
4359
4360 Assert(BufferIsPinned(buffer));
4361
4362 if (BufferIsLocal(buffer))
4363 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4364 else
4365 bufHdr = GetBufferDescriptor(buffer - 1);
4366
4367 /* pinned, so OK to read tag without spinlock */
4368 return bufHdr->tag.blockNum;
4369}
#define BufferIsLocal(buffer)
Definition buf.h:37

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4634 of file bufmgr.c.

4635{
4636 char *page = BufferGetPage(buffer);
4638 XLogRecPtr lsn;
4639
4640 /*
4641 * If we don't need locking for correctness, fastpath out.
4642 */
4643 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4644 return PageGetLSN(page);
4645
4646 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4647 Assert(BufferIsValid(buffer));
4648 Assert(BufferIsPinned(buffer));
4649
4650 bufHdr = GetBufferDescriptor(buffer - 1);
4652 lsn = PageGetLSN(page);
4654
4655 return lsn;
4656}
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
static XLogRecPtr PageGetLSN(const PageData *page)
Definition bufpage.h:385
#define XLogHintBitIsNeeded()
Definition xlog.h:122
uint64 XLogRecPtr
Definition xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber blknum 
)

Definition at line 4377 of file bufmgr.c.

4379{
4381
4382 /* Do the same checks as BufferGetBlockNumber. */
4383 Assert(BufferIsPinned(buffer));
4384
4385 if (BufferIsLocal(buffer))
4386 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4387 else
4388 bufHdr = GetBufferDescriptor(buffer - 1);
4389
4390 /* pinned, so OK to read tag without spinlock */
4391 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4392 *forknum = BufTagGetForkNum(&bufHdr->tag);
4393 *blknum = bufHdr->tag.blockNum;
4394}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 3024 of file bufmgr.c.

3025{
3027
3028 Assert(BufferIsPinned(buffer));
3029
3030 if (BufferIsLocal(buffer))
3031 {
3032 int bufid = -buffer - 1;
3033
3035 /* Content locks are not maintained for local buffers. */
3036 }
3037 else
3038 {
3039 bufHdr = GetBufferDescriptor(buffer - 1);
3041 }
3042
3043 return pg_atomic_read_u64(&bufHdr->state) & BM_DIRTY;
3044}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2997
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and pg_atomic_read_u64().

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2971 of file bufmgr.c.

2972{
2974
2975 Assert(BufferIsPinned(buffer));
2976
2977 if (BufferIsLocal(buffer))
2978 {
2979 /* Content locks are not maintained for local buffers. */
2980 return true;
2981 }
2982 else
2983 {
2984 bufHdr = GetBufferDescriptor(buffer - 1);
2985 return BufferLockHeldByMe(bufHdr);
2986 }
2987}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMe(), fb(), and GetBufferDescriptor().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2997 of file bufmgr.c.

2998{
3000
3001 Assert(BufferIsPinned(buffer));
3002
3003 if (BufferIsLocal(buffer))
3004 {
3005 /* Content locks are not maintained for local buffers. */
3006 return true;
3007 }
3008 else
3009 {
3010 bufHdr = GetBufferDescriptor(buffer - 1);
3012 }
3013}
static bool BufferLockHeldByMeInMode(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6382
static PgChecksumMode mode

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMeInMode(), fb(), GetBufferDescriptor(), and mode.

Referenced by BufferIsDirty(), HeapTupleSetHintBits(), identify_and_fix_vm_corruption(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4604 of file bufmgr.c.

4605{
4607
4608 /* Local buffers are used only for temp relations. */
4609 if (BufferIsLocal(buffer))
4610 return false;
4611
4612 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4613 Assert(BufferIsValid(buffer));
4614 Assert(BufferIsPinned(buffer));
4615
4616 /*
4617 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
4618 * need not bother with the buffer header spinlock. Even if someone else
4619 * changes the buffer header state while we're doing this, the state is
4620 * changed atomically, so we'll read the old value or the new value, but
4621 * not random garbage.
4622 */
4623 bufHdr = GetBufferDescriptor(buffer - 1);
4624 return (pg_atomic_read_u64(&bufHdr->state) & BM_PERMANENT) != 0;
4625}

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), and pg_atomic_read_u64().

Referenced by SetHintBits().

◆ BufferLockAcquire()

static void BufferLockAcquire ( Buffer  buffer,
BufferDesc buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5755 of file bufmgr.c.

5756{
5757 PrivateRefCountEntry *entry;
5758 int extraWaits = 0;
5759
5760 /*
5761 * Get reference to the refcount entry before we hold the lock, it seems
5762 * better to do before holding the lock.
5763 */
5764 entry = GetPrivateRefCountEntry(buffer, true);
5765
5766 /*
5767 * We better not already hold a lock on the buffer.
5768 */
5770
5771 /*
5772 * Lock out cancel/die interrupts until we exit the code section protected
5773 * by the content lock. This ensures that interrupts will not interfere
5774 * with manipulations of data structures in shared memory.
5775 */
5777
5778 for (;;)
5779 {
5780 uint32 wait_event = 0; /* initialized to avoid compiler warning */
5781 bool mustwait;
5782
5783 /*
5784 * Try to grab the lock the first time, we're not in the waitqueue
5785 * yet/anymore.
5786 */
5788
5789 if (likely(!mustwait))
5790 {
5791 break;
5792 }
5793
5794 /*
5795 * Ok, at this point we couldn't grab the lock on the first try. We
5796 * cannot simply queue ourselves to the end of the list and wait to be
5797 * woken up because by now the lock could long have been released.
5798 * Instead add us to the queue and try to grab the lock again. If we
5799 * succeed we need to revert the queuing and be happy, otherwise we
5800 * recheck the lock. If we still couldn't grab it, we know that the
5801 * other locker will see our queue entries when releasing since they
5802 * existed before we checked for the lock.
5803 */
5804
5805 /* add to the queue */
5807
5808 /* we're now guaranteed to be woken up if necessary */
5810
5811 /* ok, grabbed the lock the second time round, need to undo queueing */
5812 if (!mustwait)
5813 {
5815 break;
5816 }
5817
5818 switch (mode)
5819 {
5822 break;
5825 break;
5826 case BUFFER_LOCK_SHARE:
5828 break;
5829 case BUFFER_LOCK_UNLOCK:
5831
5832 }
5834
5835 /*
5836 * Wait until awakened.
5837 *
5838 * It is possible that we get awakened for a reason other than being
5839 * signaled by BufferLockWakeup(). If so, loop back and wait again.
5840 * Once we've gotten the lock, re-increment the sema by the number of
5841 * additional signals received.
5842 */
5843 for (;;)
5844 {
5847 break;
5848 extraWaits++;
5849 }
5850
5852
5853 /* Retrying, allow BufferLockRelease to release waiters again. */
5855 }
5856
5857 /* Remember that we now hold this lock */
5858 entry->data.lockmode = mode;
5859
5860 /*
5861 * Fix the process wait semaphore's count for any absorbed wakeups.
5862 */
5863 while (unlikely(extraWaits-- > 0))
5865}
static uint64 pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
Definition atomics.h:551
#define BM_LOCK_WAKE_IN_PROGRESS
static bool BufferLockAttempt(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5953
static void BufferLockDequeueSelf(BufferDesc *buf_hdr)
Definition bufmgr.c:6060
static void BufferLockQueueSelf(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6020
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition bufmgr.c:493
@ BUFFER_LOCK_SHARE_EXCLUSIVE
Definition bufmgr.h:215
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
#define likely(x)
Definition c.h:411
@ LW_WS_NOT_WAITING
Definition lwlock.h:30
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
void PGSemaphoreUnlock(PGSemaphore sema)
Definition posix_sema.c:335
void PGSemaphoreLock(PGSemaphore sema)
Definition posix_sema.c:315
PGPROC * MyProc
Definition proc.c:67
PGSemaphore sem
Definition proc.h:184
uint8 lwWaiting
Definition proc.h:240
BufferLockMode lockmode
Definition bufmgr.c:109
PrivateRefCountData data
Definition bufmgr.c:125
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85

References Assert, BM_LOCK_WAKE_IN_PROGRESS, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), BufferLockDequeueSelf(), BufferLockQueueSelf(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, likely, PrivateRefCountData::lockmode, LW_WS_NOT_WAITING, PGPROC::lwWaiting, mode, MyProc, pg_atomic_fetch_and_u64(), pg_unreachable, PGSemaphoreLock(), PGSemaphoreUnlock(), pgstat_report_wait_end(), pgstat_report_wait_start(), PGPROC::sem, and unlikely.

Referenced by FlushUnlockedBuffer(), LockBufferInternal(), and MarkDirtyUnpinnedBufferInternal().

◆ BufferLockAttempt()

static bool BufferLockAttempt ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5953 of file bufmgr.c.

5954{
5956
5957 /*
5958 * Read once outside the loop, later iterations will get the newer value
5959 * via compare & exchange.
5960 */
5962
5963 /* loop until we've determined whether we could acquire the lock or not */
5964 while (true)
5965 {
5967 bool lock_free;
5968
5970
5972 {
5973 lock_free = (old_state & BM_LOCK_MASK) == 0;
5974 if (lock_free)
5976 }
5978 {
5980 if (lock_free)
5982 }
5983 else
5984 {
5986 if (lock_free)
5988 }
5989
5990 /*
5991 * Attempt to swap in the state we are expecting. If we didn't see
5992 * lock to be free, that's just the old value. If we saw it as free,
5993 * we'll attempt to mark it acquired. The reason that we always swap
5994 * in the value is that this doubles as a memory barrier. We could try
5995 * to be smarter and only swap in values if we saw the lock as free,
5996 * but benchmark haven't shown it as beneficial so far.
5997 *
5998 * Retry if the value changed since we last looked at it.
5999 */
6002 {
6003 if (lock_free)
6004 {
6005 /* Great! Got the lock. */
6006 return false;
6007 }
6008 else
6009 return true; /* somebody else has the lock */
6010 }
6011 }
6012
6014}
static bool pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 *expected, uint64 newval)
Definition atomics.h:522
#define BM_LOCK_VAL_SHARED
#define BM_LOCK_VAL_EXCLUSIVE
#define BM_LOCK_MASK
#define BM_LOCK_VAL_SHARE_EXCLUSIVE

References BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE_EXCLUSIVE, fb(), likely, mode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), and pg_unreachable.

Referenced by BufferLockAcquire(), and BufferLockConditional().

◆ BufferLockConditional()

static bool BufferLockConditional ( Buffer  buffer,
BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 5907 of file bufmgr.c.

5908{
5909 PrivateRefCountEntry *entry = GetPrivateRefCountEntry(buffer, true);
5910 bool mustwait;
5911
5912 /*
5913 * As described above, if we're trying to lock a buffer this backend
5914 * already has locked, return false, independent of the existing and
5915 * desired lock level.
5916 */
5917 if (entry->data.lockmode != BUFFER_LOCK_UNLOCK)
5918 return false;
5919
5920 /*
5921 * Lock out cancel/die interrupts until we exit the code section protected
5922 * by the content lock. This ensures that interrupts will not interfere
5923 * with manipulations of data structures in shared memory.
5924 */
5926
5927 /* Check for the lock */
5929
5930 if (mustwait)
5931 {
5932 /* Failed to get lock, so release interrupt holdoff */
5934 }
5935 else
5936 {
5937 entry->data.lockmode = mode;
5938 }
5939
5940 return !mustwait;
5941}
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, PrivateRefCountData::lockmode, mode, and RESUME_INTERRUPTS.

Referenced by ConditionalLockBuffer(), and GetVictimBuffer().

◆ BufferLockDequeueSelf()

static void BufferLockDequeueSelf ( BufferDesc buf_hdr)
static

Definition at line 6060 of file bufmgr.c.

6061{
6062 bool on_waitlist;
6063
6065
6067 if (on_waitlist)
6068 proclist_delete(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6069
6070 if (proclist_is_empty(&buf_hdr->lock_waiters) &&
6072 {
6074 }
6075
6076 /* XXX: combine with fetch_and above? */
6078
6079 /* clear waiting state again, nice for debugging */
6080 if (on_waitlist)
6082 else
6083 {
6084 int extraWaits = 0;
6085
6086
6087 /*
6088 * Somebody else dequeued us and has or will wake us up. Deal with the
6089 * superfluous absorption of a wakeup.
6090 */
6091
6092 /*
6093 * Clear BM_LOCK_WAKE_IN_PROGRESS if somebody woke us before we
6094 * removed ourselves - they'll have set it.
6095 */
6097
6098 /*
6099 * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
6100 * get reset at some inconvenient point later. Most of the time this
6101 * will immediately return.
6102 */
6103 for (;;)
6104 {
6107 break;
6108 extraWaits++;
6109 }
6110
6111 /*
6112 * Fix the process wait semaphore's count for any absorbed wakeups.
6113 */
6114 while (extraWaits-- > 0)
6116 }
6117}
#define BM_LOCK_HAS_WAITERS
@ LW_WS_WAITING
Definition lwlock.h:31
#define proclist_delete(list, procno, link_member)
Definition proclist.h:187
static bool proclist_is_empty(const proclist_head *list)
Definition proclist.h:38

References BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, MyProc, MyProcNumber, pg_atomic_fetch_and_u64(), pg_atomic_read_u64(), PGSemaphoreLock(), PGSemaphoreUnlock(), proclist_delete, proclist_is_empty(), PGPROC::sem, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockDisown()

static void BufferLockDisown ( Buffer  buffer,
BufferDesc buf_hdr 
)
inlinestatic

Definition at line 6128 of file bufmgr.c.

6129{
6132}
static int BufferLockDisownInternal(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6142

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), fb(), and RESUME_INTERRUPTS.

Referenced by buffer_stage_common().

◆ BufferLockDisownInternal()

static int BufferLockDisownInternal ( Buffer  buffer,
BufferDesc buf_hdr 
)
inlinestatic

Definition at line 6142 of file bufmgr.c.

6143{
6146
6147 ref = GetPrivateRefCountEntry(buffer, false);
6148 if (ref == NULL)
6149 elog(ERROR, "lock %d is not held", buffer);
6150 mode = ref->data.lockmode;
6151 ref->data.lockmode = BUFFER_LOCK_UNLOCK;
6152
6153 return mode;
6154}
BufferLockMode
Definition bufmgr.h:204
#define ERROR
Definition elog.h:39

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, elog, ERROR, fb(), GetPrivateRefCountEntry(), and mode.

Referenced by BufferLockDisown(), and BufferLockUnlock().

◆ BufferLockHeldByMe()

static bool BufferLockHeldByMe ( BufferDesc buf_hdr)
static

Definition at line 6400 of file bufmgr.c.

6401{
6402 PrivateRefCountEntry *entry =
6404
6405 if (!entry)
6406 return false;
6407 else
6408 return entry->data.lockmode != BUFFER_LOCK_UNLOCK;
6409}
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)

References BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), and PrivateRefCountData::lockmode.

Referenced by buffer_stage_common(), BufferIsLockedByMe(), and UnpinBufferNoOwner().

◆ BufferLockHeldByMeInMode()

static bool BufferLockHeldByMeInMode ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6382 of file bufmgr.c.

6383{
6384 PrivateRefCountEntry *entry =
6386
6387 if (!entry)
6388 return false;
6389 else
6390 return entry->data.lockmode == mode;
6391}

References BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), PrivateRefCountData::lockmode, and mode.

Referenced by BufferIsLockedByMeInMode().

◆ BufferLockProcessRelease()

static void BufferLockProcessRelease ( BufferDesc buf_hdr,
BufferLockMode  mode,
uint64  lockstate 
)
static

Definition at line 6327 of file bufmgr.c.

6328{
6329 bool check_waiters = false;
6330 bool wake_exclusive = false;
6331
6332 /* nobody else can have that kind of lock */
6334
6335 /*
6336 * If we're still waiting for backends to get scheduled, don't wake them
6337 * up again. Otherwise check if we need to look through the waitqueue to
6338 * wake other backends.
6339 */
6342 {
6343 if ((lockstate & BM_LOCK_MASK) == 0)
6344 {
6345 /*
6346 * We released a lock and the lock was, in that moment, free. We
6347 * therefore can wake waiters for any kind of lock.
6348 */
6349 check_waiters = true;
6350 wake_exclusive = true;
6351 }
6353 {
6354 /*
6355 * We released the lock, but another backend still holds a lock.
6356 * We can't have released an exclusive lock, as there couldn't
6357 * have been other lock holders. If we released a share lock, no
6358 * waiters need to be woken up, as there must be other share
6359 * lockers. However, if we held a share-exclusive lock, another
6360 * backend now could acquire a share-exclusive lock.
6361 */
6362 check_waiters = true;
6363 wake_exclusive = false;
6364 }
6365 }
6366
6367 /*
6368 * As waking up waiters requires the spinlock to be acquired, only do so
6369 * if necessary.
6370 */
6371 if (check_waiters)
6373}
static void BufferLockWakeup(BufferDesc *buf_hdr, bool unlocked)
Definition bufmgr.c:6162

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_SHARE_EXCLUSIVE, BufferLockWakeup(), fb(), and mode.

Referenced by BufferLockUnlock().

◆ BufferLockQueueSelf()

static void BufferLockQueueSelf ( BufferDesc buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6020 of file bufmgr.c.

6021{
6022 /*
6023 * If we don't have a PGPROC structure, there's no way to wait. This
6024 * should never occur, since MyProc should only be null during shared
6025 * memory initialization.
6026 */
6027 if (MyProc == NULL)
6028 elog(PANIC, "cannot wait without a PGPROC structure");
6029
6031 elog(PANIC, "queueing for lock while waiting on another one");
6032
6034
6035 /* setting the flag is protected by the spinlock */
6037
6038 /*
6039 * These are currently used both for lwlocks and buffer content locks,
6040 * which is acceptable, although not pretty, because a backend can't wait
6041 * for both types of locks at the same time.
6042 */
6045
6046 proclist_push_tail(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6047
6048 /* Can release the mutex now */
6050}
static uint64 pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
Definition atomics.h:560
#define PANIC
Definition elog.h:42
#define proclist_push_tail(list, procno, link_member)
Definition proclist.h:191
uint8 lwWaitMode
Definition proc.h:241

References BM_LOCK_HAS_WAITERS, elog, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, mode, MyProc, MyProcNumber, PANIC, pg_atomic_fetch_or_u64(), proclist_push_tail, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockReleaseSub()

static uint64 BufferLockReleaseSub ( BufferLockMode  mode)
inlinestatic

Definition at line 6298 of file bufmgr.c.

6299{
6300 /*
6301 * Turns out that a switch() leads gcc to generate sufficiently worse code
6302 * for this to show up in profiles...
6303 */
6305 return BM_LOCK_VAL_EXCLUSIVE;
6308 else
6309 {
6311 return BM_LOCK_VAL_SHARED;
6312 }
6313
6314 return 0; /* keep compiler quiet */
6315}

References Assert, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, and mode.

Referenced by BufferLockUnlock().

◆ BufferLockUnlock()

static void BufferLockUnlock ( Buffer  buffer,
BufferDesc buf_hdr 
)
static

Definition at line 5871 of file bufmgr.c.

5872{
5875 uint64 sub;
5876
5878
5879 /*
5880 * Release my hold on lock, after that it can immediately be acquired by
5881 * others, even if we still have to wakeup other waiters.
5882 */
5884
5886
5888
5889 /*
5890 * Now okay to allow cancel/die interrupts.
5891 */
5893}
static uint64 pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:578
static void BufferLockProcessRelease(BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
Definition bufmgr.c:6327
static uint64 BufferLockReleaseSub(BufferLockMode mode)
Definition bufmgr.c:6298

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), BufferLockProcessRelease(), BufferLockReleaseSub(), fb(), mode, pg_atomic_sub_fetch_u64(), and RESUME_INTERRUPTS.

Referenced by FlushUnlockedBuffer(), MarkDirtyUnpinnedBufferInternal(), ResOwnerReleaseBuffer(), and UnlockBuffer().

◆ BufferLockWakeup()

static void BufferLockWakeup ( BufferDesc buf_hdr,
bool  unlocked 
)
static

Definition at line 6162 of file bufmgr.c.

6163{
6164 bool new_wake_in_progress = false;
6165 bool wake_share_exclusive = true;
6168
6170
6171 /* lock wait list while collecting backends to wake up */
6173
6174 proclist_foreach_modify(iter, &buf_hdr->lock_waiters, lwWaitLink)
6175 {
6176 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6177
6178 /*
6179 * Already woke up a conflicting lock, so skip over this wait list
6180 * entry.
6181 */
6183 continue;
6185 continue;
6186
6187 proclist_delete(&buf_hdr->lock_waiters, iter.cur, lwWaitLink);
6188 proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
6189
6190 /*
6191 * Prevent additional wakeups until retryer gets to run. Backends that
6192 * are just waiting for the lock to become free don't retry
6193 * automatically.
6194 */
6195 new_wake_in_progress = true;
6196
6197 /*
6198 * Signal that the process isn't on the wait list anymore. This allows
6199 * BufferLockDequeueSelf() to remove itself from the waitlist with a
6200 * proclist_delete(), rather than having to check if it has been
6201 * removed from the list.
6202 */
6203 Assert(waiter->lwWaiting == LW_WS_WAITING);
6205
6206 /*
6207 * Don't wakeup further waiters after waking a conflicting waiter.
6208 */
6209 if (waiter->lwWaitMode == BUFFER_LOCK_SHARE)
6210 {
6211 /*
6212 * Share locks conflict with exclusive locks.
6213 */
6214 wake_exclusive = false;
6215 }
6216 else if (waiter->lwWaitMode == BUFFER_LOCK_SHARE_EXCLUSIVE)
6217 {
6218 /*
6219 * Share-exclusive locks conflict with share-exclusive and
6220 * exclusive locks.
6221 */
6222 wake_exclusive = false;
6223 wake_share_exclusive = false;
6224 }
6225 else if (waiter->lwWaitMode == BUFFER_LOCK_EXCLUSIVE)
6226 {
6227 /*
6228 * Exclusive locks conflict with all other locks, there's no point
6229 * in waking up anybody else.
6230 */
6231 break;
6232 }
6233 }
6234
6236
6237 /* unset required flags, and release lock, in one fell swoop */
6238 {
6241
6243 while (true)
6244 {
6246
6247 /* compute desired flags */
6248
6251 else
6253
6254 if (proclist_is_empty(&buf_hdr->lock_waiters))
6256
6257 desired_state &= ~BM_LOCKED; /* release lock */
6258
6261 break;
6262 }
6263 }
6264
6265 /* Awaken any waiters I removed from the queue. */
6266 proclist_foreach_modify(iter, &wakeup, lwWaitLink)
6267 {
6268 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6269
6270 proclist_delete(&wakeup, iter.cur, lwWaitLink);
6271
6272 /*
6273 * Guarantee that lwWaiting being unset only becomes visible once the
6274 * unlink from the link has completed. Otherwise the target backend
6275 * could be woken up for other reason and enqueue for a new lock - if
6276 * that happens before the list unlink happens, the list would end up
6277 * being corrupted.
6278 *
6279 * The barrier pairs with the LockBufHdr() when enqueuing for another
6280 * lock.
6281 */
6283 waiter->lwWaiting = LW_WS_NOT_WAITING;
6284 PGSemaphoreUnlock(waiter->sem);
6285 }
6286}
#define pg_write_barrier()
Definition atomics.h:155
@ LW_WS_PENDING_WAKEUP
Definition lwlock.h:32
#define GetPGProcByNumber(n)
Definition proc.h:440
static void proclist_init(proclist_head *list)
Definition proclist.h:29
#define proclist_foreach_modify(iter, lhead, link_member)
Definition proclist.h:206
Definition proc.h:180
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, proclist_mutable_iter::cur, fb(), GetPGProcByNumber, LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_PENDING_WAKEUP, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pg_write_barrier, PGSemaphoreUnlock(), proclist_delete, proclist_foreach_modify, proclist_init(), proclist_is_empty(), proclist_push_tail, PGPROC::sem, and wakeup.

Referenced by BufferLockProcessRelease().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 3456 of file bufmgr.c.

3457{
3459 int buf_id;
3460 int num_to_scan;
3461 int num_spaces;
3462 int num_processed;
3463 int num_written;
3465 Oid last_tsid;
3467 int i;
3468 uint64 mask = BM_DIRTY;
3470
3471 /*
3472 * Unless this is a shutdown checkpoint or we have been explicitly told,
3473 * we write only permanent, dirty buffers. But at shutdown or end of
3474 * recovery, we write all dirty buffers.
3475 */
3478 mask |= BM_PERMANENT;
3479
3480 /*
3481 * Loop over all buffers, and mark the ones that need to be written with
3482 * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
3483 * can estimate how much work needs to be done.
3484 *
3485 * This allows us to write only those pages that were dirty when the
3486 * checkpoint began, and not those that get dirtied while it proceeds.
3487 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
3488 * later in this function, or by normal backends or the bgwriter cleaning
3489 * scan, the flag is cleared. Any buffer dirtied after this point won't
3490 * have the flag set.
3491 *
3492 * Note that if we fail to write some buffer, we may leave buffers with
3493 * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
3494 * certainly need to be written for the next checkpoint attempt, too.
3495 */
3496 num_to_scan = 0;
3497 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3498 {
3500 uint64 set_bits = 0;
3501
3502 /*
3503 * Header spinlock is enough to examine BM_DIRTY, see comment in
3504 * SyncOneBuffer.
3505 */
3507
3508 if ((buf_state & mask) == mask)
3509 {
3510 CkptSortItem *item;
3511
3513
3514 item = &CkptBufferIds[num_to_scan++];
3515 item->buf_id = buf_id;
3516 item->tsId = bufHdr->tag.spcOid;
3517 item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
3518 item->forkNum = BufTagGetForkNum(&bufHdr->tag);
3519 item->blockNum = bufHdr->tag.blockNum;
3520 }
3521
3523 set_bits, 0,
3524 0);
3525
3526 /* Check for barrier events in case NBuffers is large. */
3529 }
3530
3531 if (num_to_scan == 0)
3532 return; /* nothing to do */
3533
3535
3537
3538 /*
3539 * Sort buffers that need to be written to reduce the likelihood of random
3540 * IO. The sorting is also important for the implementation of balancing
3541 * writes between tablespaces. Without balancing writes we'd potentially
3542 * end up writing to the tablespaces one-by-one; possibly overloading the
3543 * underlying system.
3544 */
3546
3547 num_spaces = 0;
3548
3549 /*
3550 * Allocate progress status for each tablespace with buffers that need to
3551 * be flushed. This requires the to-be-flushed array to be sorted.
3552 */
3554 for (i = 0; i < num_to_scan; i++)
3555 {
3556 CkptTsStatus *s;
3557 Oid cur_tsid;
3558
3560
3561 /*
3562 * Grow array of per-tablespace status structs, every time a new
3563 * tablespace is found.
3564 */
3566 {
3567 Size sz;
3568
3569 num_spaces++;
3570
3571 /*
3572 * Not worth adding grow-by-power-of-2 logic here - even with a
3573 * few hundred tablespaces this should be fine.
3574 */
3575 sz = sizeof(CkptTsStatus) * num_spaces;
3576
3577 if (per_ts_stat == NULL)
3579 else
3581
3582 s = &per_ts_stat[num_spaces - 1];
3583 memset(s, 0, sizeof(*s));
3584 s->tsId = cur_tsid;
3585
3586 /*
3587 * The first buffer in this tablespace. As CkptBufferIds is sorted
3588 * by tablespace all (s->num_to_scan) buffers in this tablespace
3589 * will follow afterwards.
3590 */
3591 s->index = i;
3592
3593 /*
3594 * progress_slice will be determined once we know how many buffers
3595 * are in each tablespace, i.e. after this loop.
3596 */
3597
3599 }
3600 else
3601 {
3602 s = &per_ts_stat[num_spaces - 1];
3603 }
3604
3605 s->num_to_scan++;
3606
3607 /* Check for barrier events. */
3610 }
3611
3612 Assert(num_spaces > 0);
3613
3614 /*
3615 * Build a min-heap over the write-progress in the individual tablespaces,
3616 * and compute how large a portion of the total progress a single
3617 * processed buffer is.
3618 */
3621 NULL);
3622
3623 for (i = 0; i < num_spaces; i++)
3624 {
3626
3627 ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3628
3630 }
3631
3633
3634 /*
3635 * Iterate through to-be-checkpointed buffers and write the ones (still)
3636 * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3637 * tablespaces; otherwise the sorting would lead to only one tablespace
3638 * receiving writes at a time, making inefficient use of the hardware.
3639 */
3640 num_processed = 0;
3641 num_written = 0;
3642 while (!binaryheap_empty(ts_heap))
3643 {
3647
3648 buf_id = CkptBufferIds[ts_stat->index].buf_id;
3649 Assert(buf_id != -1);
3650
3651 bufHdr = GetBufferDescriptor(buf_id);
3652
3653 num_processed++;
3654
3655 /*
3656 * We don't need to acquire the lock here, because we're only looking
3657 * at a single bit. It's possible that someone else writes the buffer
3658 * and clears the flag right after we check, but that doesn't matter
3659 * since SyncOneBuffer will then do nothing. However, there is a
3660 * further race condition: it's conceivable that between the time we
3661 * examine the bit here and the time SyncOneBuffer acquires the lock,
3662 * someone else not only wrote the buffer but replaced it with another
3663 * page and dirtied it. In that improbable case, SyncOneBuffer will
3664 * write the buffer though we didn't need to. It doesn't seem worth
3665 * guarding against this, though.
3666 */
3668 {
3669 if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3670 {
3673 num_written++;
3674 }
3675 }
3676
3677 /*
3678 * Measure progress independent of actually having to flush the buffer
3679 * - otherwise writing become unbalanced.
3680 */
3681 ts_stat->progress += ts_stat->progress_slice;
3682 ts_stat->num_scanned++;
3683 ts_stat->index++;
3684
3685 /* Have all the buffers from the tablespace been processed? */
3686 if (ts_stat->num_scanned == ts_stat->num_to_scan)
3687 {
3689 }
3690 else
3691 {
3692 /* update heap with the new progress */
3694 }
3695
3696 /*
3697 * Sleep to throttle our I/O rate.
3698 *
3699 * (This will check for barrier events even if it doesn't sleep.)
3700 */
3701 CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3702 }
3703
3704 /*
3705 * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3706 * IOContext will always be IOCONTEXT_NORMAL.
3707 */
3709
3711 per_ts_stat = NULL;
3713
3714 /*
3715 * Update checkpoint statistics. As noted above, this doesn't include
3716 * buffers written by other backends or bgwriter scan.
3717 */
3719
3721}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition buf_init.c:26
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition bufmgr.c:7244
int checkpoint_flush_after
Definition bufmgr.c:207
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition bufmgr.c:7267
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition bufmgr.c:7329
double float8
Definition c.h:644
size_t Size
Definition c.h:619
void CheckpointWriteDelay(int flags, double progress)
volatile sig_atomic_t ProcSignalBarrierPending
Definition globals.c:40
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
#define InvalidOid
unsigned int Oid
void ProcessProcSignalBarrier(void)
Definition procsignal.c:499
int ckpt_bufs_written
Definition xlog.h:178
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition bufmgr.c:148
int num_to_scan
Definition bufmgr.c:151
PgStat_Counter buffers_written
Definition pgstat.h:266
CheckpointStatsData CheckpointStats
Definition xlog.c:212
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:154
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:151
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:150

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), fb(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u64(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), SyncOneBuffer(), ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdrExt(), and WritebackContextInit().

Referenced by CheckPointBuffers().

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag ba,
const BufferTag bb 
)
inlinestatic

Definition at line 7179 of file bufmgr.c.

7180{
7181 int ret;
7184
7187
7189
7190 if (ret != 0)
7191 return ret;
7192
7194 return -1;
7196 return 1;
7197
7198 if (ba->blockNum < bb->blockNum)
7199 return -1;
7200 if (ba->blockNum > bb->blockNum)
7201 return 1;
7202
7203 return 0;
7204}
static int rlocator_comparator(const void *p1, const void *p2)
Definition bufmgr.c:7080

References BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 6494 of file bufmgr.c.

6495{
6496 if (BufferIsLocal(buffer))
6497 {
6498 if (LocalRefCount[-buffer - 1] != 1)
6499 elog(ERROR, "incorrect local pin count: %d",
6500 LocalRefCount[-buffer - 1]);
6501 }
6502 else
6503 {
6504 if (GetPrivateRefCount(buffer) != 1)
6505 elog(ERROR, "incorrect local pin count: %d",
6506 GetPrivateRefCount(buffer));
6507 }
6508}

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 4173 of file bufmgr.c.

4174{
4175#ifdef USE_ASSERT_CHECKING
4176 int RefCountErrors = 0;
4178 int i;
4179 char *s;
4180
4181 /* check the array */
4182 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
4183 {
4185 {
4186 res = &PrivateRefCountArray[i];
4187
4189 elog(WARNING, "buffer refcount leak: %s", s);
4190 pfree(s);
4191
4193 }
4194 }
4195
4196 /* if necessary search the hash */
4198 {
4200
4202 while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
4203 {
4205 elog(WARNING, "buffer refcount leak: %s", s);
4206 pfree(s);
4208 }
4209 }
4210
4211 Assert(RefCountErrors == 0);
4212#endif
4213}
#define InvalidBuffer
Definition buf.h:25
static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:247
char * DebugPrintBufferRefcount(Buffer buffer)
Definition bufmgr.c:4299
#define REFCOUNT_ARRAY_ENTRIES
Definition bufmgr.c:129
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:248
static HTAB * PrivateRefCountHash
Definition bufmgr.c:249
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380

References Assert, PrivateRefCountEntry::buffer, DebugPrintBufferRefcount(), elog, fb(), hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4342 of file bufmgr.c.

4343{
4344 BufferSync(flags);
4345}
static void BufferSync(int flags)
Definition bufmgr.c:3456

References BufferSync().

Referenced by CheckPointGuts().

◆ CheckReadBuffersOperation()

static void CheckReadBuffersOperation ( ReadBuffersOperation operation,
bool  is_complete 
)
static

Definition at line 1627 of file bufmgr.c.

1628{
1629#ifdef USE_ASSERT_CHECKING
1630 Assert(operation->nblocks_done <= operation->nblocks);
1631 Assert(!is_complete || operation->nblocks == operation->nblocks_done);
1632
1633 for (int i = 0; i < operation->nblocks; i++)
1634 {
1635 Buffer buffer = operation->buffers[i];
1636 BufferDesc *buf_hdr = BufferIsLocal(buffer) ?
1637 GetLocalBufferDescriptor(-buffer - 1) :
1638 GetBufferDescriptor(buffer - 1);
1639
1640 Assert(BufferGetBlockNumber(buffer) == operation->blocknum + i);
1642
1643 if (i < operation->nblocks_done)
1645 }
1646#endif
1647}

References Assert, ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, and pg_atomic_read_u64().

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem a,
const CkptSortItem b 
)
inlinestatic

Definition at line 7213 of file bufmgr.c.

7214{
7215 /* compare tablespace */
7216 if (a->tsId < b->tsId)
7217 return -1;
7218 else if (a->tsId > b->tsId)
7219 return 1;
7220 /* compare relation */
7221 if (a->relNumber < b->relNumber)
7222 return -1;
7223 else if (a->relNumber > b->relNumber)
7224 return 1;
7225 /* compare fork */
7226 else if (a->forkNum < b->forkNum)
7227 return -1;
7228 else if (a->forkNum > b->forkNum)
7229 return 1;
7230 /* compare block number */
7231 else if (a->blockNum < b->blockNum)
7232 return -1;
7233 else if (a->blockNum > b->blockNum)
7234 return 1;
7235 /* equal page IDs are unlikely, but not impossible */
7236 return 0;
7237}
int b
Definition isn.c:74
int a
Definition isn.c:73

References a, and b.

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 6474 of file bufmgr.c.

6475{
6476 BufferDesc *buf;
6477
6478 Assert(BufferIsPinned(buffer));
6479 if (BufferIsLocal(buffer))
6480 return true; /* act as though we got it */
6481
6482 buf = GetBufferDescriptor(buffer - 1);
6483
6485}
static bool BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5907

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsPinned, BufferLockConditional(), and GetBufferDescriptor().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 6700 of file bufmgr.c.

6701{
6704 refcount;
6705
6706 Assert(BufferIsValid(buffer));
6707
6708 /* see AIO related comment in LockBufferForCleanup() */
6709
6710 if (BufferIsLocal(buffer))
6711 {
6712 refcount = LocalRefCount[-buffer - 1];
6713 /* There should be exactly one pin */
6714 Assert(refcount > 0);
6715 if (refcount != 1)
6716 return false;
6717 /* Nobody else to wait for */
6718 return true;
6719 }
6720
6721 /* There should be exactly one local pin */
6722 refcount = GetPrivateRefCount(buffer);
6723 Assert(refcount);
6724 if (refcount != 1)
6725 return false;
6726
6727 /* Try to acquire lock */
6728 if (!ConditionalLockBuffer(buffer))
6729 return false;
6730
6731 bufHdr = GetBufferDescriptor(buffer - 1);
6734
6735 Assert(refcount > 0);
6736 if (refcount == 1)
6737 {
6738 /* Successfully acquired exclusive lock with pincount 1 */
6740 return true;
6741 }
6742
6743 /* Failed, so release the lock */
6746 return false;
6747}
bool ConditionalLockBuffer(Buffer buffer)
Definition bufmgr.c:6474
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5377 of file bufmgr.c.

5379{
5380 char relpersistence;
5383
5384 /* Set the relpersistence. */
5385 relpersistence = permanent ?
5387
5390
5391 /*
5392 * Create and copy all forks of the relation. During create database we
5393 * have a separate cleanup mechanism which deletes complete database
5394 * directory. Therefore, each individual relation doesn't need to be
5395 * registered for cleanup.
5396 */
5397 RelationCreateStorage(dst_rlocator, relpersistence, false);
5398
5399 /* copy main fork. */
5401 permanent);
5402
5403 /* copy those extra forks that exist */
5404 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5405 forkNum <= MAX_FORKNUM; forkNum++)
5406 {
5407 if (smgrexists(src_rel, forkNum))
5408 {
5409 smgrcreate(dst_rel, forkNum, false);
5410
5411 /*
5412 * WAL log creation if the relation is persistent, or this is the
5413 * init fork of an unlogged relation.
5414 */
5415 if (permanent || forkNum == INIT_FORKNUM)
5416 log_smgrcreate(&dst_rlocator, forkNum);
5417
5418 /* Copy a fork's data, block by block. */
5420 permanent);
5421 }
5422 }
5423}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition bufmgr.c:5263
@ MAIN_FORKNUM
Definition relpath.h:58
#define MAX_FORKNUM
Definition relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition storage.c:187

References fb(), INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4299 of file bufmgr.c.

4300{
4301 BufferDesc *buf;
4303 char *result;
4304 ProcNumber backend;
4306
4307 Assert(BufferIsValid(buffer));
4308 if (BufferIsLocal(buffer))
4309 {
4310 buf = GetLocalBufferDescriptor(-buffer - 1);
4311 loccount = LocalRefCount[-buffer - 1];
4312 backend = MyProcNumber;
4313 }
4314 else
4315 {
4316 buf = GetBufferDescriptor(buffer - 1);
4317 loccount = GetPrivateRefCount(buffer);
4318 backend = INVALID_PROC_NUMBER;
4319 }
4320
4321 /* theoretically we should lock the bufHdr here */
4322 buf_state = pg_atomic_read_u64(&buf->state);
4323
4324 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%" PRIx64 ", refcount=%u %d)",
4325 buffer,
4327 BufTagGetForkNum(&buf->tag)).str,
4328 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4330 return result;
4331}
#define BUF_FLAG_MASK
char * psprintf(const char *fmt,...)
Definition psprintf.c:43

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u64(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBuffer().

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 5030 of file bufmgr.c.

5031{
5032 int i;
5033
5034 /*
5035 * We needn't consider local buffers, since by assumption the target
5036 * database isn't our own.
5037 */
5038
5039 for (i = 0; i < NBuffers; i++)
5040 {
5042
5043 /*
5044 * As in DropRelationBuffers, an unlocked precheck should be safe and
5045 * saves some cycles.
5046 */
5047 if (bufHdr->tag.dbOid != dbid)
5048 continue;
5049
5051 if (bufHdr->tag.dbOid == dbid)
5052 InvalidateBuffer(bufHdr); /* releases spinlock */
5053 else
5055 }
5056}
static void InvalidateBuffer(BufferDesc *buf)
Definition bufmgr.c:2273

References fb(), GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 4680 of file bufmgr.c.

4682{
4683 int i;
4684 int j;
4685 RelFileLocatorBackend rlocator;
4688
4689 rlocator = smgr_reln->smgr_rlocator;
4690
4691 /* If it's a local relation, it's localbuf.c's problem. */
4692 if (RelFileLocatorBackendIsTemp(rlocator))
4693 {
4694 if (rlocator.backend == MyProcNumber)
4695 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4697
4698 return;
4699 }
4700
4701 /*
4702 * To remove all the pages of the specified relation forks from the buffer
4703 * pool, we need to scan the entire buffer pool but we can optimize it by
4704 * finding the buffers from BufMapping table provided we know the exact
4705 * size of each fork of the relation. The exact size is required to ensure
4706 * that we don't leave any buffer for the relation being dropped as
4707 * otherwise the background writer or checkpointer can lead to a PANIC
4708 * error while flushing buffers corresponding to files that don't exist.
4709 *
4710 * To know the exact size, we rely on the size cached for each fork by us
4711 * during recovery which limits the optimization to recovery and on
4712 * standbys but we can easily extend it once we have shared cache for
4713 * relation size.
4714 *
4715 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4716 * and the future writes keeps the cached value up-to-date. See
4717 * smgrextend. It is possible that the value of the first lseek is smaller
4718 * than the actual number of existing blocks in the file due to buggy
4719 * Linux kernels that might not have accounted for the recent write. But
4720 * that should be fine because there must not be any buffers after that
4721 * file size.
4722 */
4723 for (i = 0; i < nforks; i++)
4724 {
4725 /* Get the number of blocks for a relation's fork */
4727
4729 {
4731 break;
4732 }
4733
4734 /* calculate the number of blocks to be invalidated */
4736 }
4737
4738 /*
4739 * We apply the optimization iff the total number of blocks to invalidate
4740 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4741 */
4744 {
4745 for (j = 0; j < nforks; j++)
4746 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4748 return;
4749 }
4750
4751 for (i = 0; i < NBuffers; i++)
4752 {
4754
4755 /*
4756 * We can make this a tad faster by prechecking the buffer tag before
4757 * we attempt to lock the buffer; this saves a lot of lock
4758 * acquisitions in typical cases. It should be safe because the
4759 * caller must have AccessExclusiveLock on the relation, or some other
4760 * reason to be certain that no one is loading new pages of the rel
4761 * into the buffer pool. (Otherwise we might well miss such pages
4762 * entirely.) Therefore, while the tag might be changing while we
4763 * look at it, it can't be changing *to* a value we care about, only
4764 * *away* from such a value. So false negatives are impossible, and
4765 * false positives are safe because we'll recheck after getting the
4766 * buffer lock.
4767 *
4768 * We could check forkNum and blockNum as well as the rlocator, but
4769 * the incremental win from doing so seems small.
4770 */
4771 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4772 continue;
4773
4775
4776 for (j = 0; j < nforks; j++)
4777 {
4778 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4779 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4780 bufHdr->tag.blockNum >= firstDelBlock[j])
4781 {
4782 InvalidateBuffer(bufHdr); /* releases spinlock */
4783 break;
4784 }
4785 }
4786 if (j >= nforks)
4788 }
4789}
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition bufmgr.c:92
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition bufmgr.c:4970
int j
Definition isn.c:78
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:847

References RelFileLocatorBackend::backend, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrtruncate().

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 4800 of file bufmgr.c.

4801{
4802 int i;
4803 int n = 0;
4804 SMgrRelation *rels;
4805 BlockNumber (*block)[MAX_FORKNUM + 1];
4808 bool cached = true;
4809 bool use_bsearch;
4810
4811 if (nlocators == 0)
4812 return;
4813
4814 rels = palloc_array(SMgrRelation, nlocators); /* non-local relations */
4815
4816 /* If it's a local relation, it's localbuf.c's problem. */
4817 for (i = 0; i < nlocators; i++)
4818 {
4819 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4820 {
4821 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4822 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4823 }
4824 else
4825 rels[n++] = smgr_reln[i];
4826 }
4827
4828 /*
4829 * If there are no non-local relations, then we're done. Release the
4830 * memory and return.
4831 */
4832 if (n == 0)
4833 {
4834 pfree(rels);
4835 return;
4836 }
4837
4838 /*
4839 * This is used to remember the number of blocks for all the relations
4840 * forks.
4841 */
4842 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4843 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4844
4845 /*
4846 * We can avoid scanning the entire buffer pool if we know the exact size
4847 * of each of the given relation forks. See DropRelationBuffers.
4848 */
4849 for (i = 0; i < n && cached; i++)
4850 {
4851 for (int j = 0; j <= MAX_FORKNUM; j++)
4852 {
4853 /* Get the number of blocks for a relation's fork. */
4854 block[i][j] = smgrnblocks_cached(rels[i], j);
4855
4856 /* We need to only consider the relation forks that exists. */
4857 if (block[i][j] == InvalidBlockNumber)
4858 {
4859 if (!smgrexists(rels[i], j))
4860 continue;
4861 cached = false;
4862 break;
4863 }
4864
4865 /* calculate the total number of blocks to be invalidated */
4866 nBlocksToInvalidate += block[i][j];
4867 }
4868 }
4869
4870 /*
4871 * We apply the optimization iff the total number of blocks to invalidate
4872 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4873 */
4875 {
4876 for (i = 0; i < n; i++)
4877 {
4878 for (int j = 0; j <= MAX_FORKNUM; j++)
4879 {
4880 /* ignore relation forks that doesn't exist */
4881 if (!BlockNumberIsValid(block[i][j]))
4882 continue;
4883
4884 /* drop all the buffers for a particular relation fork */
4885 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4886 j, block[i][j], 0);
4887 }
4888 }
4889
4890 pfree(block);
4891 pfree(rels);
4892 return;
4893 }
4894
4895 pfree(block);
4896 locators = palloc_array(RelFileLocator, n); /* non-local relations */
4897 for (i = 0; i < n; i++)
4898 locators[i] = rels[i]->smgr_rlocator.locator;
4899
4900 /*
4901 * For low number of relations to drop just use a simple walk through, to
4902 * save the bsearch overhead. The threshold to use is rather a guess than
4903 * an exactly determined value, as it depends on many factors (CPU and RAM
4904 * speeds, amount of shared buffers etc.).
4905 */
4907
4908 /* sort the list of rlocators if necessary */
4909 if (use_bsearch)
4911
4912 for (i = 0; i < NBuffers; i++)
4913 {
4914 RelFileLocator *rlocator = NULL;
4916
4917 /*
4918 * As in DropRelationBuffers, an unlocked precheck should be safe and
4919 * saves some cycles.
4920 */
4921
4922 if (!use_bsearch)
4923 {
4924 int j;
4925
4926 for (j = 0; j < n; j++)
4927 {
4929 {
4930 rlocator = &locators[j];
4931 break;
4932 }
4933 }
4934 }
4935 else
4936 {
4937 RelFileLocator locator;
4938
4939 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4940 rlocator = bsearch(&locator,
4941 locators, n, sizeof(RelFileLocator),
4943 }
4944
4945 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4946 if (rlocator == NULL)
4947 continue;
4948
4950 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4951 InvalidateBuffer(bufHdr); /* releases spinlock */
4952 else
4954 }
4955
4956 pfree(locators);
4957 pfree(rels);
4958}
#define RELS_BSEARCH_THRESHOLD
Definition bufmgr.c:84
#define palloc_array(type, count)
Definition fe_memutils.h:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition localbuf.c:702
#define qsort(a, b, c, d)
Definition port.h:495

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), palloc_array, pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7571 of file bufmgr.c.

7573{
7574 *buffers_evicted = 0;
7575 *buffers_skipped = 0;
7576 *buffers_flushed = 0;
7577
7578 for (int buf = 1; buf <= NBuffers; buf++)
7579 {
7580 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7582 bool buffer_flushed;
7583
7585
7587 if (!(buf_state & BM_VALID))
7588 continue;
7589
7592
7593 LockBufHdr(desc);
7594
7596 (*buffers_evicted)++;
7597 else
7598 (*buffers_skipped)++;
7599
7600 if (buffer_flushed)
7601 (*buffers_flushed)++;
7602 }
7603}
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition bufmgr.c:7480
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
pg_atomic_uint64 state

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7621 of file bufmgr.c.

7623{
7625
7626 *buffers_skipped = 0;
7627 *buffers_evicted = 0;
7628 *buffers_flushed = 0;
7629
7630 for (int buf = 1; buf <= NBuffers; buf++)
7631 {
7632 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7634 bool buffer_flushed;
7635
7637
7638 /* An unlocked precheck should be safe and saves some cycles. */
7639 if ((buf_state & BM_VALID) == 0 ||
7641 continue;
7642
7643 /* Make sure we can pin the buffer. */
7646
7647 buf_state = LockBufHdr(desc);
7648
7649 /* recheck, could have changed without the lock */
7650 if ((buf_state & BM_VALID) == 0 ||
7652 {
7653 UnlockBufHdr(desc);
7654 continue;
7655 }
7656
7658 (*buffers_evicted)++;
7659 else
7660 (*buffers_skipped)++;
7661
7662 if (buffer_flushed)
7663 (*buffers_flushed)++;
7664 }
7665}
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
RelFileLocator rd_locator
Definition rel.h:57

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool buffer_flushed 
)

◆ EvictUnpinnedBufferInternal()

static bool EvictUnpinnedBufferInternal ( BufferDesc desc,
bool buffer_flushed 
)
static

Definition at line 7480 of file bufmgr.c.

7481{
7483 bool result;
7484
7485 *buffer_flushed = false;
7486
7489
7490 if ((buf_state & BM_VALID) == 0)
7491 {
7492 UnlockBufHdr(desc);
7493 return false;
7494 }
7495
7496 /* Check that it's not pinned already. */
7498 {
7499 UnlockBufHdr(desc);
7500 return false;
7501 }
7502
7503 PinBuffer_Locked(desc); /* releases spinlock */
7504
7505 /* If it was dirty, try to clean it once. */
7506 if (buf_state & BM_DIRTY)
7507 {
7509 *buffer_flushed = true;
7510 }
7511
7512 /* This will return false if it becomes dirty or someone else pins it. */
7513 result = InvalidateVictimBuffer(desc);
7514
7515 UnpinBuffer(desc);
7516
7517 return result;
7518}
#define BM_LOCKED
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4553
static void PinBuffer_Locked(BufferDesc *buf)
Definition bufmgr.c:3292
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition bufmgr.c:2374

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, BUF_STATE_GET_REFCOUNT, fb(), FlushUnlockedBuffer(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), and EvictUnpinnedBuffer().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 964 of file bufmgr.c.

968{
969 Buffer buf;
970 uint32 extend_by = 1;
971
972 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
973 &buf, &extend_by);
974
975 return buf;
976}
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:996

References buf, ExtendBufferedRelBy(), and fb().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer buffers,
uint32 extended_by 
)

Definition at line 996 of file bufmgr.c.

1003{
1004 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1005 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1006 Assert(extend_by > 0);
1007
1008 if (bmr.relpersistence == '\0')
1009 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1010
1011 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1013 buffers, extended_by);
1014}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2659

References Assert, ExtendBufferedRelCommon(), fb(), and InvalidBlockNumber.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2659 of file bufmgr.c.

2667{
2669
2671 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2672 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2673 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2674 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2675 extend_by);
2676
2677 if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2680 buffers, &extend_by);
2681 else
2682 first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2684 buffers, &extend_by);
2686
2688 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2689 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2690 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2691 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2692 *extended_by,
2693 first_block);
2694
2695 return first_block;
2696}
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2703
#define BMR_GET_SMGR(bmr)
Definition bufmgr.h:118
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition localbuf.c:346

References BMR_GET_SMGR, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), and fb().

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2703 of file bufmgr.c.

2711{
2715
2717
2718 /*
2719 * Acquire victim buffers for extension without holding extension lock.
2720 * Writing out victim buffers is the most expensive part of extending the
2721 * relation, particularly when doing so requires WAL flushes. Zeroing out
2722 * the buffers is also quite expensive, so do that before holding the
2723 * extension lock as well.
2724 *
2725 * These pages are pinned by us and not valid. While we hold the pin they
2726 * can't be acquired as victim buffers by another backend.
2727 */
2728 for (uint32 i = 0; i < extend_by; i++)
2729 {
2731
2732 buffers[i] = GetVictimBuffer(strategy, io_context);
2734
2735 /* new buffers are zero-filled */
2736 MemSet(buf_block, 0, BLCKSZ);
2737 }
2738
2739 /*
2740 * Lock relation against concurrent extensions, unless requested not to.
2741 *
2742 * We use the same extension lock for all forks. That's unnecessarily
2743 * restrictive, but currently extensions for forks don't happen often
2744 * enough to make it worth locking more granularly.
2745 *
2746 * Note that another backend might have extended the relation by the time
2747 * we get the lock.
2748 */
2749 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2751
2752 /*
2753 * If requested, invalidate size cache, so that smgrnblocks asks the
2754 * kernel.
2755 */
2756 if (flags & EB_CLEAR_SIZE_CACHE)
2757 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2758
2760
2761 /*
2762 * Now that we have the accurate relation size, check if the caller wants
2763 * us to extend to only up to a specific size. If there were concurrent
2764 * extensions, we might have acquired too many buffers and need to release
2765 * them.
2766 */
2768 {
2770
2772 extend_by = 0;
2773 else if ((uint64) first_block + extend_by > extend_upto)
2775
2776 for (uint32 i = extend_by; i < orig_extend_by; i++)
2777 {
2778 BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2779
2781 }
2782
2783 if (extend_by == 0)
2784 {
2785 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2788 return first_block;
2789 }
2790 }
2791
2792 /* Fail if relation is already at maximum possible length */
2794 ereport(ERROR,
2796 errmsg("cannot extend relation %s beyond %u blocks",
2797 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str,
2798 MaxBlockNumber)));
2799
2800 /*
2801 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2802 *
2803 * This needs to happen before we extend the relation, because as soon as
2804 * we do, other backends can start to read in those pages.
2805 */
2806 for (uint32 i = 0; i < extend_by; i++)
2807 {
2808 Buffer victim_buf = buffers[i];
2810 BufferTag tag;
2811 uint32 hash;
2813 int existing_id;
2814
2815 /* in case we need to pin an existing buffer below */
2818
2819 InitBufferTag(&tag, &BMR_GET_SMGR(bmr)->smgr_rlocator.locator, fork,
2820 first_block + i);
2821 hash = BufTableHashCode(&tag);
2823
2825
2827
2828 /*
2829 * We get here only in the corner case where we are trying to extend
2830 * the relation but we found a pre-existing buffer. This can happen
2831 * because a prior attempt at extending the relation failed, and
2832 * because mdread doesn't complain about reads beyond EOF (when
2833 * zero_damaged_pages is ON) and so a previous attempt to read a block
2834 * beyond EOF could have left a "valid" zero-filled buffer.
2835 *
2836 * This has also been observed when relation was overwritten by
2837 * external process. Since the legitimate cases should always have
2838 * left a zero-filled buffer, complain if not PageIsNew.
2839 */
2840 if (existing_id >= 0)
2841 {
2844 bool valid;
2845
2846 /*
2847 * Pin the existing buffer before releasing the partition lock,
2848 * preventing it from being evicted.
2849 */
2850 valid = PinBuffer(existing_hdr, strategy, false);
2851
2854
2857
2858 if (valid && !PageIsNew((Page) buf_block))
2859 ereport(ERROR,
2860 (errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",
2861 existing_hdr->tag.blockNum,
2862 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str)));
2863
2864 /*
2865 * We *must* do smgr[zero]extend before succeeding, else the page
2866 * will not be reserved by the kernel, and the next P_NEW call
2867 * will decide to return the same page. Clear the BM_VALID bit,
2868 * do StartBufferIO() and proceed.
2869 *
2870 * Loop to handle the very small possibility that someone re-sets
2871 * BM_VALID between our clearing it and StartBufferIO inspecting
2872 * it.
2873 */
2874 do
2875 {
2877 } while (!StartBufferIO(existing_hdr, true, false));
2878 }
2879 else
2880 {
2882 uint64 set_bits = 0;
2883
2885
2886 /* some sanity checks while we hold the buffer header lock */
2889
2890 victim_buf_hdr->tag = tag;
2891
2893 if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2895
2897 set_bits, 0,
2898 0);
2899
2901
2902 /* XXX: could combine the locked operations in it with the above */
2903 StartBufferIO(victim_buf_hdr, true, false);
2904 }
2905 }
2906
2908
2909 /*
2910 * Note: if smgrzeroextend fails, we will end up with buffers that are
2911 * allocated but not marked BM_VALID. The next relation extension will
2912 * still select the same block number (because the relation didn't get any
2913 * longer on disk) and so future attempts to extend the relation will find
2914 * the same buffers (if they have not been recycled) but come right back
2915 * here to try smgrzeroextend again.
2916 *
2917 * We don't need to set checksum for all-zero pages.
2918 */
2920
2921 /*
2922 * Release the file-extension lock; it's now OK for someone else to extend
2923 * the relation some more.
2924 *
2925 * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2926 * take noticeable time.
2927 */
2928 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2930
2932 io_start, 1, extend_by * BLCKSZ);
2933
2934 /* Set BM_VALID, terminate IO, and wake up any waiters */
2935 for (uint32 i = 0; i < extend_by; i++)
2936 {
2937 Buffer buf = buffers[i];
2939 bool lock = false;
2940
2941 if (flags & EB_LOCK_FIRST && i == 0)
2942 lock = true;
2943 else if (flags & EB_LOCK_TARGET)
2944 {
2946 if (first_block + i + 1 == extend_upto)
2947 lock = true;
2948 }
2949
2950 if (lock)
2952
2953 TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
2954 }
2955
2957
2959
2960 return first_block;
2961}
#define MaxBlockNumber
Definition block.h:35
#define BM_JUST_DIRTIED
#define BufHdrGetBlock(bufHdr)
Definition bufmgr.c:73
void LimitAdditionalPins(uint32 *additional_pins)
Definition bufmgr.c:2641
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition bufmgr.c:6889
void * Block
Definition bufmgr.h:26
@ EB_LOCK_TARGET
Definition bufmgr.h:93
@ EB_CLEAR_SIZE_CACHE
Definition bufmgr.h:90
@ EB_SKIP_EXTENSION_LOCK
Definition bufmgr.h:75
@ EB_LOCK_FIRST
Definition bufmgr.h:87
static bool PageIsNew(const PageData *page)
Definition bufpage.h:233
#define MemSet(start, val, len)
Definition c.h:1013
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:474
#define ExclusiveLock
Definition lockdefs.h:42
@ IOOP_EXTEND
Definition pgstat.h:314
static unsigned hash(unsigned *uv, int n)
Definition rege_dfa.c:715
#define relpath(rlocator, forknum)
Definition relpath.h:150
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition smgr.c:649
int64 shared_blks_written
Definition instrument.h:29

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, BMR_GET_SMGR, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BUFFER_LOCK_EXCLUSIVE, BufferDescriptorGetBuffer(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, fb(), GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), LockBuffer(), LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pg_atomic_fetch_and_u64(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), relpath, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, smgrnblocks(), smgrzeroextend(), StartBufferIO(), str, TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 1025 of file bufmgr.c.

1031{
1033 uint32 extended_by = 0;
1034 Buffer buffer = InvalidBuffer;
1035 Buffer buffers[64];
1036
1037 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1038 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1040
1041 if (bmr.relpersistence == '\0')
1042 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1043
1044 /*
1045 * If desired, create the file if it doesn't exist. If
1046 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
1047 * an smgrexists call.
1048 */
1049 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
1050 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
1051 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
1053 {
1055
1056 /* recheck, fork might have been created concurrently */
1059
1061 }
1062
1063 /*
1064 * If requested, invalidate size cache, so that smgrnblocks asks the
1065 * kernel.
1066 */
1067 if (flags & EB_CLEAR_SIZE_CACHE)
1068 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1069
1070 /*
1071 * Estimate how many pages we'll need to extend by. This avoids acquiring
1072 * unnecessarily many victim buffers.
1073 */
1075
1076 /*
1077 * Since no-one else can be looking at the page contents yet, there is no
1078 * difference between an exclusive lock and a cleanup-strength lock. Note
1079 * that we pass the original mode to ReadBuffer_common() below, when
1080 * falling back to reading the buffer to a concurrent relation extension.
1081 */
1083 flags |= EB_LOCK_TARGET;
1084
1085 while (current_size < extend_to)
1086 {
1087 uint32 num_pages = lengthof(buffers);
1089
1090 if ((uint64) current_size + num_pages > extend_to)
1091 num_pages = extend_to - current_size;
1092
1093 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1094 num_pages, extend_to,
1095 buffers, &extended_by);
1096
1098 Assert(num_pages != 0 || current_size >= extend_to);
1099
1100 for (uint32 i = 0; i < extended_by; i++)
1101 {
1102 if (first_block + i != extend_to - 1)
1103 ReleaseBuffer(buffers[i]);
1104 else
1105 buffer = buffers[i];
1106 }
1107 }
1108
1109 /*
1110 * It's possible that another backend concurrently extended the relation.
1111 * In that case read the buffer.
1112 *
1113 * XXX: Should we control this via a flag?
1114 */
1115 if (buffer == InvalidBuffer)
1116 {
1117 Assert(extended_by == 0);
1118 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1119 fork, extend_to - 1, mode, strategy);
1120 }
1121
1122 return buffer;
1123}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:1293
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5501
@ EB_PERFORMING_RECOVERY
Definition bufmgr.h:78
@ EB_CREATE_FORK_IF_NEEDED
Definition bufmgr.h:84
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition bufmgr.h:47
#define lengthof(array)
Definition c.h:803
static int64 current_size

References Assert, BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), fb(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBuffer_common(), ReleaseBuffer(), smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4970 of file bufmgr.c.

4973{
4974 BlockNumber curBlock;
4975
4976 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4977 {
4978 uint32 bufHash; /* hash value for tag */
4979 BufferTag bufTag; /* identity of requested block */
4980 LWLock *bufPartitionLock; /* buffer partition lock for it */
4981 int buf_id;
4983
4984 /* create a tag so we can lookup the buffer */
4985 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4986
4987 /* determine its hash code and partition lock ID */
4990
4991 /* Check that it is in the buffer pool. If not, do nothing. */
4993 buf_id = BufTableLookup(&bufTag, bufHash);
4995
4996 if (buf_id < 0)
4997 continue;
4998
4999 bufHdr = GetBufferDescriptor(buf_id);
5000
5001 /*
5002 * We need to lock the buffer header and recheck if the buffer is
5003 * still associated with the same block because the buffer could be
5004 * evicted by some other backend loading blocks for a different
5005 * relation after we release lock on the BufMapping table.
5006 */
5008
5009 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
5010 BufTagGetForkNum(&bufHdr->tag) == forkNum &&
5011 bufHdr->tag.blockNum >= firstDelBlock)
5012 InvalidateBuffer(bufHdr); /* releases spinlock */
5013 else
5015 }
5016}

References BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4416 of file bufmgr.c.

4418{
4420 ErrorContextCallback errcallback;
4423 char *bufToWrite;
4425
4426 /*
4427 * Try to start an I/O operation. If StartBufferIO returns false, then
4428 * someone else flushed the buffer before we could, so we need not do
4429 * anything.
4430 */
4431 if (!StartBufferIO(buf, false, false))
4432 return;
4433
4434 /* Setup error traceback support for ereport() */
4436 errcallback.arg = buf;
4437 errcallback.previous = error_context_stack;
4438 error_context_stack = &errcallback;
4439
4440 /* Find smgr relation for buffer */
4441 if (reln == NULL)
4443
4445 buf->tag.blockNum,
4446 reln->smgr_rlocator.locator.spcOid,
4447 reln->smgr_rlocator.locator.dbOid,
4448 reln->smgr_rlocator.locator.relNumber);
4449
4451
4452 /*
4453 * Run PageGetLSN while holding header lock, since we don't have the
4454 * buffer locked exclusively in all cases.
4455 */
4457
4458 /* To check if block content changes while flushing. - vadim 01/17/97 */
4460 0, BM_JUST_DIRTIED,
4461 0);
4462
4463 /*
4464 * Force XLOG flush up to buffer's LSN. This implements the basic WAL
4465 * rule that log updates must hit disk before any of the data-file changes
4466 * they describe do.
4467 *
4468 * However, this rule does not apply to unlogged relations, which will be
4469 * lost after a crash anyway. Most unlogged relation pages do not bear
4470 * LSNs since we never emit WAL records for them, and therefore flushing
4471 * up through the buffer LSN would be useless, but harmless. However,
4472 * GiST indexes use LSNs internally to track page-splits, and therefore
4473 * unlogged GiST pages bear "fake" LSNs generated by
4474 * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
4475 * LSN counter could advance past the WAL insertion point; and if it did
4476 * happen, attempting to flush WAL through that location would fail, with
4477 * disastrous system-wide consequences. To make sure that can't happen,
4478 * skip the flush if the buffer isn't permanent.
4479 */
4480 if (buf_state & BM_PERMANENT)
4482
4483 /*
4484 * Now it's safe to write the buffer to disk. Note that no one else should
4485 * have been able to write it, while we were busy with log flushing,
4486 * because we got the exclusive right to perform I/O by setting the
4487 * BM_IO_IN_PROGRESS bit.
4488 */
4490
4491 /*
4492 * Update page checksum if desired. Since we have only shared lock on the
4493 * buffer, other processes might be updating hint bits in it, so we must
4494 * copy the page to private storage if we do checksumming.
4495 */
4496 bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
4497
4499
4500 /*
4501 * bufToWrite is either the shared buffer or a copy, as appropriate.
4502 */
4504 BufTagGetForkNum(&buf->tag),
4505 buf->tag.blockNum,
4506 bufToWrite,
4507 false);
4508
4509 /*
4510 * When a strategy is in use, only flushes of dirty buffers already in the
4511 * strategy ring are counted as strategy writes (IOCONTEXT
4512 * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
4513 * statistics tracking.
4514 *
4515 * If a shared buffer initially added to the ring must be flushed before
4516 * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
4517 *
4518 * If a shared buffer which was added to the ring later because the
4519 * current strategy buffer is pinned or in use or because all strategy
4520 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
4521 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
4522 * (from_ring will be false).
4523 *
4524 * When a strategy is not in use, the write can only be a "regular" write
4525 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
4526 */
4529
4531
4532 /*
4533 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
4534 * end the BM_IO_IN_PROGRESS state.
4535 */
4536 TerminateBufferIO(buf, true, 0, true, false);
4537
4539 buf->tag.blockNum,
4540 reln->smgr_rlocator.locator.spcOid,
4541 reln->smgr_rlocator.locator.dbOid,
4542 reln->smgr_rlocator.locator.relNumber);
4543
4544 /* Pop the error context stack */
4545 error_context_stack = errcallback.previous;
4546}
#define BufferGetLSN(bufHdr)
Definition bufmgr.c:74
static void shared_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7048
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition bufpage.c:1509
ErrorContextCallback * error_context_stack
Definition elog.c:95
@ IOOP_WRITE
Definition pgstat.h:316
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.h:131
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2783

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, error_context_stack, fb(), INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), smgropen(), smgrwrite(), StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), and XLogFlush().

Referenced by FlushOneBuffer(), FlushUnlockedBuffer(), and GetVictimBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5441 of file bufmgr.c.

5442{
5443 int i;
5445
5446 for (i = 0; i < NBuffers; i++)
5447 {
5449
5451
5452 /*
5453 * As in DropRelationBuffers, an unlocked precheck should be safe and
5454 * saves some cycles.
5455 */
5456 if (bufHdr->tag.dbOid != dbid)
5457 continue;
5458
5459 /* Make sure we can handle the pin */
5462
5464 if (bufHdr->tag.dbOid == dbid &&
5466 {
5470 }
5471 else
5473 }
5474}

References BM_DIRTY, BM_VALID, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5481 of file bufmgr.c.

5482{
5484
5485 /* currently not needed, but no fundamental reason not to support */
5486 Assert(!BufferIsLocal(buffer));
5487
5488 Assert(BufferIsPinned(buffer));
5489
5490 bufHdr = GetBufferDescriptor(buffer - 1);
5491
5492 Assert(BufferIsLockedByMe(buffer));
5493
5495}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4416
bool BufferIsLockedByMe(Buffer buffer)
Definition bufmgr.c:2971

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, fb(), FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 5077 of file bufmgr.c.

5078{
5079 int i;
5081 SMgrRelation srel = RelationGetSmgr(rel);
5082
5083 if (RelationUsesLocalBuffers(rel))
5084 {
5085 for (i = 0; i < NLocBuffer; i++)
5086 {
5088
5090 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5091 ((buf_state = pg_atomic_read_u64(&bufHdr->state)) &
5092 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5093 {
5094 ErrorContextCallback errcallback;
5095
5096 /* Setup error traceback support for ereport() */
5098 errcallback.arg = bufHdr;
5099 errcallback.previous = error_context_stack;
5100 error_context_stack = &errcallback;
5101
5102 /* Make sure we can handle the pin */
5105
5106 /*
5107 * Pin/unpin mostly to make valgrind work, but it also seems
5108 * like the right thing to do.
5109 */
5110 PinLocalBuffer(bufHdr, false);
5111
5112
5113 FlushLocalBuffer(bufHdr, srel);
5114
5116
5117 /* Pop the error context stack */
5118 error_context_stack = errcallback.previous;
5119 }
5120 }
5121
5122 return;
5123 }
5124
5125 for (i = 0; i < NBuffers; i++)
5126 {
5128
5130
5131 /*
5132 * As in DropRelationBuffers, an unlocked precheck should be safe and
5133 * saves some cycles.
5134 */
5136 continue;
5137
5138 /* Make sure we can handle the pin */
5141
5143 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5145 {
5149 }
5150 else
5152 }
5153}
static void local_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7064
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition localbuf.c:805
int NLocBuffer
Definition localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition rel.h:576

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, fb(), FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u64(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 5165 of file bufmgr.c.

5166{
5167 int i;
5169 bool use_bsearch;
5170
5171 if (nrels == 0)
5172 return;
5173
5174 /* fill-in array for qsort */
5176
5177 for (i = 0; i < nrels; i++)
5178 {
5179 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5180
5181 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5182 srels[i].srel = smgrs[i];
5183 }
5184
5185 /*
5186 * Save the bsearch overhead for low number of relations to sync. See
5187 * DropRelationsAllBuffers for details.
5188 */
5190
5191 /* sort the list of SMgrRelations if necessary */
5192 if (use_bsearch)
5193 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5194
5195 for (i = 0; i < NBuffers; i++)
5196 {
5200
5201 /*
5202 * As in DropRelationBuffers, an unlocked precheck should be safe and
5203 * saves some cycles.
5204 */
5205
5206 if (!use_bsearch)
5207 {
5208 int j;
5209
5210 for (j = 0; j < nrels; j++)
5211 {
5212 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5213 {
5214 srelent = &srels[j];
5215 break;
5216 }
5217 }
5218 }
5219 else
5220 {
5221 RelFileLocator rlocator;
5222
5223 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5224 srelent = bsearch(&rlocator,
5225 srels, nrels, sizeof(SMgrSortArray),
5227 }
5228
5229 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5230 if (srelent == NULL)
5231 continue;
5232
5233 /* Make sure we can handle the pin */
5236
5238 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5240 {
5244 }
5245 else
5247 }
5248
5249 pfree(srels);
5250}

References Assert, BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, LockBufHdr(), NBuffers, palloc_array, pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), rlocator_comparator(), UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FlushUnlockedBuffer()

static void FlushUnlockedBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4553 of file bufmgr.c.

4555{
4557
4560 BufferLockUnlock(buffer, buf);
4561}
static void BufferLockAcquire(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5755
static void BufferLockUnlock(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:5871

References buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_SHARE, BufferDescriptorGetBuffer(), BufferLockAcquire(), BufferLockUnlock(), fb(), FlushBuffer(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 551 of file bufmgr.c.

552{
553 Assert(ref->data.refcount == 0);
554 Assert(ref->data.lockmode == BUFFER_LOCK_UNLOCK);
555
556 if (ref >= &PrivateRefCountArray[0] &&
558 {
559 ref->buffer = InvalidBuffer;
561
562
563 /*
564 * Mark the just used entry as reserved - in many scenarios that
565 * allows us to avoid ever having to search the array/hash for free
566 * entries.
567 */
569 }
570 else
571 {
572 bool found;
573 Buffer buffer = ref->buffer;
574
576 Assert(found);
579 }
580}
static int ReservedRefCountSlot
Definition bufmgr.c:252
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_REMOVE
Definition hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by UnpinBufferNoOwner().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2615 of file bufmgr.c.

2616{
2618
2619 /*
2620 * We get the number of "overflowed" pins for free, but don't know the
2621 * number of pins in PrivateRefCountArray. The cost of calculating that
2622 * exactly doesn't seem worth it, so just assume the max.
2623 */
2625
2626 /* Is this backend already holding more than its fair share? */
2628 return 0;
2629
2631}
static uint32 MaxProportionalPins
Definition bufmgr.c:255

References fb(), MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2603 of file bufmgr.c.

2604{
2605 return MaxProportionalPins;
2606}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 528 of file bufmgr.c.

529{
531
532 Assert(BufferIsValid(buffer));
533 Assert(!BufferIsLocal(buffer));
534
535 /*
536 * Not moving the entry - that's ok for the current users, but we might
537 * want to change this one day.
538 */
539 ref = GetPrivateRefCountEntry(buffer, false);
540
541 if (ref == NULL)
542 return 0;
543 return ref->data.refcount;
544}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), and GetPrivateRefCountEntry().

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), and MarkBufferDirtyHint().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
inlinestatic

Definition at line 493 of file bufmgr.c.

494{
495 Assert(BufferIsValid(buffer));
496 Assert(!BufferIsLocal(buffer));
497
498 /*
499 * It's very common to look up the same buffer repeatedly. To make that
500 * fast, we have a one-entry cache.
501 *
502 * In contrast to the loop in GetPrivateRefCountEntrySlow(), here it
503 * faster to check PrivateRefCountArray[].buffer, as in the case of a hit
504 * fewer addresses are computed and fewer cachelines are accessed. Whereas
505 * in GetPrivateRefCountEntrySlow()'s case, checking
506 * PrivateRefCountArrayKeys saves a lot of memory accesses.
507 */
508 if (likely(PrivateRefCountEntryLast != -1) &&
510 {
512 }
513
514 /*
515 * The code for the cached lookup is small enough to be worth inlining
516 * into the caller. In the miss case however, that empirically doesn't
517 * seem worth it.
518 */
519 return GetPrivateRefCountEntrySlow(buffer, do_move);
520}
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move)
Definition bufmgr.c:404
static int PrivateRefCountEntryLast
Definition bufmgr.c:253

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), GetPrivateRefCountEntrySlow(), likely, PrivateRefCountArray, and PrivateRefCountEntryLast.

Referenced by BufferLockAcquire(), BufferLockConditional(), BufferLockDisownInternal(), BufferLockHeldByMe(), BufferLockHeldByMeInMode(), GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), ResOwnerReleaseBuffer(), and UnpinBufferNoOwner().

◆ GetPrivateRefCountEntrySlow()

static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 404 of file bufmgr.c.

405{
407 int match = -1;
408 int i;
409
410 /*
411 * First search for references in the array, that'll be sufficient in the
412 * majority of cases.
413 */
414 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
415 {
416 if (PrivateRefCountArrayKeys[i] == buffer)
417 {
418 match = i;
419 /* see ReservePrivateRefCountEntry() for why we don't return */
420 }
421 }
422
423 if (likely(match != -1))
424 {
425 /* update cache for the next lookup */
427
428 return &PrivateRefCountArray[match];
429 }
430
431 /*
432 * By here we know that the buffer, if already pinned, isn't residing in
433 * the array.
434 *
435 * Only look up the buffer in the hashtable if we've previously overflowed
436 * into it.
437 */
439 return NULL;
440
442
443 if (res == NULL)
444 return NULL;
445 else if (!do_move)
446 {
447 /* caller doesn't want us to move the hash entry into the array */
448 return res;
449 }
450 else
451 {
452 /* move buffer from hashtable into the free array slot */
453 bool found;
455
456 /* Ensure there's a free array slot */
458
459 /* Use up the reserved slot */
463 Assert(free->buffer == InvalidBuffer);
464
465 /* and fill it */
466 free->buffer = buffer;
467 free->data = res->data;
469 /* update cache for the next lookup */
471
473
474
475 /* delete from hashtable */
477 Assert(found);
480
481 return free;
482 }
483}
@ HASH_FIND
Definition hsearch.h:113
#define free(a)

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, fb(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, likely, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountSlot, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCountEntry().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 2451 of file bufmgr.c.

2452{
2454 Buffer buf;
2456 bool from_ring;
2457
2458 /*
2459 * Ensure, before we pin a victim buffer, that there's a free refcount
2460 * entry and resource owner slot for the pin.
2461 */
2464
2465 /* we return here if a prospective victim buffer gets used concurrently */
2466again:
2467
2468 /*
2469 * Select a victim buffer. The buffer is returned pinned and owned by
2470 * this backend.
2471 */
2474
2475 /*
2476 * We shouldn't have any other pins for this buffer.
2477 */
2479
2480 /*
2481 * If the buffer was dirty, try to write it out. There is a race
2482 * condition here, in that someone might dirty it after we released the
2483 * buffer header lock above, or even while we are writing it out (since
2484 * our share-lock won't prevent hint-bit updates). We will recheck the
2485 * dirty bit after re-locking the buffer header.
2486 */
2487 if (buf_state & BM_DIRTY)
2488 {
2491
2492 /*
2493 * We need a share-lock on the buffer contents to write it out (else
2494 * we might write invalid data, eg because someone else is compacting
2495 * the page contents while we write). We must use a conditional lock
2496 * acquisition here to avoid deadlock. Even though the buffer was not
2497 * pinned (and therefore surely not locked) when StrategyGetBuffer
2498 * returned it, someone else could have pinned and exclusive-locked it
2499 * by the time we get here. If we try to get the lock unconditionally,
2500 * we'd block waiting for them; if they later block waiting for us,
2501 * deadlock ensues. (This has been observed to happen when two
2502 * backends are both trying to split btree index pages, and the second
2503 * one just happens to be trying to split the page the first one got
2504 * from StrategyGetBuffer.)
2505 */
2507 {
2508 /*
2509 * Someone else has locked the buffer, so give it up and loop back
2510 * to get another one.
2511 */
2513 goto again;
2514 }
2515
2516 /*
2517 * If using a nondefault strategy, and writing the buffer would
2518 * require a WAL flush, let the strategy decide whether to go ahead
2519 * and write/reuse the buffer or to choose another victim. We need a
2520 * lock to inspect the page LSN, so this can't be done inside
2521 * StrategyGetBuffer.
2522 */
2523 if (strategy != NULL)
2524 {
2525 XLogRecPtr lsn;
2526
2527 /* Read the LSN while holding buffer header lock */
2529 lsn = BufferGetLSN(buf_hdr);
2531
2532 if (XLogNeedsFlush(lsn)
2533 && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2534 {
2537 goto again;
2538 }
2539 }
2540
2541 /* OK, do the I/O */
2544
2546 &buf_hdr->tag);
2547 }
2548
2549
2550 if (buf_state & BM_VALID)
2551 {
2552 /*
2553 * When a BufferAccessStrategy is in use, blocks evicted from shared
2554 * buffers are counted as IOOP_EVICT in the corresponding context
2555 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2556 * strategy in two cases: 1) while initially claiming buffers for the
2557 * strategy ring 2) to replace an existing strategy ring buffer
2558 * because it is pinned or in use and cannot be reused.
2559 *
2560 * Blocks evicted from buffers already in the strategy ring are
2561 * counted as IOOP_REUSE in the corresponding strategy context.
2562 *
2563 * At this point, we can accurately count evictions and reuses,
2564 * because we have successfully claimed the valid buffer. Previously,
2565 * we may have been forced to release the buffer due to concurrent
2566 * pinners or erroring out.
2567 */
2569 from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
2570 }
2571
2572 /*
2573 * If the buffer has an entry in the buffer mapping table, delete it. This
2574 * can fail because another backend could have pinned or dirtied the
2575 * buffer.
2576 */
2578 {
2580 goto again;
2581 }
2582
2583 /* a final set of sanity checks */
2584#ifdef USE_ASSERT_CHECKING
2586
2589
2591#endif
2592
2593 return buf;
2594}
WritebackContext BackendWritebackContext
Definition buf_init.c:25
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6494
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition bufmgr.c:7279
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
Definition freelist.c:174
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition freelist.c:787
@ IOOP_EVICT
Definition pgstat.h:307
@ IOOP_REUSE
Definition pgstat.h:310
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3145

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), BufferGetLSN, BufferLockConditional(), CheckBufferIsPinnedOnce(), CurrentResourceOwner, fb(), FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBuffer(), LockBufHdr(), pg_atomic_read_u64(), pgstat_count_io_op(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), StrategyGetBuffer(), StrategyRejectBuffer(), UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 6674 of file bufmgr.c.

6675{
6677
6678 /*
6679 * If we get woken slowly then it's possible that the Startup process was
6680 * already woken by other backends before we got here. Also possible that
6681 * we get here by multiple interrupts or interrupts at inappropriate
6682 * times, so make sure we do nothing if the bufid is not set.
6683 */
6684 if (bufid < 0)
6685 return false;
6686
6687 if (GetPrivateRefCount(bufid + 1) > 0)
6688 return true;
6689
6690 return false;
6691}
int GetStartupBufferPinWaitBufId(void)
Definition proc.c:770

References fb(), GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 5533 of file bufmgr.c.

5534{
5535 Assert(BufferIsPinned(buffer));
5537 if (BufferIsLocal(buffer))
5538 LocalRefCount[-buffer - 1]++;
5539 else
5540 {
5542
5543 ref = GetPrivateRefCountEntry(buffer, true);
5544 Assert(ref != NULL);
5545 ref->data.refcount++;
5546 }
5548}
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), LocalRefCount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4120 of file bufmgr.c.

4121{
4123
4124 /*
4125 * An advisory limit on the number of pins each backend should hold, based
4126 * on shared_buffers and the maximum number of connections possible.
4127 * That's very pessimistic, but outside toy-sized shared_buffers it should
4128 * allow plenty of pins. LimitAdditionalPins() and
4129 * GetAdditionalPinLimit() can be used to check the remaining balance.
4130 */
4132
4135
4136 hash_ctl.keysize = sizeof(Buffer);
4137 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4138
4139 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
4141
4142 /*
4143 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
4144 * the corresponding phase of backend shutdown.
4145 */
4146 Assert(MyProc != NULL);
4148}
static void AtProcExit_Buffers(int code, Datum arg)
Definition bufmgr.c:4155
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
int MaxBackends
Definition globals.c:146
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
#define NUM_AUXILIARY_PROCS
Definition proc.h:463

References Assert, AtProcExit_Buffers(), fb(), HASH_BLOBS, hash_create(), HASH_ELEM, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, PrivateRefCountArrayKeys, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 2273 of file bufmgr.c.

2274{
2276 uint32 oldHash; /* hash value for oldTag */
2277 LWLock *oldPartitionLock; /* buffer partition lock for it */
2280
2281 /* Save the original buffer tag before dropping the spinlock */
2282 oldTag = buf->tag;
2283
2285
2286 /*
2287 * Need to compute the old tag's hashcode and partition lock ID. XXX is it
2288 * worth storing the hashcode in BufferDesc so we need not recompute it
2289 * here? Probably not.
2290 */
2293
2294retry:
2295
2296 /*
2297 * Acquire exclusive mapping lock in preparation for changing the buffer's
2298 * association.
2299 */
2301
2302 /* Re-lock the buffer header */
2304
2305 /* If it's changed while we were waiting for lock, do nothing */
2306 if (!BufferTagsEqual(&buf->tag, &oldTag))
2307 {
2310 return;
2311 }
2312
2313 /*
2314 * We assume the reason for it to be pinned is that either we were
2315 * asynchronously reading the page in before erroring out or someone else
2316 * is flushing the page out. Wait for the IO to finish. (This could be
2317 * an infinite loop if the refcount is messed up... it would be nice to
2318 * time out after awhile, but there seems no way to be sure how many loops
2319 * may be needed. Note that if the other guy has pinned the buffer but
2320 * not yet done StartBufferIO, WaitIO will fall through and we'll
2321 * effectively be busy-looping here.)
2322 */
2324 {
2327 /* safety check: should definitely not be our *own* pin */
2329 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2330 WaitIO(buf);
2331 goto retry;
2332 }
2333
2334 /*
2335 * An invalidated buffer should not have any backends waiting to lock the
2336 * buffer, therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2337 */
2339
2340 /*
2341 * Clear out the buffer's tag and flags. We must do this to ensure that
2342 * linear scans of the buffer array don't think the buffer is valid.
2343 */
2345 ClearBufferTag(&buf->tag);
2346
2348 0,
2350 0);
2351
2352 /*
2353 * Remove the buffer from the lookup hashtable, if it was in there.
2354 */
2355 if (oldFlags & BM_TAG_VALID)
2357
2358 /*
2359 * Done with mapping lock.
2360 */
2362}
#define BUF_USAGECOUNT_MASK
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition bufmgr.c:6810

References Assert, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, fb(), GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 2374 of file bufmgr.c.

2375{
2377 uint32 hash;
2379 BufferTag tag;
2380
2382
2383 /* have buffer pinned, so it's safe to read tag without lock */
2384 tag = buf_hdr->tag;
2385
2386 hash = BufTableHashCode(&tag);
2388
2390
2391 /* lock the buffer header */
2393
2394 /*
2395 * We have the buffer pinned nobody else should have been able to unset
2396 * this concurrently.
2397 */
2400 Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
2401
2402 /*
2403 * If somebody else pinned the buffer since, or even worse, dirtied it,
2404 * give up on this buffer: It's clearly in use.
2405 */
2407 {
2409
2412
2413 return false;
2414 }
2415
2416 /*
2417 * An invalidated buffer should not have any backends waiting to lock the
2418 * buffer, therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2419 */
2421
2422 /*
2423 * Clear out the buffer's tag and flags and usagecount. This is not
2424 * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
2425 * doing anything with the buffer. But currently it's beneficial, as the
2426 * cheaper pre-check for several linear scans of shared buffers use the
2427 * tag (see e.g. FlushDatabaseBuffers()).
2428 */
2429 ClearBufferTag(&buf_hdr->tag);
2431 0,
2433 0);
2434
2436
2437 /* finally delete buffer from the buffer mapping table */
2438 BufTableDelete(&tag, hash);
2439
2441
2446
2447 return true;
2448}

References Assert, BM_DIRTY, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), fb(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u64(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 6758 of file bufmgr.c.

6759{
6762
6763 Assert(BufferIsValid(buffer));
6764
6765 /* see AIO related comment in LockBufferForCleanup() */
6766
6767 if (BufferIsLocal(buffer))
6768 {
6769 /* There should be exactly one pin */
6770 if (LocalRefCount[-buffer - 1] != 1)
6771 return false;
6772 /* Nobody else to wait for */
6773 return true;
6774 }
6775
6776 /* There should be exactly one local pin */
6777 if (GetPrivateRefCount(buffer) != 1)
6778 return false;
6779
6780 bufHdr = GetBufferDescriptor(buffer - 1);
6781
6782 /* caller must hold exclusive lock on buffer */
6784
6786
6789 {
6790 /* pincount is OK. */
6792 return true;
6793 }
6794
6796 return false;
6797}

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 7329 of file bufmgr.c.

7330{
7332 int i;
7333
7334 if (wb_context->nr_pending == 0)
7335 return;
7336
7337 /*
7338 * Executing the writes in-order can make them a lot faster, and allows to
7339 * merge writeback requests to consecutive blocks into larger writebacks.
7340 */
7341 sort_pending_writebacks(wb_context->pending_writebacks,
7342 wb_context->nr_pending);
7343
7345
7346 /*
7347 * Coalesce neighbouring writes, but nothing else. For that we iterate
7348 * through the, now sorted, array of pending flushes, and look forward to
7349 * find all neighbouring (or identical) writes.
7350 */
7351 for (i = 0; i < wb_context->nr_pending; i++)
7352 {
7356 int ahead;
7357 BufferTag tag;
7359 Size nblocks = 1;
7360
7361 cur = &wb_context->pending_writebacks[i];
7362 tag = cur->tag;
7364
7365 /*
7366 * Peek ahead, into following writeback requests, to see if they can
7367 * be combined with the current one.
7368 */
7369 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
7370 {
7371
7372 next = &wb_context->pending_writebacks[i + ahead + 1];
7373
7374 /* different file, stop */
7376 BufTagGetRelFileLocator(&next->tag)) ||
7377 BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
7378 break;
7379
7380 /* ok, block queued twice, skip */
7381 if (cur->tag.blockNum == next->tag.blockNum)
7382 continue;
7383
7384 /* only merge consecutive writes */
7385 if (cur->tag.blockNum + 1 != next->tag.blockNum)
7386 break;
7387
7388 nblocks++;
7389 cur = next;
7390 }
7391
7392 i += ahead;
7393
7394 /* and finally tell the kernel to write the data to storage */
7396 smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
7397 }
7398
7399 /*
7400 * Assume that writeback requests are only issued for buffers containing
7401 * blocks of permanent relations.
7402 */
7404 IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
7405
7406 wb_context->nr_pending = 0;
7407}
static int32 next
Definition blutils.c:225
struct cursor * cur
Definition ecpg.c:29
@ IOOP_WRITEBACK
Definition pgstat.h:311
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition smgr.c:805

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, fb(), i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2641 of file bufmgr.c.

2642{
2643 uint32 limit;
2644
2645 if (*additional_pins <= 1)
2646 return;
2647
2648 limit = GetAdditionalPinLimit();
2649 limit = Max(limit, 1);
2650 if (limit < *additional_pins)
2651 *additional_pins = limit;
2652}
uint32 GetAdditionalPinLimit(void)
Definition bufmgr.c:2615
#define Max(x, y)
Definition c.h:991

References fb(), GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().

◆ local_buffer_readv_complete()

static PgAioResult local_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8513 of file bufmgr.c.

8515{
8517}
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
Definition bufmgr.c:8258

References buffer_readv_complete(), and fb().

◆ local_buffer_readv_stage()

static void local_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 8507 of file bufmgr.c.

8508{
8509 buffer_stage_common(ioh, false, true);
8510}
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
Definition bufmgr.c:7870

References buffer_stage_common(), and fb().

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void arg)
static

Definition at line 7064 of file bufmgr.c.

7065{
7067
7068 if (bufHdr != NULL)
7069 errcontext("writing block %u of relation \"%s\"",
7070 bufHdr->tag.blockNum,
7073 BufTagGetForkNum(&bufHdr->tag)).str);
7074}
#define errcontext
Definition elog.h:198
void * arg

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), MyProcNumber, and relpathbackend.

Referenced by FlushRelationBuffers().

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 6527 of file bufmgr.c.

6528{
6530 TimestampTz waitStart = 0;
6531 bool waiting = false;
6532 bool logged_recovery_conflict = false;
6533
6534 Assert(BufferIsPinned(buffer));
6536
6538
6539 /*
6540 * We do not yet need to be worried about in-progress AIOs holding a pin,
6541 * as we, so far, only support doing reads via AIO and this function can
6542 * only be called once the buffer is valid (i.e. no read can be in
6543 * flight).
6544 */
6545
6546 /* Nobody else to wait for */
6547 if (BufferIsLocal(buffer))
6548 return;
6549
6550 bufHdr = GetBufferDescriptor(buffer - 1);
6551
6552 for (;;)
6553 {
6555 uint64 unset_bits = 0;
6556
6557 /* Try to acquire lock */
6560
6563 {
6564 /* Successfully acquired exclusive lock with pincount 1 */
6566
6567 /*
6568 * Emit the log message if recovery conflict on buffer pin was
6569 * resolved but the startup process waited longer than
6570 * deadlock_timeout for it.
6571 */
6574 waitStart, GetCurrentTimestamp(),
6575 NULL, false);
6576
6577 if (waiting)
6578 {
6579 /* reset ps display to remove the suffix if we added one */
6581 waiting = false;
6582 }
6583 return;
6584 }
6585 /* Failed, so mark myself as waiting for pincount 1 */
6587 {
6590 elog(ERROR, "multiple backends attempting to wait for pincount 1");
6591 }
6592 bufHdr->wait_backend_pgprocno = MyProcNumber;
6596 0);
6598
6599 /* Wait to be signaled by UnpinBuffer() */
6600 if (InHotStandby)
6601 {
6602 if (!waiting)
6603 {
6604 /* adjust the process title to indicate that it's waiting */
6605 set_ps_display_suffix("waiting");
6606 waiting = true;
6607 }
6608
6609 /*
6610 * Emit the log message if the startup process is waiting longer
6611 * than deadlock_timeout for recovery conflict on buffer pin.
6612 *
6613 * Skip this if first time through because the startup process has
6614 * not started waiting yet in this case. So, the wait start
6615 * timestamp is set after this logic.
6616 */
6617 if (waitStart != 0 && !logged_recovery_conflict)
6618 {
6620
6621 if (TimestampDifferenceExceeds(waitStart, now,
6623 {
6625 waitStart, now, NULL, true);
6627 }
6628 }
6629
6630 /*
6631 * Set the wait start timestamp if logging is enabled and first
6632 * time through.
6633 */
6634 if (log_recovery_conflict_waits && waitStart == 0)
6635 waitStart = GetCurrentTimestamp();
6636
6637 /* Publish the bufid that Startup process waits on */
6638 SetStartupBufferPinWaitBufId(buffer - 1);
6639 /* Set alarm and then wait to be signaled by UnpinBuffer() */
6641 /* Reset the published bufid */
6643 }
6644 else
6646
6647 /*
6648 * Remove flag marking us as waiter. Normally this will not be set
6649 * anymore, but ProcWaitForSignal() can return for other signals as
6650 * well. We take care to only reset the flag if we're the waiter, as
6651 * theoretically another backend could have started waiting. That's
6652 * impossible with the current usages due to table level locking, but
6653 * better be safe.
6654 */
6656 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
6657 bufHdr->wait_backend_pgprocno == MyProcNumber)
6659
6661 0, unset_bits,
6662 0);
6663
6665 /* Loop back and try again */
6666 }
6667}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
#define BM_PIN_COUNT_WAITER
static BufferDesc * PinCountWaitBuf
Definition bufmgr.c:212
int64 TimestampTz
Definition timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
int DeadlockTimeout
Definition proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition proc.c:758
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:1979
void ResolveRecoveryConflictWithBufferPin(void)
Definition standby.c:792
bool log_recovery_conflict_waits
Definition standby.c:42
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition standby.c:274
static volatile sig_atomic_t waiting
#define InHotStandby
Definition xlogutils.h:60

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, fb(), GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().

◆ LockBufferInternal()

void LockBufferInternal ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 6431 of file bufmgr.c.

6432{
6434
6435 /*
6436 * We can't wait if we haven't got a PGPROC. This should only occur
6437 * during bootstrap or shared memory initialization. Put an Assert here
6438 * to catch unsafe coding practices.
6439 */
6441
6442 /* handled in LockBuffer() wrapper */
6444
6445 Assert(BufferIsPinned(buffer));
6446 if (BufferIsLocal(buffer))
6447 return; /* local buffers need no lock */
6448
6449 buf_hdr = GetBufferDescriptor(buffer - 1);
6450
6451 /*
6452 * Test the most frequent lock modes first. While a switch (mode) would be
6453 * nice, at least gcc generates considerably worse code for it.
6454 *
6455 * Call BufferLockAcquire() with a constant argument for mode, to generate
6456 * more efficient code for the different lock modes.
6457 */
6458 if (mode == BUFFER_LOCK_SHARE)
6460 else if (mode == BUFFER_LOCK_EXCLUSIVE)
6464 else
6465 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
6466}
bool IsUnderPostmaster
Definition globals.c:120

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, BufferLockAcquire(), elog, ERROR, fb(), GetBufferDescriptor(), IsUnderPostmaster, mode, and MyProc.

Referenced by LockBuffer().

◆ LockBufHdr()

uint64 LockBufHdr ( BufferDesc desc)

Definition at line 7107 of file bufmgr.c.

7108{
7110
7112
7113 while (true)
7114 {
7115 /*
7116 * Always try once to acquire the lock directly, without setting up
7117 * the spin-delay infrastructure. The work necessary for that shows up
7118 * in profiles and is rarely necessary.
7119 */
7121 if (likely(!(old_buf_state & BM_LOCKED)))
7122 break; /* got lock */
7123
7124 /* and then spin without atomic operations until lock is released */
7125 {
7127
7129
7130 while (old_buf_state & BM_LOCKED)
7131 {
7134 }
7136 }
7137
7138 /*
7139 * Retry. The lock might obviously already be re-acquired by the time
7140 * we're attempting to get it again.
7141 */
7142 }
7143
7144 return old_buf_state | BM_LOCKED;
7145}
void perform_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:186
#define init_local_spin_delay(status)
Definition s_lock.h:757

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, fb(), finish_spin_delay(), init_local_spin_delay, likely, perform_spin_delay(), pg_atomic_fetch_or_u64(), pg_atomic_read_u64(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), buffer_stage_common(), BufferAlloc(), BufferGetLSNAtomic(), BufferLockDequeueSelf(), BufferLockQueueSelf(), BufferLockWakeup(), BufferSync(), ConditionalLockBufferForCleanup(), create_toy_buffer(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), pg_buffercache_os_pages_internal(), pg_buffercache_pages(), StartBufferIO(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), WaitIO(), and WakePinCountWaiter().

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 3056 of file bufmgr.c.

3057{
3061
3062 if (!BufferIsValid(buffer))
3063 elog(ERROR, "bad buffer ID: %d", buffer);
3064
3065 if (BufferIsLocal(buffer))
3066 {
3067 MarkLocalBufferDirty(buffer);
3068 return;
3069 }
3070
3071 bufHdr = GetBufferDescriptor(buffer - 1);
3072
3073 Assert(BufferIsPinned(buffer));
3075
3076 /*
3077 * NB: We have to wait for the buffer header spinlock to be not held, as
3078 * TerminateBufferIO() relies on the spinlock.
3079 */
3081 for (;;)
3082 {
3085
3087
3090
3092 buf_state))
3093 break;
3094 }
3095
3096 /*
3097 * If the buffer was not dirty already, do vacuum accounting.
3098 */
3099 if (!(old_buf_state & BM_DIRTY))
3100 {
3102 if (VacuumCostActive)
3104 }
3105}
pg_noinline uint64 WaitBufHdrUnlocked(BufferDesc *buf)
Definition bufmgr.c:7155
int VacuumCostPageDirty
Definition globals.c:153
void MarkLocalBufferDirty(Buffer buffer)
Definition localbuf.c:491
int64 shared_blks_dirtied
Definition instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, fb(), GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pgBufferUsage, BufferUsage::shared_blks_dirtied, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), identify_and_fix_vm_corruption(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5565 of file bufmgr.c.

5566{
5568 Page page = BufferGetPage(buffer);
5569
5570 if (!BufferIsValid(buffer))
5571 elog(ERROR, "bad buffer ID: %d", buffer);
5572
5573 if (BufferIsLocal(buffer))
5574 {
5575 MarkLocalBufferDirty(buffer);
5576 return;
5577 }
5578
5579 bufHdr = GetBufferDescriptor(buffer - 1);
5580
5581 Assert(GetPrivateRefCount(buffer) > 0);
5582 /* here, either share or exclusive lock is OK */
5583 Assert(BufferIsLockedByMe(buffer));
5584
5585 /*
5586 * This routine might get called many times on the same page, if we are
5587 * making the first scan after commit of an xact that added/deleted many
5588 * tuples. So, be as quick as we can if the buffer is already dirty. We
5589 * do this by not acquiring spinlock if it looks like the status bits are
5590 * already set. Since we make this test unlocked, there's a chance we
5591 * might fail to notice that the flags have just been cleared, and failed
5592 * to reset them, due to memory-ordering issues. But since this function
5593 * is only intended to be used in cases where failing to write out the
5594 * data would be harmless anyway, it doesn't really matter.
5595 */
5596 if ((pg_atomic_read_u64(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5598 {
5600 bool dirtied = false;
5601 bool delayChkptFlags = false;
5603
5604 /*
5605 * If we need to protect hint bit updates from torn writes, WAL-log a
5606 * full page image of the page. This full page image is only necessary
5607 * if the hint bit update is the first change to the page since the
5608 * last checkpoint.
5609 *
5610 * We don't check full_page_writes here because that logic is included
5611 * when we call XLogInsert() since the value changes dynamically.
5612 */
5613 if (XLogHintBitIsNeeded() &&
5615 {
5616 /*
5617 * If we must not write WAL, due to a relfilelocator-specific
5618 * condition or being in recovery, don't dirty the page. We can
5619 * set the hint, just not dirty the page as a result so the hint
5620 * is lost when we evict the page or shutdown.
5621 *
5622 * See src/backend/storage/page/README for longer discussion.
5623 */
5624 if (RecoveryInProgress() ||
5626 return;
5627
5628 /*
5629 * If the block is already dirty because we either made a change
5630 * or set a hint already, then we don't need to write a full page
5631 * image. Note that aggressive cleaning of blocks dirtied by hint
5632 * bit setting would increase the call rate. Bulk setting of hint
5633 * bits would reduce the call rate...
5634 *
5635 * We must issue the WAL record before we mark the buffer dirty.
5636 * Otherwise we might write the page before we write the WAL. That
5637 * causes a race condition, since a checkpoint might occur between
5638 * writing the WAL record and marking the buffer dirty. We solve
5639 * that with a kluge, but one that is already in use during
5640 * transaction commit to prevent race conditions. Basically, we
5641 * simply prevent the checkpoint WAL record from being written
5642 * until we have marked the buffer dirty. We don't start the
5643 * checkpoint flush until we have marked dirty, so our checkpoint
5644 * must flush the change to disk successfully or the checkpoint
5645 * never gets written, so crash recovery will fix.
5646 *
5647 * It's possible we may enter here without an xid, so it is
5648 * essential that CreateCheckPoint waits for virtual transactions
5649 * rather than full transactionids.
5650 */
5653 delayChkptFlags = true;
5654 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5655 }
5656
5658
5660
5661 if (!(buf_state & BM_DIRTY))
5662 {
5663 dirtied = true; /* Means "will be dirtied by this action" */
5664
5665 /*
5666 * Set the page LSN if we wrote a backup block. We aren't supposed
5667 * to set this when only holding a share lock but as long as we
5668 * serialise it somehow we're OK. We choose to set LSN while
5669 * holding the buffer header lock, which causes any reader of an
5670 * LSN who holds only a share lock to also obtain a buffer header
5671 * lock before using PageGetLSN(), which is enforced in
5672 * BufferGetLSNAtomic().
5673 *
5674 * If checksums are enabled, you might think we should reset the
5675 * checksum here. That will happen when the page is written
5676 * sometime later in this checkpoint cycle.
5677 */
5678 if (XLogRecPtrIsValid(lsn))
5679 PageSetLSN(page, lsn);
5680 }
5681
5684 0, 0);
5685
5686 if (delayChkptFlags)
5688
5689 if (dirtied)
5690 {
5692 if (VacuumCostActive)
5694 }
5695 }
5696}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
#define DELAY_CHKPT_START
Definition proc.h:136
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition storage.c:573
int delayChkptFlags
Definition proc.h:257
bool RecoveryInProgress(void)
Definition xlog.c:6460
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u64(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers ( int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 7821 of file bufmgr.c.

7824{
7825 *buffers_dirtied = 0;
7827 *buffers_skipped = 0;
7828
7829 for (int buf = 1; buf <= NBuffers; buf++)
7830 {
7831 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7834
7836
7838 if (!(buf_state & BM_VALID))
7839 continue;
7840
7843
7844 LockBufHdr(desc);
7845
7847 (*buffers_dirtied)++;
7848 else if (buffer_already_dirty)
7849 (*buffers_already_dirty)++;
7850 else
7851 (*buffers_skipped)++;
7852 }
7853}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition bufmgr.c:7672

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers ( Relation  rel,
int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 7764 of file bufmgr.c.

7768{
7770
7771 *buffers_dirtied = 0;
7773 *buffers_skipped = 0;
7774
7775 for (int buf = 1; buf <= NBuffers; buf++)
7776 {
7777 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7780
7782
7783 /* An unlocked precheck should be safe and saves some cycles. */
7784 if ((buf_state & BM_VALID) == 0 ||
7786 continue;
7787
7788 /* Make sure we can pin the buffer. */
7791
7792 buf_state = LockBufHdr(desc);
7793
7794 /* recheck, could have changed without the lock */
7795 if ((buf_state & BM_VALID) == 0 ||
7797 {
7798 UnlockBufHdr(desc);
7799 continue;
7800 }
7801
7803 (*buffers_dirtied)++;
7804 else if (buffer_already_dirty)
7805 (*buffers_already_dirty)++;
7806 else
7807 (*buffers_skipped)++;
7808 }
7809}

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer ( Buffer  buf,
bool buffer_already_dirty 
)

Definition at line 7728 of file bufmgr.c.

7729{
7730 BufferDesc *desc;
7731 bool buffer_dirtied = false;
7732
7734
7735 /* Make sure we can pin the buffer. */
7738
7739 desc = GetBufferDescriptor(buf - 1);
7740 LockBufHdr(desc);
7741
7743 /* Both can not be true at the same time */
7745
7746 return buffer_dirtied;
7747}

References Assert, buf, BufferIsLocal, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().

◆ MarkDirtyUnpinnedBufferInternal()

static bool MarkDirtyUnpinnedBufferInternal ( Buffer  buf,
BufferDesc desc,
bool buffer_already_dirty 
)
static

Definition at line 7672 of file bufmgr.c.

7674{
7676 bool result = false;
7677
7678 *buffer_already_dirty = false;
7679
7682
7683 if ((buf_state & BM_VALID) == 0)
7684 {
7685 UnlockBufHdr(desc);
7686 return false;
7687 }
7688
7689 /* Check that it's not pinned already. */
7691 {
7692 UnlockBufHdr(desc);
7693 return false;
7694 }
7695
7696 /* Pin the buffer and then release the buffer spinlock */
7697 PinBuffer_Locked(desc);
7698
7699 /* If it was not already dirty, mark it as dirty. */
7700 if (!(buf_state & BM_DIRTY))
7701 {
7704 result = true;
7705 BufferLockUnlock(buf, desc);
7706 }
7707 else
7708 *buffer_already_dirty = true;
7709
7710 UnpinBuffer(desc);
7711
7712 return result;
7713}
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3056

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BufferLockAcquire(), BufferLockUnlock(), fb(), MarkBufferDirty(), pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), and MarkDirtyUnpinnedBuffer().

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 373 of file bufmgr.c.

374{
376
377 /* only allowed to be called when a reservation has been made */
379
380 /* use up the reserved entry */
382
383 /* and fill it */
385 res->buffer = buffer;
386 res->data.refcount = 0;
388
389 /* update cache for the next lookup */
391
393
394 return res;
395}

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, PrivateRefCountEntry::data, PrivateRefCountData::lockmode, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountData::refcount, and ReservedRefCountSlot.

Referenced by TrackNewBufferPin().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy,
bool  skip_if_not_valid 
)
static

Definition at line 3181 of file bufmgr.c.

3183{
3185 bool result;
3187
3190
3191 ref = GetPrivateRefCountEntry(b, true);
3192
3193 if (ref == NULL)
3194 {
3197
3199 for (;;)
3200 {
3202 return false;
3203
3204 /*
3205 * We're not allowed to increase the refcount while the buffer
3206 * header spinlock is held. Wait for the lock to be released.
3207 */
3210
3212
3213 /* increase refcount */
3215
3216 if (strategy == NULL)
3217 {
3218 /* Default case: increase usagecount unless already max. */
3221 }
3222 else
3223 {
3224 /*
3225 * Ring buffers shouldn't evict others from pool. Thus we
3226 * don't make usagecount more than 1.
3227 */
3230 }
3231
3233 buf_state))
3234 {
3235 result = (buf_state & BM_VALID) != 0;
3236
3238 break;
3239 }
3240 }
3241 }
3242 else
3243 {
3244 /*
3245 * If we previously pinned the buffer, it is likely to be valid, but
3246 * it may not be if StartReadBuffers() was called and
3247 * WaitReadBuffers() hasn't been called yet. We'll check by loading
3248 * the flags without locking. This is racy, but it's OK to return
3249 * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
3250 * it'll see that it's now valid.
3251 *
3252 * Note: We deliberately avoid a Valgrind client request here.
3253 * Individual access methods can optionally superimpose buffer page
3254 * client requests on top of our client requests to enforce that
3255 * buffers are only accessed while locked (and pinned). It's possible
3256 * that the buffer page is legitimately non-accessible here. We
3257 * cannot meddle with that.
3258 */
3259 result = (pg_atomic_read_u64(&buf->state) & BM_VALID) != 0;
3260
3261 Assert(ref->data.refcount > 0);
3262 ref->data.refcount++;
3264 }
3265
3266 return result;
3267}
#define BM_MAX_USAGE_COUNT
#define BUF_STATE_GET_USAGECOUNT(state)
void TrackNewBufferPin(Buffer buf)
Definition bufmgr.c:3416

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), ReservedRefCountSlot, ResourceOwnerRememberBuffer(), TrackNewBufferPin(), unlikely, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 3292 of file bufmgr.c.

3293{
3295
3296 /*
3297 * As explained, We don't expect any preexisting pins. That allows us to
3298 * manipulate the PrivateRefCount after releasing the spinlock
3299 */
3301
3302 /*
3303 * Since we hold the buffer spinlock, we can update the buffer state and
3304 * release the lock in one operation.
3305 */
3307
3309 0, 0, 1);
3310
3312}

References Assert, buf, BufferDescriptorGetBuffer(), fb(), GetPrivateRefCountEntry(), pg_atomic_read_u64(), TrackNewBufferPin(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), MarkDirtyUnpinnedBufferInternal(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool foundPtr 
)
static

Definition at line 1210 of file bufmgr.c.

1217{
1221
1222 Assert(blockNum != P_NEW);
1223
1224 /* Persistence should be set before */
1225 Assert((persistence == RELPERSISTENCE_TEMP ||
1226 persistence == RELPERSISTENCE_PERMANENT ||
1227 persistence == RELPERSISTENCE_UNLOGGED));
1228
1229 if (persistence == RELPERSISTENCE_TEMP)
1230 {
1233 }
1234 else
1235 {
1236 io_context = IOContextForStrategy(strategy);
1238 }
1239
1240 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1244 smgr->smgr_rlocator.backend);
1245
1246 if (persistence == RELPERSISTENCE_TEMP)
1247 {
1248 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1249 if (*foundPtr)
1251 }
1252 else
1253 {
1254 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1255 strategy, foundPtr, io_context);
1256 if (*foundPtr)
1258 }
1259 if (rel)
1260 {
1261 /*
1262 * While pgBufferUsage's "read" counter isn't bumped unless we reach
1263 * WaitReadBuffers() (so, not for hits, and not for buffers that are
1264 * zeroed instead), the per-relation stats always count them.
1265 */
1267 if (*foundPtr)
1269 }
1270 if (*foundPtr)
1271 {
1273 if (VacuumCostActive)
1275
1276 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1280 smgr->smgr_rlocator.backend,
1281 true);
1282 }
1283
1285}
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition bufmgr.c:2100
#define P_NEW
Definition bufmgr.h:198
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition localbuf.c:119
#define pgstat_count_buffer_read(rel)
Definition pgstat.h:715

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, fb(), IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, and VacuumCostPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 772 of file bufmgr.c.

773{
775 Assert(BlockNumberIsValid(blockNum));
776
778 {
779 /* see comments in ReadBufferExtended */
783 errmsg("cannot access temporary tables of other sessions")));
784
785 /* pass it off to localbuf.c */
786 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
787 }
788 else
789 {
790 /* pass it to the shared buffer version */
791 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
792 }
793}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:682
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:667
#define RelationIsValid(relation)
Definition rel.h:489

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, fb(), PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 682 of file bufmgr.c.

685{
686 PrefetchBufferResult result = {InvalidBuffer, false};
687 BufferTag newTag; /* identity of requested block */
688 uint32 newHash; /* hash value for newTag */
689 LWLock *newPartitionLock; /* buffer partition lock for it */
690 int buf_id;
691
692 Assert(BlockNumberIsValid(blockNum));
693
694 /* create a tag so we can lookup the buffer */
695 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
696 forkNum, blockNum);
697
698 /* determine its hash code and partition lock ID */
701
702 /* see if the block is in the buffer pool already */
704 buf_id = BufTableLookup(&newTag, newHash);
706
707 /* If not in buffers, initiate prefetch */
708 if (buf_id < 0)
709 {
710#ifdef USE_PREFETCH
711 /*
712 * Try to initiate an asynchronous read. This returns false in
713 * recovery if the relation file doesn't exist.
714 */
715 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
716 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
717 {
718 result.initiated_io = true;
719 }
720#endif /* USE_PREFETCH */
721 }
722 else
723 {
724 /*
725 * Report the buffer it was in at that time. The caller may be able
726 * to avoid a buffer table lookup, but it's not pinned and it must be
727 * rechecked!
728 */
729 result.recent_buffer = buf_id + 1;
730 }
731
732 /*
733 * If the block *is* in buffers, we do nothing. This is not really ideal:
734 * the block might be just about to be evicted, which would be stupid
735 * since we know we are going to need it soon. But the only easy answer
736 * is to bump the usage_count, which does not seem like a great solution:
737 * when the caller does ultimately touch the block, usage_count would get
738 * bumped again, resulting in too much favoritism for blocks that are
739 * involved in a prefetch sequence. A real fix would involve some
740 * additional per-buffer state, and it's not clear that there's enough of
741 * a problem to justify that.
742 */
743
744 return result;
745}
int io_direct_flags
Definition fd.c:171
#define IO_DIRECT_DATA
Definition fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition smgr.c:678
Buffer recent_buffer
Definition bufmgr.h:61

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), fb(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ProcessReadBuffersResult()

static void ProcessReadBuffersResult ( ReadBuffersOperation operation)
static

Definition at line 1693 of file bufmgr.c.

1694{
1695 PgAioReturn *aio_ret = &operation->io_return;
1697 int newly_read_blocks = 0;
1698
1699 Assert(pgaio_wref_valid(&operation->io_wref));
1700 Assert(aio_ret->result.status != PGAIO_RS_UNKNOWN);
1701
1702 /*
1703 * SMGR reports the number of blocks successfully read as the result of
1704 * the IO operation. Thus we can simply add that to ->nblocks_done.
1705 */
1706
1707 if (likely(rs != PGAIO_RS_ERROR))
1708 newly_read_blocks = aio_ret->result.result;
1709
1710 if (rs == PGAIO_RS_ERROR || rs == PGAIO_RS_WARNING)
1711 pgaio_result_report(aio_ret->result, &aio_ret->target_data,
1712 rs == PGAIO_RS_ERROR ? ERROR : WARNING);
1713 else if (aio_ret->result.status == PGAIO_RS_PARTIAL)
1714 {
1715 /*
1716 * We'll retry, so we just emit a debug message to the server log (or
1717 * not even that in prod scenarios).
1718 */
1719 pgaio_result_report(aio_ret->result, &aio_ret->target_data, DEBUG1);
1720 elog(DEBUG3, "partial read, will retry");
1721 }
1722
1725
1726 operation->nblocks_done += newly_read_blocks;
1727
1728 Assert(operation->nblocks_done <= operation->nblocks);
1729}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition aio.c:971
PgAioResultStatus
Definition aio_types.h:79
@ PGAIO_RS_UNKNOWN
Definition aio_types.h:80
@ PGAIO_RS_PARTIAL
Definition aio_types.h:82
#define DEBUG3
Definition elog.h:28
PgAioResult result
Definition aio_types.h:132

References Assert, DEBUG1, DEBUG3, elog, ERROR, fb(), ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, pgaio_result_report(), PGAIO_RS_ERROR, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, PGAIO_RS_WARNING, pgaio_wref_valid(), PgAioReturn::result, PgAioResult::status, and WARNING.

Referenced by WaitReadBuffers().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 864 of file bufmgr.c.

865{
867}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:911
@ RBM_NORMAL
Definition bufmgr.h:46

References fb(), MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1293 of file bufmgr.c.

1297{
1298 ReadBuffersOperation operation;
1299 Buffer buffer;
1300 int flags;
1301 char persistence;
1302
1303 /*
1304 * Backward compatibility path, most code should use ExtendBufferedRel()
1305 * instead, as acquiring the extension lock inside ExtendBufferedRel()
1306 * scales a lot better.
1307 */
1308 if (unlikely(blockNum == P_NEW))
1309 {
1311
1312 /*
1313 * Since no-one else can be looking at the page contents yet, there is
1314 * no difference between an exclusive lock and a cleanup-strength
1315 * lock.
1316 */
1318 flags |= EB_LOCK_FIRST;
1319
1320 return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1321 }
1322
1323 if (rel)
1324 persistence = rel->rd_rel->relpersistence;
1325 else
1326 persistence = smgr_persistence;
1327
1330 {
1331 bool found;
1332
1333 buffer = PinBufferForBlock(rel, smgr, persistence,
1334 forkNum, blockNum, strategy, &found);
1335 ZeroAndLockBuffer(buffer, mode, found);
1336 return buffer;
1337 }
1338
1339 /*
1340 * Signal that we are going to immediately wait. If we're immediately
1341 * waiting, there is no benefit in actually executing the IO
1342 * asynchronously, it would just add dispatch overhead.
1343 */
1345 if (mode == RBM_ZERO_ON_ERROR)
1347 operation.smgr = smgr;
1348 operation.rel = rel;
1349 operation.persistence = persistence;
1350 operation.forknum = forkNum;
1351 operation.strategy = strategy;
1352 if (StartReadBuffer(&operation,
1353 &buffer,
1354 blockNum,
1355 flags))
1356 WaitReadBuffers(&operation);
1357
1358 return buffer;
1359}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:964
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition bufmgr.c:1131
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition bufmgr.c:1210
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition bufmgr.c:1732
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition bufmgr.c:1608
@ RBM_ZERO_ON_ERROR
Definition bufmgr.h:51
#define BMR_REL(p_rel)
Definition bufmgr.h:114
Form_pg_class rd_rel
Definition rel.h:111

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), fb(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 911 of file bufmgr.c.

913{
914 Buffer buf;
915
916 /*
917 * Reject attempts to read non-local temporary relations; we would be
918 * likely to get wrong data since we have no visibility into the owning
919 * session's local buffers.
920 */
924 errmsg("cannot access temporary tables of other sessions")));
925
926 /*
927 * Read the buffer, and update pgstat counters to reflect a cache hit or
928 * miss.
929 */
931 forkNum, blockNum, mode, strategy);
932
933 return buf;
934}

References buf, ereport, errcode(), errmsg(), ERROR, fb(), mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().

◆ ReadBuffersCanStartIO()

static bool ReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1664 of file bufmgr.c.

1665{
1666 /*
1667 * If this backend currently has staged IO, we need to submit the pending
1668 * IO before waiting for the right to issue IO, to avoid the potential for
1669 * deadlocks (and, more commonly, unnecessary delays for other backends).
1670 */
1671 if (!nowait && pgaio_have_staged())
1672 {
1673 if (ReadBuffersCanStartIOOnce(buffer, true))
1674 return true;
1675
1676 /*
1677 * Unfortunately StartBufferIO() returning false doesn't allow to
1678 * distinguish between the buffer already being valid and IO already
1679 * being in progress. Since IO already being in progress is quite
1680 * rare, this approach seems fine.
1681 */
1683 }
1684
1685 return ReadBuffersCanStartIOOnce(buffer, nowait);
1686}
bool pgaio_have_staged(void)
Definition aio.c:1107
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
Definition bufmgr.c:1651

References PrivateRefCountEntry::buffer, pgaio_have_staged(), pgaio_submit_staged(), and ReadBuffersCanStartIOOnce().

Referenced by AsyncReadBuffers().

◆ ReadBuffersCanStartIOOnce()

static bool ReadBuffersCanStartIOOnce ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1651 of file bufmgr.c.

1652{
1653 if (BufferIsLocal(buffer))
1654 return StartLocalBufferIO(GetLocalBufferDescriptor(-buffer - 1),
1655 true, nowait);
1656 else
1657 return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1658}
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
Definition localbuf.c:523

References PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), StartBufferIO(), and StartLocalBufferIO().

Referenced by ReadBuffersCanStartIO().

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 948 of file bufmgr.c.

951{
952 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
953
954 return ReadBuffer_common(NULL, smgr,
956 forkNum, blockNum,
957 mode, strategy);
958}

References fb(), INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 803 of file bufmgr.c.

805{
807 BufferTag tag;
809
810 Assert(BufferIsValid(recent_buffer));
811
814 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
815
816 if (BufferIsLocal(recent_buffer))
817 {
818 int b = -recent_buffer - 1;
819
822
823 /* Is it still valid and holding the right tag? */
824 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
825 {
826 PinLocalBuffer(bufHdr, true);
827
829
830 return true;
831 }
832 }
833 else
834 {
835 bufHdr = GetBufferDescriptor(recent_buffer - 1);
836
837 /*
838 * Is it still valid and holding the right tag? We do an unlocked tag
839 * comparison first, to make it unlikely that we'll increment the
840 * usage counter of the wrong buffer, if someone calls us with a very
841 * out of date recent_buffer. Then we'll check it again if we get the
842 * pin.
843 */
844 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
845 PinBuffer(bufHdr, NULL, true))
846 {
847 if (BufferTagsEqual(&tag, &bufHdr->tag))
848 {
850 return true;
851 }
853 }
854 }
855
856 return false;
857}

References Assert, b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u64(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 5263 of file bufmgr.c.

5266{
5267 Buffer srcBuf;
5268 Buffer dstBuf;
5269 Page srcPage;
5270 Page dstPage;
5271 bool use_wal;
5272 BlockNumber nblocks;
5273 BlockNumber blkno;
5280
5281 /*
5282 * In general, we want to write WAL whenever wal_level > 'minimal', but we
5283 * can skip it when copying any fork of an unlogged relation other than
5284 * the init fork.
5285 */
5286 use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
5287
5288 /* Get number of blocks in the source relation. */
5290 forkNum);
5291
5292 /* Nothing to copy; just return. */
5293 if (nblocks == 0)
5294 return;
5295
5296 /*
5297 * Bulk extend the destination relation of the same size as the source
5298 * relation before starting to copy block by block.
5299 */
5300 memset(buf.data, 0, BLCKSZ);
5301 smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
5302 buf.data, true);
5303
5304 /* This is a bulk operation, so use buffer access strategies. */
5307
5308 /* Initialize streaming read */
5309 p.current_blocknum = 0;
5310 p.last_exclusive = nblocks;
5312
5313 /*
5314 * It is safe to use batchmode as block_range_read_stream_cb takes no
5315 * locks.
5316 */
5320 src_smgr,
5322 forkNum,
5324 &p,
5325 0);
5326
5327 /* Iterate over each block of the source relation file. */
5328 for (blkno = 0; blkno < nblocks; blkno++)
5329 {
5331
5332 /* Read block from source relation. */
5336
5340 permanent);
5342
5344
5345 /* Copy page data from the source to the destination. */
5348
5349 /* WAL-log the copied page. */
5350 if (use_wal)
5352
5354
5357 }
5360
5363}
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition bufmgr.c:948
@ BAS_BULKREAD
Definition bufmgr.h:37
@ BAS_BULKWRITE
Definition bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.c:620
#define XLogIsNeeded()
Definition xlog.h:111
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

References Assert, BAS_BULKREAD, BAS_BULKWRITE, block_range_read_stream_cb(), buf, BUFFER_LOCK_SHARE, BufferGetBlockNumber(), BufferGetPage(), CHECK_FOR_INTERRUPTS, BlockRangeReadStreamPrivate::current_blocknum, END_CRIT_SECTION, fb(), FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_ZERO_AND_LOCK, read_stream_begin_smgr_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4572 of file bufmgr.c.

4573{
4574 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4575 {
4576 /*
4577 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4578 * tableam returns the size in bytes - but for the purpose of this
4579 * routine, we want the number of blocks. Therefore divide, rounding
4580 * up.
4581 */
4583
4584 szbytes = table_relation_size(relation, forkNum);
4585
4586 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4587 }
4588 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4589 {
4590 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4591 }
4592 else
4593 Assert(false);
4594
4595 return 0; /* keep compiler quiet */
4596}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition tableam.h:1859

References Assert, fb(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3121 of file bufmgr.c.

3124{
3125 ForkNumber forkNum = MAIN_FORKNUM;
3127
3128 if (BufferIsValid(buffer))
3129 {
3130 Assert(BufferIsPinned(buffer));
3131 if (BufferIsLocal(buffer))
3132 {
3133 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3134 if (bufHdr->tag.blockNum == blockNum &&
3135 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3136 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3137 return buffer;
3138 UnpinLocalBuffer(buffer);
3139 }
3140 else
3141 {
3142 bufHdr = GetBufferDescriptor(buffer - 1);
3143 /* we have pin, so it's ok to examine tag without spinlock */
3144 if (bufHdr->tag.blockNum == blockNum &&
3145 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3146 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3147 return buffer;
3149 }
3150 }
3151
3152 return ReadBuffer(relation, blockNum);
3153}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:864

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5501 of file bufmgr.c.

5502{
5503 if (!BufferIsValid(buffer))
5504 elog(ERROR, "bad buffer ID: %d", buffer);
5505
5506 if (BufferIsLocal(buffer))
5507 UnpinLocalBuffer(buffer);
5508 else
5509 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5510}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 293 of file bufmgr.c.

294{
295 /* Already reserved (or freed), nothing to do */
296 if (ReservedRefCountSlot != -1)
297 return;
298
299 /*
300 * First search for a free entry the array, that'll be sufficient in the
301 * majority of cases.
302 */
303 {
304 int i;
305
306 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
307 {
309 {
311
312 /*
313 * We could return immediately, but iterating till the end of
314 * the array allows compiler-autovectorization.
315 */
316 }
317 }
318
319 if (ReservedRefCountSlot != -1)
320 return;
321 }
322
323 /*
324 * No luck. All array entries are full. Move one array entry into the hash
325 * table.
326 */
327 {
328 /*
329 * Move entry from the current clock position in the array into the
330 * hashtable. Use that slot.
331 */
332 int victim_slot;
335 bool found;
336
337 /* select victim slot */
341
342 /* Better be used, otherwise we shouldn't get here. */
346
347 /* enter victim array entry into hashtable */
351 &found);
352 Assert(!found);
353 /* move data from the entry in the array to the hash entry */
354 hashent->data = victim_entry->data;
355
356 /* clear the now free array slot */
358 victim_entry->buffer = InvalidBuffer;
359
360 /* clear the whole data member, just for future proofing */
361 memset(&victim_entry->data, 0, sizeof(victim_entry->data));
362 victim_entry->data.refcount = 0;
363 victim_entry->data.lockmode = BUFFER_LOCK_UNLOCK;
364
366 }
367}
static uint32 PrivateRefCountClock
Definition bufmgr.c:251
@ HASH_ENTER
Definition hsearch.h:114

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by BufferAlloc(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntrySlow(), GetVictimBuffer(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

◆ ResOwnerPrintBuffer()

static char * ResOwnerPrintBuffer ( Datum  res)
static

Definition at line 7470 of file bufmgr.c.

7471{
7473}
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212

References DatumGetInt32(), and DebugPrintBufferRefcount().

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 7420 of file bufmgr.c.

7421{
7422 Buffer buffer = DatumGetInt32(res);
7423
7424 return psprintf("lost track of buffer IO on buffer %d", buffer);
7425}

References PrivateRefCountEntry::buffer, DatumGetInt32(), and psprintf().

◆ ResOwnerReleaseBuffer()

static void ResOwnerReleaseBuffer ( Datum  res)
static

Definition at line 7434 of file bufmgr.c.

7435{
7436 Buffer buffer = DatumGetInt32(res);
7437
7438 /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
7439 if (!BufferIsValid(buffer))
7440 elog(ERROR, "bad buffer ID: %d", buffer);
7441
7442 if (BufferIsLocal(buffer))
7444 else
7445 {
7447
7448 ref = GetPrivateRefCountEntry(buffer, false);
7449
7450 /* not having a private refcount would imply resowner corruption */
7451 Assert(ref != NULL);
7452
7453 /*
7454 * If the buffer was locked at the time of the resowner release,
7455 * release the lock now. This should only happen after errors.
7456 */
7457 if (ref->data.lockmode != BUFFER_LOCK_UNLOCK)
7458 {
7459 BufferDesc *buf = GetBufferDescriptor(buffer - 1);
7460
7461 HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
7462 BufferLockUnlock(buffer, buf);
7463 }
7464
7466 }
7467}
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition bufmgr.c:3369
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition localbuf.c:848

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), BufferLockUnlock(), DatumGetInt32(), elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 7412 of file bufmgr.c.

7413{
7414 Buffer buffer = DatumGetInt32(res);
7415
7416 AbortBufferIO(buffer);
7417}
static void AbortBufferIO(Buffer buffer)
Definition bufmgr.c:7009

References AbortBufferIO(), PrivateRefCountEntry::buffer, and DatumGetInt32().

◆ rlocator_comparator()

static int rlocator_comparator ( const void p1,
const void p2 
)
static

Definition at line 7080 of file bufmgr.c.

7081{
7082 RelFileLocator n1 = *(const RelFileLocator *) p1;
7083 RelFileLocator n2 = *(const RelFileLocator *) p2;
7084
7085 if (n1.relNumber < n2.relNumber)
7086 return -1;
7087 else if (n1.relNumber > n2.relNumber)
7088 return 1;
7089
7090 if (n1.dbOid < n2.dbOid)
7091 return -1;
7092 else if (n1.dbOid > n2.dbOid)
7093 return 1;
7094
7095 if (n1.spcOid < n2.spcOid)
7096 return -1;
7097 else if (n1.spcOid > n2.spcOid)
7098 return 1;
7099 else
7100 return 0;
7101}

References fb().

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 7279 of file bufmgr.c.

7281{
7282 PendingWriteback *pending;
7283
7284 /*
7285 * As pg_flush_data() doesn't do anything with fsync disabled, there's no
7286 * point in tracking in that case.
7287 */
7289 !enableFsync)
7290 return;
7291
7292 /*
7293 * Add buffer to the pending writeback array, unless writeback control is
7294 * disabled.
7295 */
7296 if (*wb_context->max_pending > 0)
7297 {
7299
7300 pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
7301
7302 pending->tag = *tag;
7303 }
7304
7305 /*
7306 * Perform pending flushes if the writeback limit is exceeded. This
7307 * includes the case where previously an item has been added, but control
7308 * is now disabled.
7309 */
7310 if (wb_context->nr_pending >= *wb_context->max_pending)
7312}
bool enableFsync
Definition globals.c:129
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert, enableFsync, fb(), IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().

◆ shared_buffer_readv_complete()

static PgAioResult shared_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8462 of file bufmgr.c.

8464{
8466}

References buffer_readv_complete(), and fb().

◆ shared_buffer_readv_complete_local()

static PgAioResult shared_buffer_readv_complete_local ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

◆ shared_buffer_readv_stage()

static void shared_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 8456 of file bufmgr.c.

8457{
8458 buffer_stage_common(ioh, false, false);
8459}

References buffer_stage_common(), and fb().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void arg)
static

Definition at line 7048 of file bufmgr.c.

7049{
7051
7052 /* Buffer is pinned, so we can read the tag without locking the spinlock */
7053 if (bufHdr != NULL)
7054 errcontext("writing block %u of relation \"%s\"",
7055 bufHdr->tag.blockNum,
7057 BufTagGetForkNum(&bufHdr->tag)).str);
7058}

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), and relpathperm.

Referenced by FlushBuffer().

◆ StartBufferIO()

bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)

Definition at line 6889 of file bufmgr.c.

6890{
6892
6894
6895 for (;;)
6896 {
6898
6900 break;
6902 if (nowait)
6903 return false;
6904 WaitIO(buf);
6905 }
6906
6907 /* Once we get here, there is definitely no I/O active on this buffer */
6908
6909 /* Check if someone else already did the I/O */
6910 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6911 {
6913 return false;
6914 }
6915
6918 0);
6919
6922
6923 return true;
6924}
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, fb(), LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by buffer_call_start_io(), ExtendBufferedRelShared(), FlushBuffer(), read_rel_block_ll(), ReadBuffersCanStartIOOnce(), and ZeroAndLockBuffer().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1608 of file bufmgr.c.

1612{
1613 int nblocks = 1;
1614 bool result;
1615
1616 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1617 false /* single block, no forwarding */ );
1618 Assert(nblocks == 1); /* single block can't be short */
1619
1620 return result;
1621}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition bufmgr.c:1362

References Assert, PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags 
)

Definition at line 1589 of file bufmgr.c.

1594{
1595 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1596 true /* expect forwarded buffers */ );
1597}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags,
bool  allow_forwarding 
)
static

Definition at line 1362 of file bufmgr.c.

1368{
1369 int actual_nblocks = *nblocks;
1370 int maxcombine = 0;
1371 bool did_start_io;
1372
1373 Assert(*nblocks == 1 || allow_forwarding);
1374 Assert(*nblocks > 0);
1375 Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1376
1377 for (int i = 0; i < actual_nblocks; ++i)
1378 {
1379 bool found;
1380
1381 if (allow_forwarding && buffers[i] != InvalidBuffer)
1382 {
1384
1385 /*
1386 * This is a buffer that was pinned by an earlier call to
1387 * StartReadBuffers(), but couldn't be handled in one operation at
1388 * that time. The operation was split, and the caller has passed
1389 * an already pinned buffer back to us to handle the rest of the
1390 * operation. It must continue at the expected block number.
1391 */
1392 Assert(BufferGetBlockNumber(buffers[i]) == blockNum + i);
1393
1394 /*
1395 * It might be an already valid buffer (a hit) that followed the
1396 * final contiguous block of an earlier I/O (a miss) marking the
1397 * end of it, or a buffer that some other backend has since made
1398 * valid by performing the I/O for us, in which case we can handle
1399 * it as a hit now. It is safe to check for a BM_VALID flag with
1400 * a relaxed load, because we got a fresh view of it while pinning
1401 * it in the previous call.
1402 *
1403 * On the other hand if we don't see BM_VALID yet, it must be an
1404 * I/O that was split by the previous call and we need to try to
1405 * start a new I/O from this block. We're also racing against any
1406 * other backend that might start the I/O or even manage to mark
1407 * it BM_VALID after this check, but StartBufferIO() will handle
1408 * those cases.
1409 */
1410 if (BufferIsLocal(buffers[i]))
1411 bufHdr = GetLocalBufferDescriptor(-buffers[i] - 1);
1412 else
1413 bufHdr = GetBufferDescriptor(buffers[i] - 1);
1415 found = pg_atomic_read_u64(&bufHdr->state) & BM_VALID;
1416 }
1417 else
1418 {
1419 buffers[i] = PinBufferForBlock(operation->rel,
1420 operation->smgr,
1421 operation->persistence,
1422 operation->forknum,
1423 blockNum + i,
1424 operation->strategy,
1425 &found);
1426 }
1427
1428 if (found)
1429 {
1430 /*
1431 * We have a hit. If it's the first block in the requested range,
1432 * we can return it immediately and report that WaitReadBuffers()
1433 * does not need to be called. If the initial value of *nblocks
1434 * was larger, the caller will have to call again for the rest.
1435 */
1436 if (i == 0)
1437 {
1438 *nblocks = 1;
1439
1440#ifdef USE_ASSERT_CHECKING
1441
1442 /*
1443 * Initialize enough of ReadBuffersOperation to make
1444 * CheckReadBuffersOperation() work. Outside of assertions
1445 * that's not necessary when no IO is issued.
1446 */
1447 operation->buffers = buffers;
1448 operation->blocknum = blockNum;
1449 operation->nblocks = 1;
1450 operation->nblocks_done = 1;
1451 CheckReadBuffersOperation(operation, true);
1452#endif
1453 return false;
1454 }
1455
1456 /*
1457 * Otherwise we already have an I/O to perform, but this block
1458 * can't be included as it is already valid. Split the I/O here.
1459 * There may or may not be more blocks requiring I/O after this
1460 * one, we haven't checked, but they can't be contiguous with this
1461 * one in the way. We'll leave this buffer pinned, forwarding it
1462 * to the next call, avoiding the need to unpin it here and re-pin
1463 * it in the next call.
1464 */
1465 actual_nblocks = i;
1466 break;
1467 }
1468 else
1469 {
1470 /*
1471 * Check how many blocks we can cover with the same IO. The smgr
1472 * implementation might e.g. be limited due to a segment boundary.
1473 */
1474 if (i == 0 && actual_nblocks > 1)
1475 {
1476 maxcombine = smgrmaxcombine(operation->smgr,
1477 operation->forknum,
1478 blockNum);
1480 {
1481 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1482 blockNum, actual_nblocks, maxcombine);
1484 }
1485 }
1486 }
1487 }
1488 *nblocks = actual_nblocks;
1489
1490 /* Populate information needed for I/O. */
1491 operation->buffers = buffers;
1492 operation->blocknum = blockNum;
1493 operation->flags = flags;
1494 operation->nblocks = actual_nblocks;
1495 operation->nblocks_done = 0;
1496 pgaio_wref_clear(&operation->io_wref);
1497
1498 /*
1499 * When using AIO, start the IO in the background. If not, issue prefetch
1500 * requests if desired by the caller.
1501 *
1502 * The reason we have a dedicated path for IOMETHOD_SYNC here is to
1503 * de-risk the introduction of AIO somewhat. It's a large architectural
1504 * change, with lots of chances for unanticipated performance effects.
1505 *
1506 * Use of IOMETHOD_SYNC already leads to not actually performing IO
1507 * asynchronously, but without the check here we'd execute IO earlier than
1508 * we used to. Eventually this IOMETHOD_SYNC specific path should go away.
1509 */
1510 if (io_method != IOMETHOD_SYNC)
1511 {
1512 /*
1513 * Try to start IO asynchronously. It's possible that no IO needs to
1514 * be started, if another backend already performed the IO.
1515 *
1516 * Note that if an IO is started, it might not cover the entire
1517 * requested range, e.g. because an intermediary block has been read
1518 * in by another backend. In that case any "trailing" buffers we
1519 * already pinned above will be "forwarded" by read_stream.c to the
1520 * next call to StartReadBuffers().
1521 *
1522 * This is signalled to the caller by decrementing *nblocks *and*
1523 * reducing operation->nblocks. The latter is done here, but not below
1524 * WaitReadBuffers(), as in WaitReadBuffers() we can't "shorten" the
1525 * overall read size anymore, we need to retry until done in its
1526 * entirety or until failed.
1527 */
1528 did_start_io = AsyncReadBuffers(operation, nblocks);
1529
1530 operation->nblocks = *nblocks;
1531 }
1532 else
1533 {
1534 operation->flags |= READ_BUFFERS_SYNCHRONOUSLY;
1535
1536 if (flags & READ_BUFFERS_ISSUE_ADVICE)
1537 {
1538 /*
1539 * In theory we should only do this if PinBufferForBlock() had to
1540 * allocate new buffers above. That way, if two calls to
1541 * StartReadBuffers() were made for the same blocks before
1542 * WaitReadBuffers(), only the first would issue the advice.
1543 * That'd be a better simulation of true asynchronous I/O, which
1544 * would only start the I/O once, but isn't done here for
1545 * simplicity.
1546 */
1547 smgrprefetch(operation->smgr,
1548 operation->forknum,
1549 blockNum,
1551 }
1552
1553 /*
1554 * Indicate that WaitReadBuffers() should be called. WaitReadBuffers()
1555 * will initiate the necessary IO.
1556 */
1557 did_start_io = true;
1558 }
1559
1561
1562 return did_start_io;
1563}
int io_method
Definition aio.c:74
@ IOMETHOD_SYNC
Definition aio.h:34
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition bufmgr.c:1627
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition bufmgr.c:1864
#define READ_BUFFERS_ISSUE_ADVICE
Definition bufmgr.h:124
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition smgr.c:697

References Assert, AsyncReadBuffers(), ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, CheckReadBuffersOperation(), DEBUG2, elog, fb(), ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, InvalidBuffer, io_method, ReadBuffersOperation::io_wref, IOMETHOD_SYNC, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, pg_atomic_read_u64(), pgaio_wref_clear(), PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, READ_BUFFERS_SYNCHRONOUSLY, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, smgrmaxcombine(), smgrprefetch(), ReadBuffersOperation::strategy, and unlikely.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 4033 of file bufmgr.c.

4034{
4036 int result = 0;
4038 BufferTag tag;
4039
4040 /* Make sure we can handle the pin */
4043
4044 /*
4045 * Check whether buffer needs writing.
4046 *
4047 * We can make this check without taking the buffer content lock so long
4048 * as we mark pages dirty in access methods *before* logging changes with
4049 * XLogInsert(): if someone marks the buffer dirty just after our check we
4050 * don't worry because our checkpoint.redo points before log record for
4051 * upcoming changes and so we are not required to write such dirty buffer.
4052 */
4054
4057 {
4058 result |= BUF_REUSABLE;
4059 }
4060 else if (skip_recently_used)
4061 {
4062 /* Caller told us not to write recently-used buffers */
4064 return result;
4065 }
4066
4067 if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4068 {
4069 /* It's clean, so nothing to do */
4071 return result;
4072 }
4073
4074 /*
4075 * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
4076 * buffer is clean by the time we've locked it.)
4077 */
4079
4081
4082 tag = bufHdr->tag;
4083
4085
4086 /*
4087 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
4088 * IOContext will always be IOCONTEXT_NORMAL.
4089 */
4091
4092 return result | BUF_WRITTEN;
4093}

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().

◆ TerminateBufferIO()

void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint64  set_flag_bits,
bool  forget_owner,
bool  release_aio 
)

Definition at line 6947 of file bufmgr.c.

6949{
6952 int refcount_change = 0;
6953
6955
6958
6959 /* Clear earlier errors, if this IO failed, it'll be marked again */
6961
6964
6965 if (release_aio)
6966 {
6967 /* release ownership by the AIO subsystem */
6969 refcount_change = -1;
6970 pgaio_wref_clear(&buf->io_wref);
6971 }
6972
6976
6977 if (forget_owner)
6980
6982
6983 /*
6984 * Support LockBufferForCleanup()
6985 *
6986 * We may have just released the last pin other than the waiter's. In most
6987 * cases, this backend holds another pin on the buffer. But, if, for
6988 * example, this backend is completing an IO issued by another backend, it
6989 * may be time to wake the waiter.
6990 */
6993}
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void WakePinCountWaiter(BufferDesc *buf)
Definition bufmgr.c:3324
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, fb(), LockBufHdr(), pgaio_wref_clear(), ResourceOwnerForgetBufferIO(), UnlockBufHdrExt(), and WakePinCountWaiter().

Referenced by AbortBufferIO(), buffer_call_terminate_io(), buffer_readv_complete_one(), ExtendBufferedRelShared(), FlushBuffer(), and ZeroAndLockBuffer().

◆ TrackNewBufferPin()

void TrackNewBufferPin ( Buffer  buf)
inline

Definition at line 3416 of file bufmgr.c.

3417{
3419
3421 ref->data.refcount++;
3422
3424
3425 /*
3426 * This is the first pin for this page by this backend, mark its page as
3427 * defined to valgrind. While the page contents might not actually be
3428 * valid yet, we don't currently guarantee that such pages are marked
3429 * undefined or non-accessible.
3430 *
3431 * It's not necessarily the prettiest to do this here, but otherwise we'd
3432 * need this block of code in multiple places.
3433 */
3435 BLCKSZ);
3436}
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition bufmgr.c:373

References buf, BufHdrGetBlock, CurrentResourceOwner, fb(), GetBufferDescriptor(), NewPrivateRefCountEntry(), ResourceOwnerRememberBuffer(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by GetBufferFromRing(), PinBuffer(), PinBuffer_Locked(), and StrategyGetBuffer().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void arg 
)
static

Definition at line 7244 of file bufmgr.c.

7245{
7248
7249 /* we want a min-heap, so return 1 for the a < b */
7250 if (sa->progress < sb->progress)
7251 return 1;
7252 else if (sa->progress == sb->progress)
7253 return 0;
7254 else
7255 return -1;
7256}

References a, b, DatumGetPointer(), and fb().

Referenced by BufferSync().

◆ UnlockBuffer()

void UnlockBuffer ( Buffer  buffer)

Definition at line 6415 of file bufmgr.c.

6416{
6418
6419 Assert(BufferIsPinned(buffer));
6420 if (BufferIsLocal(buffer))
6421 return; /* local buffers need no lock */
6422
6423 buf_hdr = GetBufferDescriptor(buffer - 1);
6424 BufferLockUnlock(buffer, buf_hdr);
6425}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockUnlock(), fb(), and GetBufferDescriptor().

Referenced by LockBuffer().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5709 of file bufmgr.c.

5710{
5712
5713 if (buf)
5714 {
5716 uint64 unset_bits = 0;
5717
5719
5720 /*
5721 * Don't complain if flag bit not set; it could have been reset but we
5722 * got a cancel/die interrupt before getting the signal.
5723 */
5724 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5725 buf->wait_backend_pgprocno == MyProcNumber)
5727
5729 0, unset_bits,
5730 0);
5731
5733 }
5734}

References BM_PIN_COUNT_WAITER, buf, fb(), LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5518 of file bufmgr.c.

5519{
5521 ReleaseBuffer(buffer);
5522}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 3369 of file bufmgr.c.

3370{
3373
3375
3376 /* not moving as we're likely deleting it soon anyway */
3377 ref = GetPrivateRefCountEntry(b, false);
3378 Assert(ref != NULL);
3379 Assert(ref->data.refcount > 0);
3380 ref->data.refcount--;
3381 if (ref->data.refcount == 0)
3382 {
3384
3385 /*
3386 * Mark buffer non-accessible to Valgrind.
3387 *
3388 * Note that the buffer may have already been marked non-accessible
3389 * within access method code that enforces that buffers are only
3390 * accessed while a buffer lock is held.
3391 */
3393
3394 /*
3395 * I'd better not still hold the buffer content lock. Can't use
3396 * BufferIsLockedByMe(), as that asserts the buffer is pinned.
3397 */
3399
3400 /* decrement the shared reference count */
3402
3403 /* Support LockBufferForCleanup() */
3406
3408 }
3409}
static uint64 pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:541
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition bufmgr.c:551

References Assert, b, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufferLockHeldByMe(), BufHdrGetBlock, fb(), ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), pg_atomic_fetch_sub_u64(), VALGRIND_MAKE_MEM_NOACCESS, and WakePinCountWaiter().

Referenced by ResOwnerReleaseBuffer(), and UnpinBuffer().

◆ WaitBufHdrUnlocked()

pg_noinline uint64 WaitBufHdrUnlocked ( BufferDesc buf)

◆ WaitIO()

static void WaitIO ( BufferDesc buf)
static

Definition at line 6810 of file bufmgr.c.

6811{
6813
6815 for (;;)
6816 {
6819
6820 /*
6821 * It may not be necessary to acquire the spinlock to check the flag
6822 * here, but since this test is essential for correctness, we'd better
6823 * play it safe.
6824 */
6826
6827 /*
6828 * Copy the wait reference while holding the spinlock. This protects
6829 * against a concurrent TerminateBufferIO() in another backend from
6830 * clearing the wref while it's being read.
6831 */
6832 iow = buf->io_wref;
6834
6835 /* no IO in progress, we don't need to wait */
6837 break;
6838
6839 /*
6840 * The buffer has asynchronous IO in progress, wait for it to
6841 * complete.
6842 */
6843 if (pgaio_wref_valid(&iow))
6844 {
6846
6847 /*
6848 * The AIO subsystem internally uses condition variables and thus
6849 * might remove this backend from the BufferDesc's CV. While that
6850 * wouldn't cause a correctness issue (the first CV sleep just
6851 * immediately returns if not already registered), it seems worth
6852 * avoiding unnecessary loop iterations, given that we take care
6853 * to do so at the start of the function.
6854 */
6856 continue;
6857 }
6858
6859 /* wait on BufferDesc->cv, e.g. for concurrent synchronous IO */
6861 }
6863}
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition aio.c:991
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), fb(), LockBufHdr(), pgaio_wref_valid(), pgaio_wref_wait(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation operation)

Definition at line 1732 of file bufmgr.c.

1733{
1734 PgAioReturn *aio_ret = &operation->io_return;
1737
1738 if (operation->persistence == RELPERSISTENCE_TEMP)
1739 {
1742 }
1743 else
1744 {
1747 }
1748
1749 /*
1750 * If we get here without an IO operation having been issued, the
1751 * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
1752 * caller should not have called WaitReadBuffers().
1753 *
1754 * In the case of IOMETHOD_SYNC, we start - as we used to before the
1755 * introducing of AIO - the IO in WaitReadBuffers(). This is done as part
1756 * of the retry logic below, no extra code is required.
1757 *
1758 * This path is expected to eventually go away.
1759 */
1760 if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
1761 elog(ERROR, "waiting for read operation that didn't read");
1762
1763 /*
1764 * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
1765 * done. We may need multiple retries, not just because we could get
1766 * multiple partial reads, but also because some of the remaining
1767 * to-be-read buffers may have been read in by other backends, limiting
1768 * the IO size.
1769 */
1770 while (true)
1771 {
1773
1774 CheckReadBuffersOperation(operation, false);
1775
1776 /*
1777 * If there is an IO associated with the operation, we may need to
1778 * wait for it.
1779 */
1780 if (pgaio_wref_valid(&operation->io_wref))
1781 {
1782 /*
1783 * Track the time spent waiting for the IO to complete. As
1784 * tracking a wait even if we don't actually need to wait
1785 *
1786 * a) is not cheap, due to the timestamping overhead
1787 *
1788 * b) reports some time as waiting, even if we never waited
1789 *
1790 * we first check if we already know the IO is complete.
1791 */
1792 if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
1793 !pgaio_wref_check_done(&operation->io_wref))
1794 {
1796
1797 pgaio_wref_wait(&operation->io_wref);
1798
1799 /*
1800 * The IO operation itself was already counted earlier, in
1801 * AsyncReadBuffers(), this just accounts for the wait time.
1802 */
1804 io_start, 0, 0);
1805 }
1806 else
1807 {
1808 Assert(pgaio_wref_check_done(&operation->io_wref));
1809 }
1810
1811 /*
1812 * We now are sure the IO completed. Check the results. This
1813 * includes reporting on errors if there were any.
1814 */
1815 ProcessReadBuffersResult(operation);
1816 }
1817
1818 /*
1819 * Most of the time, the one IO we already started, will read in
1820 * everything. But we need to deal with partial reads and buffers not
1821 * needing IO anymore.
1822 */
1823 if (operation->nblocks_done == operation->nblocks)
1824 break;
1825
1827
1828 /*
1829 * This may only complete the IO partially, either because some
1830 * buffers were already valid, or because of a partial read.
1831 *
1832 * NB: In contrast to after the AsyncReadBuffers() call in
1833 * StartReadBuffers(), we do *not* reduce
1834 * ReadBuffersOperation->nblocks here, callers expect the full
1835 * operation to be completed at this point (as more operations may
1836 * have been queued).
1837 */
1839 }
1840
1841 CheckReadBuffersOperation(operation, true);
1842
1843 /* NB: READ_DONE tracepoint was already executed in completion callback */
1844}
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition aio.c:1005
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
Definition bufmgr.c:1693

References Assert, AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, fb(), io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ WakePinCountWaiter()

static void WakePinCountWaiter ( BufferDesc buf)
static

Definition at line 3324 of file bufmgr.c.

3325{
3326 /*
3327 * Acquire the buffer header lock, re-check that there's a waiter. Another
3328 * backend could have unpinned this buffer, and already woken up the
3329 * waiter.
3330 *
3331 * There's no danger of the buffer being replaced after we unpinned it
3332 * above, as it's pinned by the waiter. The waiter removes
3333 * BM_PIN_COUNT_WAITER if it stops waiting for a reason other than this
3334 * backend waking it up.
3335 */
3337
3340 {
3341 /* we just released the last pin other than the waiter's */
3342 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
3343
3346 0);
3347 ProcSendSignal(wait_backend_pgprocno);
3348 }
3349 else
3351}
void ProcSendSignal(ProcNumber procNumber)
Definition proc.c:1991

References BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, fb(), LockBufHdr(), ProcSendSignal(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by TerminateBufferIO(), and UnpinBufferNoOwner().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext context,
int max_pending 
)

Definition at line 7267 of file bufmgr.c.

7268{
7269 Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
7270
7271 context->max_pending = max_pending;
7272 context->nr_pending = 0;
7273}

References Assert, WritebackContext::max_pending, WritebackContext::nr_pending, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferManagerShmemInit(), and BufferSync().

◆ ZeroAndLockBuffer()

static void ZeroAndLockBuffer ( Buffer  buffer,
ReadBufferMode  mode,
bool  already_valid 
)
static

Definition at line 1131 of file bufmgr.c.

1132{
1134 bool need_to_zero;
1135 bool isLocalBuf = BufferIsLocal(buffer);
1136
1138
1139 if (already_valid)
1140 {
1141 /*
1142 * If the caller already knew the buffer was valid, we can skip some
1143 * header interaction. The caller just wants to lock the buffer.
1144 */
1145 need_to_zero = false;
1146 }
1147 else if (isLocalBuf)
1148 {
1149 /* Simple case for non-shared buffers. */
1150 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1151 need_to_zero = StartLocalBufferIO(bufHdr, true, false);
1152 }
1153 else
1154 {
1155 /*
1156 * Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
1157 * concurrently. Even though we aren't doing I/O, that ensures that
1158 * we don't zero a page that someone else has pinned. An exclusive
1159 * content lock wouldn't be enough, because readers are allowed to
1160 * drop the content lock after determining that a tuple is visible
1161 * (see buffer access rules in README).
1162 */
1163 bufHdr = GetBufferDescriptor(buffer - 1);
1164 need_to_zero = StartBufferIO(bufHdr, true, false);
1165 }
1166
1167 if (need_to_zero)
1168 {
1169 memset(BufferGetPage(buffer), 0, BLCKSZ);
1170
1171 /*
1172 * Grab the buffer content lock before marking the page as valid, to
1173 * make sure that no other backend sees the zeroed page before the
1174 * caller has had a chance to initialize it.
1175 *
1176 * Since no-one else can be looking at the page contents yet, there is
1177 * no difference between an exclusive lock and a cleanup-strength
1178 * lock. (Note that we cannot use LockBuffer() or
1179 * LockBufferForCleanup() here, because they assert that the buffer is
1180 * already valid.)
1181 */
1182 if (!isLocalBuf)
1184
1185 /* Set BM_VALID, terminate IO, and wake up any waiters */
1186 if (isLocalBuf)
1187 TerminateLocalBufferIO(bufHdr, false, BM_VALID, false);
1188 else
1189 TerminateBufferIO(bufHdr, false, BM_VALID, true, false);
1190 }
1191 else if (!isLocalBuf)
1192 {
1193 /*
1194 * The buffer is valid, so we can't zero it. The caller still expects
1195 * the page to be locked on return.
1196 */
1197 if (mode == RBM_ZERO_AND_LOCK)
1199 else
1200 LockBufferForCleanup(buffer);
1201 }
1202}
void LockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6527

References Assert, BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsLocal, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, StartBufferIO(), StartLocalBufferIO(), TerminateBufferIO(), and TerminateLocalBufferIO().

Referenced by ReadBuffer_common().

Variable Documentation

◆ aio_local_buffer_readv_cb

const PgAioHandleCallbacks aio_local_buffer_readv_cb
Initial value:
= {
.complete_local = local_buffer_readv_complete,
}
static PgAioResult local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
Definition bufmgr.c:8513
static void local_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
Definition bufmgr.c:8507
static void buffer_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
Definition bufmgr.c:8361

Definition at line 8529 of file bufmgr.c.

8529 {
8530 .stage = local_buffer_readv_stage,
8531
8532 /*
8533 * Note that this, in contrast to the shared_buffers case, uses
8534 * complete_local, as only the issuing backend has access to the required
8535 * datastructures. This is important in case the IO completion may be
8536 * consumed incidentally by another backend.
8537 */
8538 .complete_local = local_buffer_readv_complete,
8539 .report = buffer_readv_report,
8540};

◆ aio_shared_buffer_readv_cb

const PgAioHandleCallbacks aio_shared_buffer_readv_cb
Initial value:
= {
.complete_shared = shared_buffer_readv_complete,
}
static PgAioResult shared_buffer_readv_complete_local(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
Definition bufmgr.c:8476
static void shared_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
Definition bufmgr.c:8456
static PgAioResult shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
Definition bufmgr.c:8462

Definition at line 8520 of file bufmgr.c.

8520 {
8522 .complete_shared = shared_buffer_readv_complete,
8523 /* need a local callback to report checksum failures */
8524 .complete_local = shared_buffer_readv_complete_local,
8525 .report = buffer_readv_report,
8526};

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 209 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 208 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 174 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 175 of file bufmgr.c.

Referenced by BgBufferSync().

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
.name = "buffer io",
.release_priority = RELEASE_PRIO_BUFFER_IOS,
.ReleaseResource = ResOwnerReleaseBufferIO,
.DebugPrint = ResOwnerPrintBufferIO
}
static void ResOwnerReleaseBufferIO(Datum res)
Definition bufmgr.c:7412
static char * ResOwnerPrintBufferIO(Datum res)
Definition bufmgr.c:7420
#define RELEASE_PRIO_BUFFER_IOS
Definition resowner.h:62
@ RESOURCE_RELEASE_BEFORE_LOCKS
Definition resowner.h:54

Definition at line 269 of file bufmgr.c.

270{
271 .name = "buffer io",
272 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
273 .release_priority = RELEASE_PRIO_BUFFER_IOS,
274 .ReleaseResource = ResOwnerReleaseBufferIO,
275 .DebugPrint = ResOwnerPrintBufferIO
276};

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_resowner_desc

const ResourceOwnerDesc buffer_resowner_desc
Initial value:
=
{
.name = "buffer",
.release_priority = RELEASE_PRIO_BUFFER_PINS,
.ReleaseResource = ResOwnerReleaseBuffer,
.DebugPrint = ResOwnerPrintBuffer
}
static void ResOwnerReleaseBuffer(Datum res)
Definition bufmgr.c:7434
static char * ResOwnerPrintBuffer(Datum res)
Definition bufmgr.c:7470
#define RELEASE_PRIO_BUFFER_PINS
Definition resowner.h:63

Definition at line 278 of file bufmgr.c.

279{
280 .name = "buffer",
281 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
282 .release_priority = RELEASE_PRIO_BUFFER_PINS,
283 .ReleaseResource = ResOwnerReleaseBuffer,
284 .DebugPrint = ResOwnerPrintBuffer
285};

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 207 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

◆ io_combine_limit

◆ io_combine_limit_guc

int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT

Definition at line 200 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ maintenance_io_concurrency

◆ MaxProportionalPins

uint32 MaxProportionalPins
static

Definition at line 255 of file bufmgr.c.

Referenced by GetAdditionalPinLimit(), GetPinLimit(), and InitBufferManagerAccess().

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 212 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountArrayKeys

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 251 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountEntryLast

int PrivateRefCountEntryLast = -1
static

◆ PrivateRefCountHash

◆ PrivateRefCountOverflowed

◆ ReservedRefCountSlot

int ReservedRefCountSlot = -1
static

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 173 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().