PostgreSQL Source Code git master
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/aio.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/proclist.h"
#include "storage/procsignal.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Include dependency graph for bufmgr.c:

Go to the source code of this file.

Data Structures

struct  PrivateRefCountData
 
struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define READV_COUNT_BITS   7
 
#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)
 

Typedefs

typedef struct PrivateRefCountData PrivateRefCountData
 
typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBuffer (Datum res)
 
static char * ResOwnerPrintBuffer (Datum res)
 
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow (Buffer buffer, bool do_move)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static bool AsyncReadBuffers (ReadBuffersOperation *operation, int *nblocks_progress)
 
static void CheckReadBuffersOperation (ReadBuffersOperation *operation, bool is_complete)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushUnlockedBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
static void BufferLockAcquire (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockUnlock (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockConditional (Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMeInMode (BufferDesc *buf_hdr, BufferLockMode mode)
 
static bool BufferLockHeldByMe (BufferDesc *buf_hdr)
 
static void BufferLockDisown (Buffer buffer, BufferDesc *buf_hdr)
 
static int BufferLockDisownInternal (Buffer buffer, BufferDesc *buf_hdr)
 
static bool BufferLockAttempt (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockQueueSelf (BufferDesc *buf_hdr, BufferLockMode mode)
 
static void BufferLockDequeueSelf (BufferDesc *buf_hdr)
 
static void BufferLockWakeup (BufferDesc *buf_hdr, bool unlocked)
 
static void BufferLockProcessRelease (BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
 
static uint64 BufferLockReleaseSub (BufferLockMode mode)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool ReadBuffersCanStartIOOnce (Buffer buffer, bool nowait)
 
static bool ReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
static void ProcessReadBuffersResult (ReadBuffersOperation *operation)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
uint32 GetPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
static void WakePinCountWaiter (BufferDesc *buf)
 
void TrackNewBufferPin (Buffer buf)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferManagerAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void UnlockBuffer (Buffer buffer)
 
void LockBufferInternal (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint64 set_flag_bits, bool forget_owner, bool release_aio)
 
uint64 LockBufHdr (BufferDesc *desc)
 
pg_noinline uint64 WaitBufHdrUnlocked (BufferDesc *buf)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
static bool EvictUnpinnedBufferInternal (BufferDesc *desc, bool *buffer_flushed)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
static bool MarkDirtyUnpinnedBufferInternal (Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
static pg_attribute_always_inline void buffer_stage_common (PgAioHandle *ioh, bool is_write, bool is_temp)
 
static void buffer_readv_decode_error (PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
 
static void buffer_readv_encode_error (PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
 
static pg_attribute_always_inline void buffer_readv_complete_one (PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
 
static pg_attribute_always_inline PgAioResult buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
 
static void buffer_readv_report (PgAioResult result, const PgAioTargetData *td, int elevel)
 
static void shared_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete_local (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static void local_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult local_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT
 
int io_max_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static Buffer PrivateRefCountArrayKeys [REFCOUNT_ARRAY_ENTRIES]
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static int ReservedRefCountSlot = -1
 
static int PrivateRefCountEntryLast = -1
 
static uint32 MaxProportionalPins
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_resowner_desc
 
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
const PgAioHandleCallbacks aio_local_buffer_readv_cb
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 93 of file bufmgr.c.
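
This threshold caps how many to-be-dropped blocks are still worth invalidating via per-block buffer lookups before a single scan of the whole buffer pool becomes cheaper. A minimal illustrative sketch of that decision (not from bufmgr.c; the function name is hypothetical):

/* Hypothetical sketch: choose full-pool scan vs. per-block lookups. */
#include <stdbool.h>
#include <stdint.h>

static bool
use_full_scan(uint64_t nblocks_to_invalidate, int nbuffers)
{
    /* mirrors BUF_DROP_FULL_SCAN_THRESHOLD: (uint64) (NBuffers / 32) */
    uint64_t threshold = (uint64_t) (nbuffers / 32);

    /* many targeted blocks: one pass over all buffers wins */
    return nblocks_to_invalidate >= threshold;
}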

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 83 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 82 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 75 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
    !BufferIsValid(bufnum) ? \
        false \
    : \
        BufferIsLocal(bufnum) ? \
            (LocalRefCount[-(bufnum) - 1] > 0) \
        : \
    (GetPrivateRefCount(bufnum) > 0) \
)

Definition at line 590 of file bufmgr.c.
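
The three-way branch relies on the buffer manager's numbering convention: 0 is InvalidBuffer, positive numbers are shared buffers, and negative numbers are local (temp-relation) buffers, indexed with -(bufnum) - 1. A standalone sketch of the same dispatch, with hypothetical arrays standing in for LocalRefCount and GetPrivateRefCount():

#include <stdbool.h>

/* hypothetical stand-ins for PostgreSQL's refcount bookkeeping */
static int fake_local_refcount[16];
static int fake_private_refcount[16];

static bool
buffer_is_pinned(int bufnum)
{
    if (bufnum == 0)                /* InvalidBuffer */
        return false;
    if (bufnum < 0)                 /* local buffer: index with -(bufnum) - 1 */
        return fake_local_refcount[-bufnum - 1] > 0;
    return fake_private_refcount[bufnum - 1] > 0;   /* shared buffer */
}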

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 74 of file bufmgr.c.

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 78 of file bufmgr.c.

◆ READV_COUNT_BITS

#define READV_COUNT_BITS   7

◆ READV_COUNT_MASK

#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 130 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 85 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 3441 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 3441 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 3443 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 3443 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 3440 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 3440 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 3442 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 3442 of file bufmgr.c.

◆ ST_SORT [1/2]

Definition at line 3439 of file bufmgr.c.

◆ ST_SORT [2/2]

Definition at line 3439 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

◆ PrivateRefCountData

◆ PrivateRefCountEntry

◆ SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 7010 of file bufmgr.c.

{
    BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
    uint64      buf_state;

    buf_state = LockBufHdr(buf_hdr);
    Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));

    if (!(buf_state & BM_VALID))
    {
        Assert(!(buf_state & BM_DIRTY));
        UnlockBufHdr(buf_hdr);
    }
    else
    {
        Assert(buf_state & BM_DIRTY);
        UnlockBufHdr(buf_hdr);

        /* Issue notice if this is not the first failure... */
        if (buf_state & BM_IO_ERROR)
        {
            /* Buffer is pinned, so we can read tag without spinlock */
            ereport(WARNING,
                    (errcode(ERRCODE_IO_ERROR),
                     errmsg("could not write block %u of %s",
                            buf_hdr->tag.blockNum,
                            relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
                                        BufTagGetForkNum(&buf_hdr->tag)).str),
                     errdetail("Multiple failures --- write error might be permanent.")));
        }
    }

    TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false, false);
}

References Assert, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), relpathperm, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().
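
The warning above uses PostgreSQL's standard ereport() composition. A minimal illustrative sketch of the same pattern in extension-style code (not from bufmgr.c; the helper name and arguments are hypothetical):

#include "postgres.h"
#include "storage/block.h"

/* hypothetical: report a repeated write failure for a given block */
static void
report_repeated_write_failure(BlockNumber blkno, const char *path)
{
    ereport(WARNING,
            (errcode(ERRCODE_IO_ERROR),
             errmsg("could not write block %u of %s", blkno, path),
             errdetail("Multiple failures --- write error might be permanent.")));
}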

◆ AsyncReadBuffers()

static bool AsyncReadBuffers ( ReadBuffersOperation * operation,
int * nblocks_progress 
)
static

Definition at line 1865 of file bufmgr.c.

{
    Buffer     *buffers = &operation->buffers[0];
    int         flags = operation->flags;
    BlockNumber blocknum = operation->blocknum;
    ForkNumber  forknum = operation->forknum;
    char        persistence = operation->persistence;
    int16       nblocks_done = operation->nblocks_done;
    Buffer     *io_buffers = &operation->buffers[nblocks_done];
    int         io_buffers_len = 0;
    PgAioHandle *ioh = NULL;
    uint32      ioh_flags = 0;
    void       *io_pages[MAX_IO_COMBINE_LIMIT];
    IOContext   io_context;
    IOObject    io_object;
    bool        did_start_io;

    /*
     * When this IO is executed synchronously, either because the caller will
     * immediately block waiting for the IO or because IOMETHOD_SYNC is used,
     * the AIO subsystem needs to know.
     */
    if (flags & READ_BUFFERS_SYNCHRONOUSLY)
        ioh_flags |= PGAIO_HF_SYNCHRONOUS;

    if (persistence == RELPERSISTENCE_TEMP)
    {
        io_context = IOCONTEXT_NORMAL;
        io_object = IOOBJECT_TEMP_RELATION;
        ioh_flags |= PGAIO_HF_REFERENCES_LOCAL;
    }
    else
    {
        io_context = IOContextForStrategy(operation->strategy);
        io_object = IOOBJECT_RELATION;
    }

    /*
     * If zero_damaged_pages is enabled, add the READ_BUFFERS_ZERO_ON_ERROR
     * flag. The reason for that is that, hopefully, zero_damaged_pages isn't
     * set globally, but on a per-session basis. The completion callback,
     * which may be run in other processes, e.g. in IO workers, may have a
     * different value of the zero_damaged_pages GUC.
     *
     * XXX: We probably should eventually use a different flag for
     * zero_damaged_pages, so we can report different log levels / error codes
     * for zero_damaged_pages and ZERO_ON_ERROR.
     */
    if (zero_damaged_pages)
        flags |= READ_BUFFERS_ZERO_ON_ERROR;

    /*
     * For the same reason as with zero_damaged_pages we need to use this
     * backend's ignore_checksum_failure value.
     */
    if (ignore_checksum_failure)
        flags |= READ_BUFFERS_IGNORE_CHECKSUM_FAILURES;

    /*
     * To be allowed to report stats in the local completion callback we need
     * to prepare to report stats now. This ensures we can safely report the
     * checksum failure even in a critical section.
     */
    pgstat_prepare_report_checksum_failure(operation->smgr->smgr_rlocator.locator.dbOid);

    /*
     * Get IO handle before ReadBuffersCanStartIO(), as pgaio_io_acquire()
     * might block, which we don't want after setting IO_IN_PROGRESS.
     *
     * If we need to wait for IO before we can get a handle, submit
     * already-staged IO first, so that other backends don't need to wait.
     * There wouldn't be a deadlock risk, as pgaio_io_acquire() just needs to
     * wait for already submitted IO, which doesn't require additional locks,
     * but it could still cause undesirable waits.
     *
     * A secondary benefit is that this would allow us to measure the time in
     * pgaio_io_acquire() without causing undue timer overhead in the common,
     * non-blocking, case. However, currently the pgstats infrastructure
     * doesn't really allow that, as it a) asserts that an operation can't
     * have time without operations b) doesn't have an API to report
     * "accumulated" time.
     */
    ioh = pgaio_io_acquire_nb(CurrentResourceOwner, &operation->io_return);
    if (unlikely(!ioh))
    {
        pgaio_submit_staged();

        ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
    }

    /*
     * Check if we can start IO on the first to-be-read buffer.
     *
     * If an I/O is already in progress in another backend, we want to wait
     * for the outcome: either done, or something went wrong and we will
     * retry.
     */
    if (!ReadBuffersCanStartIO(buffers[nblocks_done], false))
    {
        /*
         * Someone else has already completed this block, we're done.
         *
         * When IO is necessary, ->nblocks_done is updated in
         * ProcessReadBuffersResult(), but that is not called if no IO is
         * necessary. Thus update here.
         */
        operation->nblocks_done += 1;
        *nblocks_progress = 1;

        pgaio_io_release(ioh);
        pgaio_wref_clear(&operation->io_wref);
        did_start_io = false;

        /*
         * Report and track this as a 'hit' for this backend, even though it
         * must have started out as a miss in PinBufferForBlock(). The other
         * backend will track this as a 'read'.
         */
        TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
                                          operation->smgr->smgr_rlocator.locator.spcOid,
                                          operation->smgr->smgr_rlocator.locator.dbOid,
                                          operation->smgr->smgr_rlocator.locator.relNumber,
                                          operation->smgr->smgr_rlocator.backend,
                                          true);

        if (persistence == RELPERSISTENCE_TEMP)
            pgBufferUsage.local_blks_hit += 1;
        else
            pgBufferUsage.shared_blks_hit += 1;

        if (operation->rel)
            pgstat_count_buffer_hit(operation->rel);

        pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);

        if (VacuumCostActive)
            VacuumCostBalance += VacuumCostPageHit;
    }
    else
    {
        instr_time  io_start;

        /* We found a buffer that we need to read in. */
        Assert(io_buffers[0] == buffers[nblocks_done]);
        io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
        io_buffers_len = 1;

        /*
         * How many neighboring-on-disk blocks can we scatter-read into other
         * buffers at the same time? In this case we don't wait if we see an
         * I/O already in progress. We already set BM_IO_IN_PROGRESS for the
         * head block, so we should get on with that I/O as soon as possible.
         */
        for (int i = nblocks_done + 1; i < operation->nblocks; i++)
        {
            if (!ReadBuffersCanStartIO(buffers[i], true))
                break;
            /* Must be consecutive block numbers. */
            Assert(BufferGetBlockNumber(buffers[i - 1]) ==
                   BufferGetBlockNumber(buffers[i]) - 1);
            Assert(io_buffers[io_buffers_len] == buffers[i]);

            io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
        }

        /* get a reference to wait for in WaitReadBuffers() */
        pgaio_io_get_wref(ioh, &operation->io_wref);

        /* provide the list of buffers to the completion callbacks */
        pgaio_io_set_handle_data_32(ioh, (uint32 *) io_buffers, io_buffers_len);

        pgaio_io_register_callbacks(ioh,
                                    persistence == RELPERSISTENCE_TEMP ?
                                    PGAIO_HCB_LOCAL_BUFFER_READV :
                                    PGAIO_HCB_SHARED_BUFFER_READV,
                                    flags);

        pgaio_io_set_flag(ioh, ioh_flags);

        /* ---
         * Even though we're trying to issue IO asynchronously, track the time
         * in smgrstartreadv():
         * - if io_method == IOMETHOD_SYNC, we will always perform the IO
         *   immediately
         * - the io method might not support the IO (e.g. worker IO for a temp
         *   table)
         * ---
         */
        io_start = pgstat_prepare_io_time(track_io_timing);
        smgrstartreadv(ioh, operation->smgr, forknum,
                       blocknum + nblocks_done,
                       io_pages, io_buffers_len);
        pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
                                io_start, 1, io_buffers_len * BLCKSZ);

        if (persistence == RELPERSISTENCE_TEMP)
            pgBufferUsage.local_blks_read += io_buffers_len;
        else
            pgBufferUsage.shared_blks_read += io_buffers_len;

        /*
         * Track vacuum cost when issuing IO, not after waiting for it.
         * Otherwise we could end up issuing a lot of IO in a short timespan,
         * despite a low cost limit.
         */
        if (VacuumCostActive)
            VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;

        *nblocks_progress = io_buffers_len;
        did_start_io = true;
    }

    return did_start_io;
}

References Assert, RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, CurrentResourceOwner, RelFileLocator::dbOid, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ignore_checksum_failure, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, IOOP_READ, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_HF_REFERENCES_LOCAL, PGAIO_HF_SYNCHRONOUS, pgaio_io_acquire(), pgaio_io_acquire_nb(), pgaio_io_get_wref(), pgaio_io_register_callbacks(), pgaio_io_release(), pgaio_io_set_flag(), pgaio_io_set_handle_data_32(), pgaio_submit_staged(), pgaio_wref_clear(), pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_io_op(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_prepare_report_checksum_failure(), READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersCanStartIO(), ReadBuffersOperation::rel, RelFileLocator::relNumber, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrstartreadv(), RelFileLocator::spcOid, ReadBuffersOperation::strategy, track_io_timing, unlikely, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, and zero_damaged_pages.

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().
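
AsyncReadBuffers() is internal to bufmgr.c; outside callers drive reads through the StartReadBuffers()/WaitReadBuffers() pair listed above. A minimal illustrative sketch of that flow (not from bufmgr.c); the ReadBuffersOperation field setup mirrors what in-core callers do and should be treated as an assumption, and real callers also respect pin limits and increasingly use read streams:

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* hypothetical helper: read nblocks consecutive main-fork blocks at blkno */
static void
read_block_range(Relation rel, BlockNumber blkno, int nblocks)
{
    Buffer      buffers[MAX_IO_COMBINE_LIMIT];
    ReadBuffersOperation op;

    /* fields the caller is expected to fill in before StartReadBuffers() */
    op.rel = rel;
    op.smgr = RelationGetSmgr(rel);
    op.persistence = rel->rd_rel->relpersistence;
    op.forknum = MAIN_FORKNUM;
    op.strategy = NULL;

    while (nblocks > 0)
    {
        int         this_nblocks = Min(nblocks, MAX_IO_COMBINE_LIMIT);

        /* may start an asynchronous, possibly combined, read */
        if (StartReadBuffers(&op, buffers, blkno, &this_nblocks, 0))
            WaitReadBuffers(&op);   /* finish the I/O if one was started */

        for (int i = 0; i < this_nblocks; i++)
            ReleaseBuffer(buffers[i]);

        blkno += this_nblocks;
        nblocks -= this_nblocks;
    }
}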

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 4104 of file bufmgr.c.

{
    CheckForBufferLeaks();

    AtEOXact_LocalBuffers(isCommit);

    Assert(PrivateRefCountOverflowed == 0);
}

References Assert, AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 4156 of file bufmgr.c.

{
    UnlockBuffers();

    CheckForBufferLeaks();

    /* localbuf.c needs a chance too */
    AtProcExit_LocalBuffers();
}

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferManagerAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext * wb_context)

Definition at line 3736 of file bufmgr.c.

{
    /* info obtained from freelist.c */
    int         strategy_buf_id;
    uint32      strategy_passes;
    uint32      recent_alloc;

    /*
     * Information saved between calls so we can determine the strategy
     * point's advance rate and avoid scanning already-cleaned buffers.
     */
    static bool saved_info_valid = false;
    static int  prev_strategy_buf_id;
    static uint32 prev_strategy_passes;
    static int  next_to_clean;
    static uint32 next_passes;

    /* Moving averages of allocation rate and clean-buffer density */
    static float smoothed_alloc = 0;
    static float smoothed_density = 10.0;

    /* Potentially these could be tunables, but for now, not */
    float       smoothing_samples = 16;
    float       scan_whole_pool_milliseconds = 120000.0;

    /* Used to compute how far we scan ahead */
    long        strategy_delta;
    int         bufs_to_lap;
    int         bufs_ahead;
    float       scans_per_alloc;
    int         reusable_buffers_est;
    int         upcoming_alloc_est;
    int         min_scan_buffers;

    /* Variables for the scanning loop proper */
    int         num_to_scan;
    int         num_written;
    int         reusable_buffers;

    /* Variables for final smoothed_density update */
    long        new_strategy_delta;
    uint32      new_recent_alloc;

    /*
     * Find out where the clock-sweep currently is, and how many buffer
     * allocations have happened since our last call.
     */
    strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

    /* Report buffer alloc counts to pgstat */
    PendingBgWriterStats.buf_alloc += recent_alloc;

    /*
     * If we're not running the LRU scan, just stop after doing the stats
     * stuff. We mark the saved state invalid so that we can recover sanely
     * if LRU scan is turned back on later.
     */
    if (bgwriter_lru_maxpages <= 0)
    {
        saved_info_valid = false;
        return true;
    }

    /*
     * Compute strategy_delta = how many buffers have been scanned by the
     * clock-sweep since last time. If first time through, assume none. Then
     * see if we are still ahead of the clock-sweep, and if so, how many
     * buffers we could scan before we'd catch up with it and "lap" it. Note:
     * weird-looking coding of xxx_passes comparisons are to avoid bogus
     * behavior when the passes counts wrap around.
     */
    if (saved_info_valid)
    {
        int32       passes_delta = strategy_passes - prev_strategy_passes;

        strategy_delta = strategy_buf_id - prev_strategy_buf_id;
        strategy_delta += (long) passes_delta * NBuffers;

        Assert(strategy_delta >= 0);

        if ((int32) (next_passes - strategy_passes) > 0)
        {
            /* we're one pass ahead of the strategy point */
            bufs_to_lap = strategy_buf_id - next_to_clean;
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else if (next_passes == strategy_passes &&
                 next_to_clean >= strategy_buf_id)
        {
            /* on same pass, but ahead or at least not behind */
            bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta, bufs_to_lap);
#endif
        }
        else
        {
            /*
             * We're behind, so skip forward to the strategy point and start
             * cleaning from there.
             */
#ifdef BGW_DEBUG
            elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
                 next_passes, next_to_clean,
                 strategy_passes, strategy_buf_id,
                 strategy_delta);
#endif
            next_to_clean = strategy_buf_id;
            next_passes = strategy_passes;
            bufs_to_lap = NBuffers;
        }
    }
    else
    {
        /*
         * Initializing at startup or after LRU scanning had been off. Always
         * start at the strategy point.
         */
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
             strategy_passes, strategy_buf_id);
#endif
        strategy_delta = 0;
        next_to_clean = strategy_buf_id;
        next_passes = strategy_passes;
        bufs_to_lap = NBuffers;
    }

    /* Update saved info for next time */
    prev_strategy_buf_id = strategy_buf_id;
    prev_strategy_passes = strategy_passes;
    saved_info_valid = true;

    /*
     * Compute how many buffers had to be scanned for each new allocation, ie,
     * 1/density of reusable buffers, and track a moving average of that.
     *
     * If the strategy point didn't move, we don't update the density estimate
     */
    if (strategy_delta > 0 && recent_alloc > 0)
    {
        scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;
    }

    /*
     * Estimate how many reusable buffers there are between the current
     * strategy point and where we've scanned ahead to, based on the smoothed
     * density estimate.
     */
    bufs_ahead = NBuffers - bufs_to_lap;
    reusable_buffers_est = (float) bufs_ahead / smoothed_density;

    /*
     * Track a moving average of recent buffer allocations. Here, rather than
     * a true average we want a fast-attack, slow-decline behavior: we
     * immediately follow any increase.
     */
    if (smoothed_alloc <= (float) recent_alloc)
        smoothed_alloc = recent_alloc;
    else
        smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
            smoothing_samples;

    /* Scale the estimate by a GUC to allow more aggressive tuning. */
    upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);

    /*
     * If recent_alloc remains at zero for many cycles, smoothed_alloc will
     * eventually underflow to zero, and the underflows produce annoying
     * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
     * zero, there's no point in tracking smaller and smaller values of
     * smoothed_alloc, so just reset it to exactly zero to avoid this
     * syndrome. It will pop back up as soon as recent_alloc increases.
     */
    if (upcoming_alloc_est == 0)
        smoothed_alloc = 0;

    /*
     * Even in cases where there's been little or no buffer allocation
     * activity, we want to make a small amount of progress through the buffer
     * cache so that as many reusable buffers as possible are clean after an
     * idle period.
     *
     * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
     * the BGW will be called during the scan_whole_pool time; slice the
     * buffer pool into that many sections.
     */
    min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

    if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
    {
#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
             upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
#endif
        upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
    }

    /*
     * Now write out dirty reusable buffers, working forward from the
     * next_to_clean point, until we have lapped the strategy scan, or cleaned
     * enough buffers to match our estimate of the next cycle's allocation
     * requirements, or hit the bgwriter_lru_maxpages limit.
     */

    num_to_scan = bufs_to_lap;
    num_written = 0;
    reusable_buffers = reusable_buffers_est;

    /* Execute the LRU scan */
    while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
    {
        int         sync_state = SyncOneBuffer(next_to_clean, true,
                                               wb_context);

        if (++next_to_clean >= NBuffers)
        {
            next_to_clean = 0;
            next_passes++;
        }
        num_to_scan--;

        if (sync_state & BUF_WRITTEN)
        {
            reusable_buffers++;
            if (++num_written >= bgwriter_lru_maxpages)
            {
                PendingBgWriterStats.maxwritten_clean++;
                break;
            }
        }
        else if (sync_state & BUF_REUSABLE)
            reusable_buffers++;
    }

    PendingBgWriterStats.buf_written_clean += num_written;

#ifdef BGW_DEBUG
    elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
         recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
         smoothed_density, reusable_buffers_est, upcoming_alloc_est,
         bufs_to_lap - num_to_scan,
         num_written, reusable_buffers - reusable_buffers_est);
#endif

    /*
     * Consider the above scan as being like a new allocation scan.
     * Characterize its density and update the smoothed one based on it. This
     * effectively halves the moving average period in cases where both the
     * strategy and the background writer are doing some useful scanning,
     * which is helpful because a long memory isn't as desirable on the
     * density estimates.
     */
    new_strategy_delta = bufs_to_lap - num_to_scan;
    new_recent_alloc = reusable_buffers - reusable_buffers_est;
    if (new_strategy_delta > 0 && new_recent_alloc > 0)
    {
        scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
        smoothed_density += (scans_per_alloc - smoothed_density) /
            smoothing_samples;

#ifdef BGW_DEBUG
        elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
             new_recent_alloc, new_strategy_delta,
             scans_per_alloc, smoothed_density);
#endif
    }

    /* Return true if OK to hibernate */
    return (bufs_to_lap == 0 && recent_alloc == 0);
}

References Assert, bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
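
The "fast-attack, slow-decline" moving average described in the comments reacts within one cycle to an allocation burst but decays over roughly smoothing_samples cycles. A standalone sketch of just that update rule (smoothing_samples = 16, as above):

/* fast-attack, slow-decline exponential smoothing, as in BgBufferSync() */
static float
update_smoothed_alloc(float smoothed, unsigned int recent_alloc)
{
    const float smoothing_samples = 16;

    if (smoothed <= (float) recent_alloc)
        smoothed = (float) recent_alloc;    /* jump up immediately */
    else
        smoothed += ((float) recent_alloc - smoothed) / smoothing_samples;

    return smoothed;
}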

◆ buffer_readv_complete()

static pg_attribute_always_inline PgAioResult buffer_readv_complete ( PgAioHandle * ioh,
PgAioResult  prior_result,
uint8  cb_data,
bool  is_temp 
)
static

Definition at line 8259 of file bufmgr.c.

{
    PgAioResult result = prior_result;
    PgAioTargetData *td = pgaio_io_get_target_data(ioh);
    uint8       first_error_off = 0;
    uint8       first_zeroed_off = 0;
    uint8       first_ignored_off = 0;
    uint8       error_count = 0;
    uint8       zeroed_count = 0;
    uint8       ignored_count = 0;
    uint8       checkfail_count = 0;
    uint64     *io_data;
    uint8       handle_data_len;

    if (is_temp)
    {
        Assert(td->smgr.is_temp);
        Assert(pgaio_io_get_owner(ioh) == MyProcNumber);
    }
    else
        Assert(!td->smgr.is_temp);

    /*
     * Iterate over all the buffers affected by this IO and call the
     * per-buffer completion function for each buffer.
     */
    io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
    for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
    {
        Buffer      buf = io_data[buf_off];
        bool        failed;
        bool        failed_verification = false;
        bool        failed_checksum = false;
        bool        zeroed_buffer = false;
        bool        ignored_checksum = false;

        Assert(BufferIsValid(buf));

        /*
         * If the entire I/O failed on a lower-level, each buffer needs to be
         * marked as failed. In case of a partial read, the first few buffers
         * may be ok.
         */
        failed =
            prior_result.status == PGAIO_RS_ERROR
            || prior_result.result <= buf_off;

        buffer_readv_complete_one(td, buf_off, buf, cb_data, failed, is_temp,
                                  &failed_verification,
                                  &failed_checksum,
                                  &ignored_checksum,
                                  &zeroed_buffer);

        /*
         * Track information about the number of different kinds of error
         * conditions across all pages, as there can be multiple pages failing
         * verification as part of one IO.
         */
        if (failed_verification && !zeroed_buffer && error_count++ == 0)
            first_error_off = buf_off;
        if (zeroed_buffer && zeroed_count++ == 0)
            first_zeroed_off = buf_off;
        if (ignored_checksum && ignored_count++ == 0)
            first_ignored_off = buf_off;
        if (failed_checksum)
            checkfail_count++;
    }

    /*
     * If the smgr read succeeded [partially] and page verification failed for
     * some of the pages, adjust the IO's result state appropriately.
     */
    if (prior_result.status != PGAIO_RS_ERROR &&
        (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
    {
        buffer_readv_encode_error(&result, is_temp,
                                  zeroed_count > 0, ignored_count > 0,
                                  error_count, zeroed_count, checkfail_count,
                                  first_error_off, first_zeroed_off,
                                  first_ignored_off);
        pgaio_result_report(result, td, DEBUG1);
    }

    /*
     * For shared relations this reporting is done in
     * shared_buffer_readv_complete_local().
     */
    if (is_temp && checkfail_count > 0)
        pgstat_report_checksum_failures_in_db(td->smgr.rlocator.dbOid,
                                              checkfail_count);

    return result;
}

References Assert, buf, buffer_readv_complete_one(), buffer_readv_encode_error(), BufferIsValid(), RelFileLocator::dbOid, DEBUG1, PgAioTargetData::is_temp, MyProcNumber, pgaio_io_get_handle_data(), pgaio_io_get_owner(), pgaio_io_get_target_data(), pgaio_result_report(), PGAIO_RS_ERROR, pgstat_report_checksum_failures_in_db(), PgAioTargetData::rlocator, and PgAioTargetData::smgr.

Referenced by local_buffer_readv_complete(), and shared_buffer_readv_complete().

◆ buffer_readv_complete_one()

static pg_attribute_always_inline void buffer_readv_complete_one ( PgAioTargetData * td,
uint8  buf_off,
Buffer  buffer,
uint8  flags,
bool  failed,
bool  is_temp,
bool * buffer_invalid,
bool * failed_checksum,
bool * ignored_checksum,
bool * zeroed_buffer 
)
static

Definition at line 8115 of file bufmgr.c.

{
    BufferDesc *buf_hdr = is_temp ?
        GetLocalBufferDescriptor(-buffer - 1)
        : GetBufferDescriptor(buffer - 1);
    BufferTag   tag = buf_hdr->tag;
    char       *bufdata = BufferGetBlock(buffer);
    uint64      set_flag_bits;
    int         piv_flags;

    /* check that the buffer is in the expected state for a read */
#ifdef USE_ASSERT_CHECKING
    {
        uint64      buf_state = pg_atomic_read_u64(&buf_hdr->state);

        Assert(buf_state & BM_TAG_VALID);
        Assert(!(buf_state & BM_VALID));
        /* temp buffers don't use BM_IO_IN_PROGRESS */
        if (!is_temp)
            Assert(buf_state & BM_IO_IN_PROGRESS);
        Assert(!(buf_state & BM_DIRTY));
    }
#endif

    *buffer_invalid = false;
    *failed_checksum = false;
    *ignored_checksum = false;
    *zeroed_buffer = false;

    /*
     * We ask PageIsVerified() to only log the message about checksum errors,
     * as the completion might be run in any backend (or IO workers). We will
     * report checksum errors in buffer_readv_report().
     */
    piv_flags = PIV_LOG_LOG;

    /* the local zero_damaged_pages may differ from the definer's */
    if (flags & READ_BUFFERS_IGNORE_CHECKSUM_FAILURES)
        piv_flags |= PIV_IGNORE_CHECKSUM_FAILURE;

    /* Check for garbage data. */
    if (!failed)
    {
        /*
         * If the buffer is not currently pinned by this backend, e.g. because
         * we're completing this IO after an error, the buffer data will have
         * been marked as inaccessible when the buffer was unpinned. The AIO
         * subsystem holds a pin, but that doesn't prevent the buffer from
         * having been marked as inaccessible. The completion might also be
         * executed in a different process.
         */
#ifdef USE_VALGRIND
        if (!BufferIsPinned(buffer))
            VALGRIND_MAKE_MEM_DEFINED(bufdata, BLCKSZ);
#endif

        if (!PageIsVerified((Page) bufdata, tag.blockNum, piv_flags,
                            failed_checksum))
        {
            if (flags & READ_BUFFERS_ZERO_ON_ERROR)
            {
                memset(bufdata, 0, BLCKSZ);
                *zeroed_buffer = true;
            }
            else
            {
                *buffer_invalid = true;
                /* mark buffer as having failed */
                failed = true;
            }
        }
        else if (*failed_checksum)
            *ignored_checksum = true;

        /* undo what we did above */
#ifdef USE_VALGRIND
        if (!BufferIsPinned(buffer))
            VALGRIND_MAKE_MEM_NOACCESS(bufdata, BLCKSZ);
#endif

        /*
         * Immediately log a message about the invalid page, but only to the
         * server log. The reason to do so immediately is that this may be
         * executed in a different backend than the one that originated the
         * request. The reason to do so immediately is that the originator
         * might not process the query result immediately (because it is busy
         * doing another part of query processing) or at all (e.g. if it was
         * cancelled or errored out due to another IO also failing). The
         * definer of the IO will emit an ERROR or WARNING when processing the
         * IO's results
         *
         * To avoid duplicating the code to emit these log messages, we reuse
         * buffer_readv_report().
         */
        if (*buffer_invalid || *failed_checksum || *zeroed_buffer)
        {
            PgAioResult result_one = {0};

            buffer_readv_encode_error(&result_one, is_temp,
                                      *zeroed_buffer,
                                      *ignored_checksum,
                                      *buffer_invalid ? 1 : 0,
                                      *zeroed_buffer ? 1 : 0,
                                      *failed_checksum ? 1 : 0,
                                      buf_off, buf_off, buf_off);
            pgaio_result_report(result_one, td, LOG_SERVER_ONLY);
        }
    }

    /* Terminate I/O and set BM_VALID. */
    set_flag_bits = failed ? BM_IO_ERROR : BM_VALID;
    if (is_temp)
        TerminateLocalBufferIO(buf_hdr, false, set_flag_bits, true);
    else
        TerminateBufferIO(buf_hdr, false, set_flag_bits, false, true);

    /*
     * Call the BUFFER_READ_DONE tracepoint in the callback, even though the
     * callback may not be executed in the same backend that called
     * BUFFER_READ_START. The alternative would be to defer calling the
     * tracepoint to a later point (e.g. the local completion callback for
     * shared buffer reads), which seems even less helpful.
     */
    TRACE_POSTGRESQL_BUFFER_READ_DONE(tag.forkNum,
                                      tag.blockNum,
                                      tag.spcOid,
                                      tag.dbOid,
                                      tag.relNumber,
                                      is_temp ? MyProcNumber : INVALID_PROC_NUMBER,
                                      false);
}

References Assert, buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, buffer_readv_encode_error(), BufferGetBlock(), BufferIsPinned, buftag::dbOid, buftag::forkNum, GetBufferDescriptor(), GetLocalBufferDescriptor(), INVALID_PROC_NUMBER, LOG_SERVER_ONLY, MyProcNumber, PageIsVerified(), pg_atomic_read_u64(), pgaio_result_report(), PIV_IGNORE_CHECKSUM_FAILURE, PIV_LOG_LOG, READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_ZERO_ON_ERROR, buftag::relNumber, buftag::spcOid, TerminateBufferIO(), TerminateLocalBufferIO(), VALGRIND_MAKE_MEM_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by buffer_readv_complete().

◆ buffer_readv_decode_error()

static void buffer_readv_decode_error ( PgAioResult  result,
bool * zeroed_any,
bool * ignored_any,
uint8 * zeroed_or_error_count,
uint8 * checkfail_count,
uint8 * first_off 
)
inlinestatic

Definition at line 7978 of file bufmgr.c.

{
    uint32      rem_error = result.error_data;

    /* see static asserts in buffer_readv_encode_error */
#define READV_COUNT_BITS 7
#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)

    *zeroed_any = rem_error & 1;
    rem_error >>= 1;

    *ignored_any = rem_error & 1;
    rem_error >>= 1;

    *zeroed_or_error_count = rem_error & READV_COUNT_MASK;
    rem_error >>= READV_COUNT_BITS;

    *checkfail_count = rem_error & READV_COUNT_MASK;
    rem_error >>= READV_COUNT_BITS;

    *first_off = rem_error & READV_COUNT_MASK;
    rem_error >>= READV_COUNT_BITS;
}

References PgAioResult::error_data, READV_COUNT_BITS, and READV_COUNT_MASK.

Referenced by buffer_readv_encode_error(), buffer_readv_report(), and shared_buffer_readv_complete_local().

◆ buffer_readv_encode_error()

static void buffer_readv_encode_error ( PgAioResult * result,
bool  is_temp,
bool  zeroed_any,
bool  ignored_any,
uint8  error_count,
uint8  zeroed_count,
uint8  checkfail_count,
uint8  first_error_off,
uint8  first_zeroed_off,
uint8  first_ignored_off 
)
inlinestatic

Definition at line 8020 of file bufmgr.c.

{
    uint8       shift = 0;
    uint8       zeroed_or_error_count =
        error_count > 0 ? error_count : zeroed_count;
    uint8       first_off;

    StaticAssertDecl(PG_IOV_MAX <= 1 << READV_COUNT_BITS,
                     "PG_IOV_MAX is bigger than reserved space for error data");
    StaticAssertDecl((1 + 1 + 3 * READV_COUNT_BITS) <= PGAIO_RESULT_ERROR_BITS,
                     "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");

    /*
     * We only have space to encode one offset - but luckily that's good
     * enough. If there is an error, the error is the interesting offset, same
     * with a zeroed buffer vs an ignored buffer.
     */
    if (error_count > 0)
        first_off = first_error_off;
    else if (zeroed_count > 0)
        first_off = first_zeroed_off;
    else
        first_off = first_ignored_off;

    Assert(!zeroed_any || error_count == 0);

    result->error_data = 0;

    result->error_data |= zeroed_any << shift;
    shift += 1;

    result->error_data |= ignored_any << shift;
    shift += 1;

    result->error_data |= ((uint32) zeroed_or_error_count) << shift;
    shift += READV_COUNT_BITS;

    result->error_data |= ((uint32) checkfail_count) << shift;
    shift += READV_COUNT_BITS;

    result->error_data |= ((uint32) first_off) << shift;
    shift += READV_COUNT_BITS;

    result->id = is_temp ? PGAIO_HCB_LOCAL_BUFFER_READV :
        PGAIO_HCB_SHARED_BUFFER_READV;

    if (error_count > 0)
        result->status = PGAIO_RS_ERROR;
    else
        result->status = PGAIO_RS_WARNING;

    /*
     * The encoding is complicated enough to warrant cross-checking it against
     * the decode function.
     */
#ifdef USE_ASSERT_CHECKING
    {
        bool        zeroed_any_2,
                    ignored_any_2;
        uint8       zeroed_or_error_count_2,
                    checkfail_count_2,
                    first_off_2;

        buffer_readv_decode_error(*result,
                                  &zeroed_any_2, &ignored_any_2,
                                  &zeroed_or_error_count_2,
                                  &checkfail_count_2,
                                  &first_off_2);
        Assert(zeroed_any == zeroed_any_2);
        Assert(ignored_any == ignored_any_2);
        Assert(zeroed_or_error_count == zeroed_or_error_count_2);
        Assert(checkfail_count == checkfail_count_2);
        Assert(first_off == first_off_2);
    }
#endif

#undef READV_COUNT_BITS
#undef READV_COUNT_MASK
}

References Assert, buffer_readv_decode_error(), PgAioResult::error_data, PgAioResult::id, PG_IOV_MAX, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_RESULT_ERROR_BITS, PGAIO_RS_ERROR, PGAIO_RS_WARNING, READV_COUNT_BITS, StaticAssertDecl, and PgAioResult::status.

Referenced by buffer_readv_complete(), and buffer_readv_complete_one().
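
The error_data layout packed above is, from the least-significant bit: 1 bit zeroed_any, 1 bit ignored_any, then three 7-bit fields (zeroed_or_error_count, checkfail_count, first_off), 23 bits in total. A standalone round-trip sketch of the same packing, independent of the PgAioResult types:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define COUNT_BITS 7
#define COUNT_MASK ((1u << COUNT_BITS) - 1)

static uint32_t
encode(bool zeroed_any, bool ignored_any,
       uint8_t zeroed_or_error_count, uint8_t checkfail_count,
       uint8_t first_off)
{
    uint32_t data = 0;
    uint8_t  shift = 0;

    data |= (uint32_t) zeroed_any << shift;             shift += 1;
    data |= (uint32_t) ignored_any << shift;            shift += 1;
    data |= (uint32_t) zeroed_or_error_count << shift;  shift += COUNT_BITS;
    data |= (uint32_t) checkfail_count << shift;        shift += COUNT_BITS;
    data |= (uint32_t) first_off << shift;
    return data;
}

int
main(void)
{
    /* e.g. 3 zeroed pages, 2 checksum failures, first affected offset 5 */
    uint32_t d = encode(true, false, 3, 2, 5);

    assert((d & 1) == 1);                                    /* zeroed_any */
    assert(((d >> 1) & 1) == 0);                             /* ignored_any */
    assert(((d >> 2) & COUNT_MASK) == 3);                    /* zeroed_or_error_count */
    assert(((d >> (2 + COUNT_BITS)) & COUNT_MASK) == 2);     /* checkfail_count */
    assert(((d >> (2 + 2 * COUNT_BITS)) & COUNT_MASK) == 5); /* first_off */
    return 0;
}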

◆ buffer_readv_report()

static void buffer_readv_report ( PgAioResult  result,
const PgAioTargetData * td, 
int  elevel 
)
static

Definition at line 8362 of file bufmgr.c.

{
    int         nblocks = td->smgr.nblocks;
    BlockNumber first = td->smgr.blockNum;
    BlockNumber last = first + nblocks - 1;
    ProcNumber  errProc =
        td->smgr.is_temp ? MyProcNumber : INVALID_PROC_NUMBER;
    RelPathStr  rpath =
        relpathbackend(td->smgr.rlocator, errProc, td->smgr.forkNum);
    bool        zeroed_any,
                ignored_any;
    uint8       zeroed_or_error_count,
                checkfail_count,
                first_off;
    uint8       affected_count;
    const char *msg_one,
               *msg_mult,
               *det_mult,
               *hint_mult;

    buffer_readv_decode_error(result, &zeroed_any, &ignored_any,
                              &zeroed_or_error_count,
                              &checkfail_count,
                              &first_off);

    /*
     * Treat a read that had both zeroed buffers *and* ignored checksums as a
     * special case, it's too irregular to be emitted the same way as the
     * other cases.
     */
    if (zeroed_any && ignored_any)
    {
        affected_count = zeroed_or_error_count;
        Assert(nblocks > 1);    /* same block can't be both zeroed and ignored */
        Assert(result.status != PGAIO_RS_ERROR);

        ereport(elevel,
                errcode(ERRCODE_DATA_CORRUPTED),
                errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation \"%s\"",
                       affected_count, checkfail_count, first, last, rpath.str),
                affected_count > 1 ?
                errdetail("Block %u held the first zeroed page.",
                          first + first_off) : 0,
                errhint_plural("See server log for details about the other %d invalid block.",
                               "See server log for details about the other %d invalid blocks.",
                               affected_count + checkfail_count - 1,
                               affected_count + checkfail_count - 1));
        return;
    }

    /*
     * The other messages are highly repetitive. To avoid duplicating a long
     * and complicated ereport(), gather the translated format strings
     * separately and then do one common ereport.
     */
    if (result.status == PGAIO_RS_ERROR)
    {
        Assert(!zeroed_any);    /* can't have invalid pages when zeroing them */
        affected_count = zeroed_or_error_count;
        msg_one = _("invalid page in block %u of relation \"%s\"");
        msg_mult = _("%u invalid pages among blocks %u..%u of relation \"%s\"");
        det_mult = _("Block %u held the first invalid page.");
        hint_mult = _("See server log for the other %u invalid block(s).");
    }
    else if (zeroed_any && !ignored_any)
    {
        affected_count = zeroed_or_error_count;
        msg_one = _("invalid page in block %u of relation \"%s\"; zeroing out page");
        msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation \"%s\"");
        det_mult = _("Block %u held the first zeroed page.");
        hint_mult = _("See server log for the other %u zeroed block(s).");
    }
    else if (!zeroed_any && ignored_any)
    {
        affected_count = checkfail_count;
        msg_one = _("ignoring checksum failure in block %u of relation \"%s\"");
        msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation \"%s\"");
        det_mult = _("Block %u held the first ignored page.");
        hint_mult = _("See server log for the other %u ignored block(s).");
    }
    else
        pg_unreachable();

    ereport(elevel,
            errcode(ERRCODE_DATA_CORRUPTED),
            affected_count == 1 ?
            errmsg_internal(msg_one, first + first_off, rpath.str) :
            errmsg_internal(msg_mult, affected_count, first, last, rpath.str),
            affected_count > 1 ? errdetail_internal(det_mult, first + first_off) : 0,
            affected_count > 1 ? errhint_internal(hint_mult, affected_count - 1) : 0);
}

References _, Assert, PgAioTargetData::blockNum, buffer_readv_decode_error(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errdetail(), errdetail_internal(), errhint_internal(), errhint_plural(), errmsg(), errmsg_internal(), fb(), PgAioTargetData::forkNum, INVALID_PROC_NUMBER, PgAioTargetData::is_temp, MyProcNumber, PgAioTargetData::nblocks, pg_unreachable, PGAIO_RS_ERROR, relpathbackend, PgAioTargetData::rlocator, PgAioTargetData::smgr, PgAioResult::status, and RelPathStr::str.
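
For orientation, the message formats collected above expand to log lines like the following (the block numbers, counts, and relation path are illustrative, not taken from an actual run):

    zeroing 2 page(s) and ignoring 1 checksum failure(s) among blocks 10..13 of relation "base/16384/16385"
    invalid page in block 7 of relation "base/16384/16385"
    ignoring 3 checksum failures among blocks 0..9 of relation "base/16384/16385"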

◆ buffer_stage_common()

static pg_attribute_always_inline void buffer_stage_common ( PgAioHandle *  ioh,
bool  is_write,
bool  is_temp 
)
static

Definition at line 7871 of file bufmgr.c.

7872{
7873 uint64 *io_data;
7874 uint8 handle_data_len;
7877
7878 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
7879
7881
7882 /* iterate over all buffers affected by the vectored readv/writev */
7883 for (int i = 0; i < handle_data_len; i++)
7884 {
7885 Buffer buffer = (Buffer) io_data[i];
7886 BufferDesc *buf_hdr = is_temp ?
7887 GetLocalBufferDescriptor(-buffer - 1)
7888 : GetBufferDescriptor(buffer - 1);
7890
7891 /*
7892 * Check that all the buffers are actually ones that could conceivably
7893 * be done in one IO, i.e. are sequential. This is the last
7894 * buffer-aware code before IO is actually executed and confusion
7895 * about which buffers are targeted by IO can be hard to debug, making
7896 * it worth doing extra-paranoid checks.
7897 */
7898 if (i == 0)
7899 first = buf_hdr->tag;
7900 else
7901 {
7902 Assert(buf_hdr->tag.relNumber == first.relNumber);
7903 Assert(buf_hdr->tag.blockNum == first.blockNum + i);
7904 }
7905
7906 if (is_temp)
7908 else
7910
7911 /* verify the buffer is in the expected state */
7913 if (is_write)
7914 {
7917 }
7918 else
7919 {
7922 }
7923
7924 /* temp buffers don't use BM_IO_IN_PROGRESS */
7925 if (!is_temp)
7927
7929
7930 /*
7931 * Reflect that the buffer is now owned by the AIO subsystem.
7932 *
7933 * For local buffers: This can't be done just via LocalRefCount, as
7934 * one might initially think, as this backend could error out while
7935 * AIO is still in progress, releasing all the pins by the backend
7936 * itself.
7937 *
7938 * This pin is released again in TerminateBufferIO().
7939 */
7940 buf_hdr->io_wref = io_ref;
7941
7942 if (is_temp)
7943 {
7946 }
7947 else
7949
7950 /*
7951 * Ensure the content lock that prevents buffer modifications while
7952 * the buffer is being written out is not released early due to an
7953 * error.
7954 */
7955 if (is_write && !is_temp)
7956 {
7958
7959 /*
7960 * Lock is now owned by AIO subsystem.
7961 */
7962 BufferLockDisown(buffer, buf_hdr);
7963 }
7964
7965 /*
7966 * Stop tracking this buffer via the resowner - the AIO system now
7967 * keeps track.
7968 */
7969 if (!is_temp)
7971 }
7972}
static void pg_atomic_unlocked_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:494
#define BUF_REFCOUNT_ONE
static uint64 UnlockBufHdrExt(BufferDesc *desc, uint64 old_buf_state, uint64 set_bits, uint64 unset_bits, int refcount_change)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_REFCOUNT(state)
static void BufferLockDisown(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6129
static bool BufferLockHeldByMe(BufferDesc *buf_hdr)
Definition bufmgr.c:6401
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
BufferTag tag

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferLockDisown(), BufferLockHeldByMe(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, LockBufHdr(), pg_atomic_read_u64(), pg_atomic_unlocked_write_u64(), PG_USED_FOR_ASSERTS_ONLY, pgaio_io_get_handle_data(), pgaio_io_get_wref(), ResourceOwnerForgetBufferIO(), and UnlockBufHdrExt().

Referenced by local_buffer_readv_stage(), and shared_buffer_readv_stage().
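
The sequentiality assertions above are the core invariant of vectored buffer IO. A minimal stand-alone restatement of that check - a hypothetical helper for illustration, not part of bufmgr.c - might look like this:

    /* Illustrative only: do these tags describe one contiguous readv/writev? */
    static bool
    buffertags_are_sequential(const BufferTag *tags, int ntags)
    {
        for (int i = 1; i < ntags; i++)
        {
            /* all blocks must be consecutive within one relation and fork */
            if (tags[i].relNumber != tags[0].relNumber ||
                tags[i].forkNum != tags[0].forkNum ||
                tags[i].blockNum != tags[0].blockNum + i)
                return false;
        }
        return true;
    }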

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr,
IOContext  io_context 
)
inlinestatic

Definition at line 2101 of file bufmgr.c.

2105{
2106 BufferTag newTag; /* identity of requested block */
2107 uint32 newHash; /* hash value for newTag */
2108 LWLock *newPartitionLock; /* buffer partition lock for it */
2109 int existing_buf_id;
2113 uint64 set_bits = 0;
2114
2115 /* Make sure we will have room to remember the buffer pin */
2118
2119 /* create a tag so we can lookup the buffer */
2120 InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
2121
2122 /* determine its hash code and partition lock ID */
2125
2126 /* see if the block is in the buffer pool already */
2129 if (existing_buf_id >= 0)
2130 {
2131 BufferDesc *buf;
2132 bool valid;
2133
2134 /*
2135 * Found it. Now, pin the buffer so no one can steal it from the
2136 * buffer pool, and check to see if the correct data has been loaded
2137 * into the buffer.
2138 */
2140
2141 valid = PinBuffer(buf, strategy, false);
2142
2143 /* Can release the mapping lock as soon as we've pinned it */
2145
2146 *foundPtr = true;
2147
2148 if (!valid)
2149 {
2150 /*
2151 * We can only get here if (a) someone else is still reading in
2152 * the page, (b) a previous read attempt failed, or (c) someone
2153 * called StartReadBuffers() but not yet WaitReadBuffers().
2154 */
2155 *foundPtr = false;
2156 }
2157
2158 return buf;
2159 }
2160
2161 /*
2162 * Didn't find it in the buffer pool. We'll have to initialize a new
2163 * buffer. Remember to unlock the mapping lock while doing the work.
2164 */
2166
2167 /*
2168 * Acquire a victim buffer. Somebody else might try to do the same, as we
2169 * don't hold any conflicting locks. If so, we'll have to undo our work
2170 * later.
2171 */
2174
2175 /*
2176 * Try to make a hashtable entry for the buffer under its new tag. If
2177 * somebody else inserted another buffer for the tag, we'll release the
2178 * victim buffer we acquired and use the already inserted one.
2179 */
2182 if (existing_buf_id >= 0)
2183 {
2185 bool valid;
2186
2187 /*
2188 * Got a collision. Someone has already done what we were about to do.
2189 * We'll just handle this as if it were found in the buffer pool in
2190 * the first place. First, give up the buffer we were planning to
2191 * use.
2192 *
2193 * We could do this after releasing the partition lock, but then we'd
2194 * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
2195 * before acquiring the lock, for the rare case of such a collision.
2196 */
2198
2199 /* remaining code should match code at top of routine */
2200
2202
2203 valid = PinBuffer(existing_buf_hdr, strategy, false);
2204
2205 /* Can release the mapping lock as soon as we've pinned it */
2207
2208 *foundPtr = true;
2209
2210 if (!valid)
2211 {
2212 /*
2213 * We can only get here if (a) someone else is still reading in
2214 * the page, (b) a previous read attempt failed, or (c) someone
2215 * called StartReadBuffers() but not yet WaitReadBuffers().
2216 */
2217 *foundPtr = false;
2218 }
2219
2220 return existing_buf_hdr;
2221 }
2222
2223 /*
2224 * Need to lock the buffer header too in order to change its tag.
2225 */
2227
2228 /* some sanity checks while we hold the buffer header lock */
2231
2232 victim_buf_hdr->tag = newTag;
2233
2234 /*
2235 * Make sure BM_PERMANENT is set for buffers that must be written at every
2236 * checkpoint. Unlogged buffers only need to be written at shutdown
2237 * checkpoints, except for their "init" forks, which need to be treated
2238 * just like permanent relations.
2239 */
2241 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2242 set_bits |= BM_PERMANENT;
2243
2244 UnlockBufHdrExt(victim_buf_hdr, buf_state,
2245 set_bits, 0, 0);
2246
2248
2249 /*
2250 * Buffer contents are currently invalid.
2251 */
2252 *foundPtr = false;
2253
2254 return victim_buf_hdr;
2255}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
#define BUF_USAGECOUNT_ONE
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition buf_table.c:118
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition bufmgr.c:2452
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition bufmgr.c:3182
static void ReservePrivateRefCountEntry(void)
Definition bufmgr.c:294
static void UnpinBuffer(BufferDesc *buf)
Definition bufmgr.c:3361
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
@ INIT_FORKNUM
Definition relpath.h:61
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition resowner.c:449

References Assert, BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, UnlockBufHdrExt(), and UnpinBuffer().

Referenced by PinBufferForBlock().
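
Reduced to a stand-alone sketch, the lookup half of this protocol is: hash the tag, take the mapping partition lock shared, probe the hash table, and pin the buffer before the mapping lock is released. The helper name is hypothetical and the victim path is omitted:

    static BufferDesc *
    lookup_buffer_sketch(BufferTag *newTag, BufferAccessStrategy strategy,
                         bool *valid)
    {
        uint32      newHash = BufTableHashCode(newTag);
        LWLock     *partitionLock = BufMappingPartitionLock(newHash);
        int         buf_id;
        BufferDesc *buf;

        LWLockAcquire(partitionLock, LW_SHARED);
        buf_id = BufTableLookup(newTag, newHash);
        if (buf_id < 0)
        {
            /* not cached: the caller must run the GetVictimBuffer() +
             * BufTableInsert() path shown above */
            LWLockRelease(partitionLock);
            return NULL;
        }
        buf = GetBufferDescriptor(buf_id);
        *valid = PinBuffer(buf, strategy, false);   /* pin before unlocking */
        LWLockRelease(partitionLock);
        return buf;
    }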

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4357 of file bufmgr.c.

4358{
4360
4361 Assert(BufferIsPinned(buffer));
4362
4363 if (BufferIsLocal(buffer))
4364 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4365 else
4366 bufHdr = GetBufferDescriptor(buffer - 1);
4367
4368 /* pinned, so OK to read tag without spinlock */
4369 return bufHdr->tag.blockNum;
4370}
#define BufferIsLocal(buffer)
Definition buf.h:37

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().
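
A minimal usage sketch (rel and blkno are hypothetical variables): because the caller holds a pin, the buffer cannot be evicted and retagged, which is why the tag can be read without the header spinlock.

    Buffer      buf = ReadBuffer(rel, blkno);

    Assert(BufferGetBlockNumber(buf) == blkno);     /* stable while pinned */
    ReleaseBuffer(buf);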

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4635 of file bufmgr.c.

4636{
4637 char *page = BufferGetPage(buffer);
4639 XLogRecPtr lsn;
4640
4641 /*
4642 * If we don't need locking for correctness, fastpath out.
4643 */
4644 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4645 return PageGetLSN(page);
4646
4647 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4648 Assert(BufferIsValid(buffer));
4649 Assert(BufferIsPinned(buffer));
4650
4651 bufHdr = GetBufferDescriptor(buffer - 1);
4653 lsn = PageGetLSN(page);
4655
4656 return lsn;
4657}
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
static XLogRecPtr PageGetLSN(const PageData *page)
Definition bufpage.h:385
#define XLogHintBitIsNeeded()
Definition xlog.h:122
uint64 XLogRecPtr
Definition xlogdefs.h:21

References Assert, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().
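
A hedged sketch of the pattern used by SetHintBits(), one of the callers above (commit_lsn and buf are assumed to be in scope): before persisting a hint bit on a checksummed page, the page LSN is compared against the WAL position that must become durable first.

    /* Sketch only: skip the hint if the relevant WAL isn't flushed yet and
     * the page was last modified before that WAL position. */
    if (XLogNeedsFlush(commit_lsn) &&
        BufferGetLSNAtomic(buf) < commit_lsn)
        return;                     /* too early; try again later */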

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator *  rlocator,
ForkNumber *  forknum,
BlockNumber *  blknum 
)

Definition at line 4378 of file bufmgr.c.

4380{
4382
4383 /* Do the same checks as BufferGetBlockNumber. */
4384 Assert(BufferIsPinned(buffer));
4385
4386 if (BufferIsLocal(buffer))
4387 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4388 else
4389 bufHdr = GetBufferDescriptor(buffer - 1);
4390
4391 /* pinned, so OK to read tag without spinlock */
4392 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4393 *forknum = BufTagGetForkNum(&bufHdr->tag);
4394 *blknum = bufHdr->tag.blockNum;
4395}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), and GetLocalBufferDescriptor().

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
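
Typical decode-and-report usage (sketch; buf is assumed pinned):

    RelFileLocator rlocator;
    ForkNumber  forknum;
    BlockNumber blkno;

    BufferGetTag(buf, &rlocator, &forknum, &blkno);
    elog(DEBUG2, "buffer %d holds block %u of fork %d in relation %u",
         buf, blkno, forknum, rlocator.relNumber);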

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 3025 of file bufmgr.c.

3026{
3028
3029 Assert(BufferIsPinned(buffer));
3030
3031 if (BufferIsLocal(buffer))
3032 {
3033 int bufid = -buffer - 1;
3034
3036 /* Content locks are not maintained for local buffers. */
3037 }
3038 else
3039 {
3040 bufHdr = GetBufferDescriptor(buffer - 1);
3042 }
3043
3044 return pg_atomic_read_u64(&bufHdr->state) & BM_DIRTY;
3045}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2998
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220

References Assert, BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and pg_atomic_read_u64().

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().
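
The callers above use it in assertion style: when a routine expects the page to have been modified and flagged already, it checks both the lock level and the dirty bit (sketch, buf assumed in scope):

    Assert(BufferIsLockedByMeInMode(buf, BUFFER_LOCK_EXCLUSIVE));
    Assert(BufferIsDirty(buf));    /* MarkBufferDirty() must already have run */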

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2972 of file bufmgr.c.

2973{
2975
2976 Assert(BufferIsPinned(buffer));
2977
2978 if (BufferIsLocal(buffer))
2979 {
2980 /* Content locks are not maintained for local buffers. */
2981 return true;
2982 }
2983 else
2984 {
2985 bufHdr = GetBufferDescriptor(buffer - 1);
2986 return BufferLockHeldByMe(bufHdr);
2987 }
2988}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMe(), fb(), and GetBufferDescriptor().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2998 of file bufmgr.c.

2999{
3001
3002 Assert(BufferIsPinned(buffer));
3003
3004 if (BufferIsLocal(buffer))
3005 {
3006 /* Content locks are not maintained for local buffers. */
3007 return true;
3008 }
3009 else
3010 {
3011 bufHdr = GetBufferDescriptor(buffer - 1);
3013 }
3014}
static bool BufferLockHeldByMeInMode(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6383
static PgChecksumMode mode

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockHeldByMeInMode(), fb(), GetBufferDescriptor(), and mode.

Referenced by BufferIsDirty(), HeapTupleSetHintBits(), identify_and_fix_vm_corruption(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4605 of file bufmgr.c.

4606{
4608
4609 /* Local buffers are used only for temp relations. */
4610 if (BufferIsLocal(buffer))
4611 return false;
4612
4613 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4614 Assert(BufferIsValid(buffer));
4615 Assert(BufferIsPinned(buffer));
4616
4617 /*
4618 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
4619 * need not bother with the buffer header spinlock. Even if someone else
4620 * changes the buffer header state while we're doing this, the state is
4621 * changed atomically, so we'll read the old value or the new value, but
4622 * not random garbage.
4623 */
4624 bufHdr = GetBufferDescriptor(buffer - 1);
4625 return (pg_atomic_read_u64(&bufHdr->state) & BM_PERMANENT) != 0;
4626}

References Assert, BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), fb(), GetBufferDescriptor(), and pg_atomic_read_u64().

Referenced by SetHintBits().

◆ BufferLockAcquire()

static void BufferLockAcquire ( Buffer  buffer,
BufferDesc *  buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5756 of file bufmgr.c.

5757{
5758 PrivateRefCountEntry *entry;
5759 int extraWaits = 0;
5760
5761 /*
5762 * Get the reference to the refcount entry before we take the lock; it
5763 * is better to do the lookup while not yet holding the content lock.
5764 */
5765 entry = GetPrivateRefCountEntry(buffer, true);
5766
5767 /*
5768 * We better not already hold a lock on the buffer.
5769 */
5771
5772 /*
5773 * Lock out cancel/die interrupts until we exit the code section protected
5774 * by the content lock. This ensures that interrupts will not interfere
5775 * with manipulations of data structures in shared memory.
5776 */
5778
5779 for (;;)
5780 {
5781 uint32 wait_event = 0; /* initialized to avoid compiler warning */
5782 bool mustwait;
5783
5784 /*
5785 * Try to grab the lock the first time, we're not in the waitqueue
5786 * yet/anymore.
5787 */
5789
5790 if (likely(!mustwait))
5791 {
5792 break;
5793 }
5794
5795 /*
5796 * Ok, at this point we couldn't grab the lock on the first try. We
5797 * cannot simply queue ourselves to the end of the list and wait to be
5798 * woken up because by now the lock could long have been released.
5799 * Instead add us to the queue and try to grab the lock again. If we
5800 * succeed we need to revert the queuing and be happy, otherwise we
5801 * recheck the lock. If we still couldn't grab it, we know that the
5802 * other locker will see our queue entries when releasing since they
5803 * existed before we checked for the lock.
5804 */
5805
5806 /* add to the queue */
5808
5809 /* we're now guaranteed to be woken up if necessary */
5811
5812 /* ok, grabbed the lock the second time round, need to undo queueing */
5813 if (!mustwait)
5814 {
5816 break;
5817 }
5818
5819 switch (mode)
5820 {
5823 break;
5826 break;
5827 case BUFFER_LOCK_SHARE:
5829 break;
5830 case BUFFER_LOCK_UNLOCK:
5832
5833 }
5835
5836 /*
5837 * Wait until awakened.
5838 *
5839 * It is possible that we get awakened for a reason other than being
5840 * signaled by BufferLockWakeup(). If so, loop back and wait again.
5841 * Once we've gotten the lock, re-increment the sema by the number of
5842 * additional signals received.
5843 */
5844 for (;;)
5845 {
5848 break;
5849 extraWaits++;
5850 }
5851
5853
5854 /* Retrying, allow BufferLockRelease to release waiters again. */
5856 }
5857
5858 /* Remember that we now hold this lock */
5859 entry->data.lockmode = mode;
5860
5861 /*
5862 * Fix the process wait semaphore's count for any absorbed wakeups.
5863 */
5864 while (unlikely(extraWaits-- > 0))
5866}
static uint64 pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
Definition atomics.h:551
#define BM_LOCK_WAKE_IN_PROGRESS
static bool BufferLockAttempt(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5954
static void BufferLockDequeueSelf(BufferDesc *buf_hdr)
Definition bufmgr.c:6061
static void BufferLockQueueSelf(BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:6021
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition bufmgr.c:494
@ BUFFER_LOCK_SHARE_EXCLUSIVE
Definition bufmgr.h:215
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
#define likely(x)
Definition c.h:411
@ LW_WS_NOT_WAITING
Definition lwlock.h:30
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
void PGSemaphoreUnlock(PGSemaphore sema)
Definition posix_sema.c:335
void PGSemaphoreLock(PGSemaphore sema)
Definition posix_sema.c:315
PGPROC * MyProc
Definition proc.c:67
PGSemaphore sem
Definition proc.h:184
uint8 lwWaiting
Definition proc.h:250
BufferLockMode lockmode
Definition bufmgr.c:110
PrivateRefCountData data
Definition bufmgr.c:126
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85

References Assert, BM_LOCK_WAKE_IN_PROGRESS, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), BufferLockDequeueSelf(), BufferLockQueueSelf(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, likely, PrivateRefCountData::lockmode, LW_WS_NOT_WAITING, PGPROC::lwWaiting, mode, MyProc, pg_atomic_fetch_and_u64(), pg_unreachable, PGSemaphoreLock(), PGSemaphoreUnlock(), pgstat_report_wait_end(), pgstat_report_wait_start(), PGPROC::sem, and unlikely.

Referenced by FlushUnlockedBuffer(), LockBufferInternal(), and MarkDirtyUnpinnedBufferInternal().

◆ BufferLockAttempt()

static bool BufferLockAttempt ( BufferDesc *  buf_hdr,
BufferLockMode  mode 
)
inlinestatic

Definition at line 5954 of file bufmgr.c.

5955{
5957
5958 /*
5959 * Read once outside the loop; later iterations will get the newer value
5960 * via compare & exchange.
5961 */
5963
5964 /* loop until we've determined whether we could acquire the lock or not */
5965 while (true)
5966 {
5968 bool lock_free;
5969
5971
5973 {
5974 lock_free = (old_state & BM_LOCK_MASK) == 0;
5975 if (lock_free)
5977 }
5979 {
5981 if (lock_free)
5983 }
5984 else
5985 {
5987 if (lock_free)
5989 }
5990
5991 /*
5992 * Attempt to swap in the state we are expecting. If we didn't see
5993 * the lock as free, that's just the old value. If we saw it as free,
5994 * we'll attempt to mark it acquired. The reason that we always swap
5995 * in the value is that this doubles as a memory barrier. We could try
5996 * to be smarter and only swap in values if we saw the lock as free,
5997 * but benchmarks haven't shown that to be beneficial so far.
5998 *
5999 * Retry if the value changed since we last looked at it.
6000 */
6003 {
6004 if (lock_free)
6005 {
6006 /* Great! Got the lock. */
6007 return false;
6008 }
6009 else
6010 return true; /* somebody else has the lock */
6011 }
6012 }
6013
6015}
static bool pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 *expected, uint64 newval)
Definition atomics.h:522
#define BM_LOCK_VAL_SHARED
#define BM_LOCK_VAL_EXCLUSIVE
#define BM_LOCK_MASK
#define BM_LOCK_VAL_SHARE_EXCLUSIVE

References BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE_EXCLUSIVE, fb(), likely, mode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), and pg_unreachable.

Referenced by BufferLockAcquire(), and BufferLockConditional().
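
The same CAS idiom, restated as a stand-alone sketch (hypothetical lockword layout; note the real function returns "must wait" rather than "acquired"): the compare-and-exchange is issued even when the lock was observed as held, so that the operation doubles as a memory barrier exactly as the comment above describes.

    /* Illustrative try-lock: returns true if the lock was acquired. */
    static bool
    cas_try_lock(pg_atomic_uint64 *lockword, uint64 acquire_val,
                 uint64 lock_mask)
    {
        uint64      old_state = pg_atomic_read_u64(lockword);

        while (true)
        {
            bool        lock_free = (old_state & lock_mask) == 0;
            uint64      desired = lock_free ? old_state + acquire_val : old_state;

            /* on failure, old_state is refreshed and we simply retry */
            if (pg_atomic_compare_exchange_u64(lockword, &old_state, desired))
                return lock_free;
        }
    }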

◆ BufferLockConditional()

static bool BufferLockConditional ( Buffer  buffer,
BufferDesc *  buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 5908 of file bufmgr.c.

5909{
5910 PrivateRefCountEntry *entry = GetPrivateRefCountEntry(buffer, true);
5911 bool mustwait;
5912
5913 /*
5914 * As described above, if we're trying to lock a buffer this backend
5915 * already has locked, return false, independent of the existing and
5916 * desired lock level.
5917 */
5918 if (entry->data.lockmode != BUFFER_LOCK_UNLOCK)
5919 return false;
5920
5921 /*
5922 * Lock out cancel/die interrupts until we exit the code section protected
5923 * by the content lock. This ensures that interrupts will not interfere
5924 * with manipulations of data structures in shared memory.
5925 */
5927
5928 /* Check for the lock */
5930
5931 if (mustwait)
5932 {
5933 /* Failed to get lock, so release interrupt holdoff */
5935 }
5936 else
5937 {
5938 entry->data.lockmode = mode;
5939 }
5940
5941 return !mustwait;
5942}
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferLockAttempt(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, PrivateRefCountData::lockmode, mode, and RESUME_INTERRUPTS.

Referenced by ConditionalLockBuffer(), and GetVictimBuffer().

◆ BufferLockDequeueSelf()

static void BufferLockDequeueSelf ( BufferDesc *  buf_hdr)
static

Definition at line 6061 of file bufmgr.c.

6062{
6063 bool on_waitlist;
6064
6066
6068 if (on_waitlist)
6069 proclist_delete(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6070
6071 if (proclist_is_empty(&buf_hdr->lock_waiters) &&
6073 {
6075 }
6076
6077 /* XXX: combine with fetch_and above? */
6079
6080 /* clear waiting state again, nice for debugging */
6081 if (on_waitlist)
6083 else
6084 {
6085 int extraWaits = 0;
6086
6087
6088 /*
6089 * Somebody else dequeued us and has or will wake us up. Deal with the
6090 * superfluous absorption of a wakeup.
6091 */
6092
6093 /*
6094 * Clear BM_LOCK_WAKE_IN_PROGRESS if somebody woke us before we
6095 * removed ourselves - they'll have set it.
6096 */
6098
6099 /*
6100 * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
6101 * get reset at some inconvenient point later. Most of the time this
6102 * will immediately return.
6103 */
6104 for (;;)
6105 {
6108 break;
6109 extraWaits++;
6110 }
6111
6112 /*
6113 * Fix the process wait semaphore's count for any absorbed wakeups.
6114 */
6115 while (extraWaits-- > 0)
6117 }
6118}
#define BM_LOCK_HAS_WAITERS
@ LW_WS_WAITING
Definition lwlock.h:31
#define proclist_delete(list, procno, link_member)
Definition proclist.h:187
static bool proclist_is_empty(const proclist_head *list)
Definition proclist.h:38

References BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, MyProc, MyProcNumber, pg_atomic_fetch_and_u64(), pg_atomic_read_u64(), PGSemaphoreLock(), PGSemaphoreUnlock(), proclist_delete, proclist_is_empty(), PGPROC::sem, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockDisown()

static void BufferLockDisown ( Buffer  buffer,
BufferDesc *  buf_hdr 
)
inlinestatic

Definition at line 6129 of file bufmgr.c.

6130{
6133}
static int BufferLockDisownInternal(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:6143

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), fb(), and RESUME_INTERRUPTS.

Referenced by buffer_stage_common().

◆ BufferLockDisownInternal()

static int BufferLockDisownInternal ( Buffer  buffer,
BufferDesc *  buf_hdr 
)
inlinestatic

Definition at line 6143 of file bufmgr.c.

6144{
6147
6148 ref = GetPrivateRefCountEntry(buffer, false);
6149 if (ref == NULL)
6150 elog(ERROR, "lock %d is not held", buffer);
6151 mode = ref->data.lockmode;
6152 ref->data.lockmode = BUFFER_LOCK_UNLOCK;
6153
6154 return mode;
6155}
BufferLockMode
Definition bufmgr.h:204
#define ERROR
Definition elog.h:39

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, elog, ERROR, fb(), GetPrivateRefCountEntry(), and mode.

Referenced by BufferLockDisown(), and BufferLockUnlock().

◆ BufferLockHeldByMe()

static bool BufferLockHeldByMe ( BufferDesc *  buf_hdr)
static

Definition at line 6401 of file bufmgr.c.

6402{
6403 PrivateRefCountEntry *entry =
6405
6406 if (!entry)
6407 return false;
6408 else
6409 return entry->data.lockmode != BUFFER_LOCK_UNLOCK;
6410}
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)

References BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), and PrivateRefCountData::lockmode.

Referenced by buffer_stage_common(), BufferIsLockedByMe(), and UnpinBufferNoOwner().

◆ BufferLockHeldByMeInMode()

static bool BufferLockHeldByMeInMode ( BufferDesc *  buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6383 of file bufmgr.c.

6384{
6385 PrivateRefCountEntry *entry =
6387
6388 if (!entry)
6389 return false;
6390 else
6391 return entry->data.lockmode == mode;
6392}

References BufferDescriptorGetBuffer(), PrivateRefCountEntry::data, fb(), GetPrivateRefCountEntry(), PrivateRefCountData::lockmode, and mode.

Referenced by BufferIsLockedByMeInMode().

◆ BufferLockProcessRelease()

static void BufferLockProcessRelease ( BufferDesc *  buf_hdr,
BufferLockMode  mode,
uint64  lockstate 
)
static

Definition at line 6328 of file bufmgr.c.

6329{
6330 bool check_waiters = false;
6331 bool wake_exclusive = false;
6332
6333 /* nobody else can have that kind of lock */
6335
6336 /*
6337 * If we're still waiting for backends to get scheduled, don't wake them
6338 * up again. Otherwise check if we need to look through the waitqueue to
6339 * wake other backends.
6340 */
6343 {
6344 if ((lockstate & BM_LOCK_MASK) == 0)
6345 {
6346 /*
6347 * We released a lock and the lock was, in that moment, free. We
6348 * therefore can wake waiters for any kind of lock.
6349 */
6350 check_waiters = true;
6351 wake_exclusive = true;
6352 }
6354 {
6355 /*
6356 * We released the lock, but another backend still holds a lock.
6357 * We can't have released an exclusive lock, as there couldn't
6358 * have been other lock holders. If we released a share lock, no
6359 * waiters need to be woken up, as there must be other share
6360 * lockers. However, if we held a share-exclusive lock, another
6361 * backend now could acquire a share-exclusive lock.
6362 */
6363 check_waiters = true;
6364 wake_exclusive = false;
6365 }
6366 }
6367
6368 /*
6369 * As waking up waiters requires the spinlock to be acquired, only do so
6370 * if necessary.
6371 */
6372 if (check_waiters)
6374}
static void BufferLockWakeup(BufferDesc *buf_hdr, bool unlocked)
Definition bufmgr.c:6163

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_MASK, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_SHARE_EXCLUSIVE, BufferLockWakeup(), fb(), and mode.

Referenced by BufferLockUnlock().

◆ BufferLockQueueSelf()

static void BufferLockQueueSelf ( BufferDesc *  buf_hdr,
BufferLockMode  mode 
)
static

Definition at line 6021 of file bufmgr.c.

6022{
6023 /*
6024 * If we don't have a PGPROC structure, there's no way to wait. This
6025 * should never occur, since MyProc should only be null during shared
6026 * memory initialization.
6027 */
6028 if (MyProc == NULL)
6029 elog(PANIC, "cannot wait without a PGPROC structure");
6030
6032 elog(PANIC, "queueing for lock while waiting on another one");
6033
6035
6036 /* setting the flag is protected by the spinlock */
6038
6039 /*
6040 * These are currently used both for lwlocks and buffer content locks,
6041 * which is acceptable, although not pretty, because a backend can't wait
6042 * for both types of locks at the same time.
6043 */
6046
6047 proclist_push_tail(&buf_hdr->lock_waiters, MyProcNumber, lwWaitLink);
6048
6049 /* Can release the mutex now */
6051}
static uint64 pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
Definition atomics.h:560
#define PANIC
Definition elog.h:42
#define proclist_push_tail(list, procno, link_member)
Definition proclist.h:191
uint8 lwWaitMode
Definition proc.h:251

References BM_LOCK_HAS_WAITERS, elog, fb(), LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, mode, MyProc, MyProcNumber, PANIC, pg_atomic_fetch_or_u64(), proclist_push_tail, and UnlockBufHdr().

Referenced by BufferLockAcquire().

◆ BufferLockReleaseSub()

static uint64 BufferLockReleaseSub ( BufferLockMode  mode)
inlinestatic

Definition at line 6299 of file bufmgr.c.

6300{
6301 /*
6302 * Turns out that a switch() leads gcc to generate sufficiently worse code
6303 * for this to show up in profiles...
6304 */
6306 return BM_LOCK_VAL_EXCLUSIVE;
6309 else
6310 {
6312 return BM_LOCK_VAL_SHARED;
6313 }
6314
6315 return 0; /* keep compiler quiet */
6316}

References Assert, BM_LOCK_VAL_EXCLUSIVE, BM_LOCK_VAL_SHARE_EXCLUSIVE, BM_LOCK_VAL_SHARED, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, and mode.

Referenced by BufferLockUnlock().

◆ BufferLockUnlock()

static void BufferLockUnlock ( Buffer  buffer,
BufferDesc *  buf_hdr 
)
static

Definition at line 5872 of file bufmgr.c.

5873{
5876 uint64 sub;
5877
5879
5880 /*
5881 * Release my hold on lock, after that it can immediately be acquired by
5882 * others, even if we still have to wakeup other waiters.
5883 */
5885
5887
5889
5890 /*
5891 * Now okay to allow cancel/die interrupts.
5892 */
5894}
static uint64 pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:578
static void BufferLockProcessRelease(BufferDesc *buf_hdr, BufferLockMode mode, uint64 lockstate)
Definition bufmgr.c:6328
static uint64 BufferLockReleaseSub(BufferLockMode mode)
Definition bufmgr.c:6299

References PrivateRefCountEntry::buffer, BufferLockDisownInternal(), BufferLockProcessRelease(), BufferLockReleaseSub(), fb(), mode, pg_atomic_sub_fetch_u64(), and RESUME_INTERRUPTS.

Referenced by FlushUnlockedBuffer(), MarkDirtyUnpinnedBufferInternal(), ResOwnerReleaseBuffer(), and UnlockBuffer().

◆ BufferLockWakeup()

static void BufferLockWakeup ( BufferDesc *  buf_hdr,
bool  unlocked 
)
static

Definition at line 6163 of file bufmgr.c.

6164{
6165 bool new_wake_in_progress = false;
6166 bool wake_share_exclusive = true;
6169
6171
6172 /* lock wait list while collecting backends to wake up */
6174
6175 proclist_foreach_modify(iter, &buf_hdr->lock_waiters, lwWaitLink)
6176 {
6177 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6178
6179 /*
6180 * Already woke up a conflicting lock, so skip over this wait list
6181 * entry.
6182 */
6184 continue;
6186 continue;
6187
6188 proclist_delete(&buf_hdr->lock_waiters, iter.cur, lwWaitLink);
6189 proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
6190
6191 /*
6192 * Prevent additional wakeups until the retrying backend gets to run.
6193 * are just waiting for the lock to become free don't retry
6194 * automatically.
6195 */
6196 new_wake_in_progress = true;
6197
6198 /*
6199 * Signal that the process isn't on the wait list anymore. This allows
6200 * BufferLockDequeueSelf() to remove itself from the waitlist with a
6201 * proclist_delete(), rather than having to check if it has been
6202 * removed from the list.
6203 */
6204 Assert(waiter->lwWaiting == LW_WS_WAITING);
6206
6207 /*
6208 * Don't wakeup further waiters after waking a conflicting waiter.
6209 */
6210 if (waiter->lwWaitMode == BUFFER_LOCK_SHARE)
6211 {
6212 /*
6213 * Share locks conflict with exclusive locks.
6214 */
6215 wake_exclusive = false;
6216 }
6217 else if (waiter->lwWaitMode == BUFFER_LOCK_SHARE_EXCLUSIVE)
6218 {
6219 /*
6220 * Share-exclusive locks conflict with share-exclusive and
6221 * exclusive locks.
6222 */
6223 wake_exclusive = false;
6224 wake_share_exclusive = false;
6225 }
6226 else if (waiter->lwWaitMode == BUFFER_LOCK_EXCLUSIVE)
6227 {
6228 /*
6229 * Exclusive locks conflict with all other locks, there's no point
6230 * in waking up anybody else.
6231 */
6232 break;
6233 }
6234 }
6235
6237
6238 /* unset required flags, and release lock, in one fell swoop */
6239 {
6242
6244 while (true)
6245 {
6247
6248 /* compute desired flags */
6249
6252 else
6254
6255 if (proclist_is_empty(&buf_hdr->lock_waiters))
6257
6258 desired_state &= ~BM_LOCKED; /* release lock */
6259
6262 break;
6263 }
6264 }
6265
6266 /* Awaken any waiters I removed from the queue. */
6267 proclist_foreach_modify(iter, &wakeup, lwWaitLink)
6268 {
6269 PGPROC *waiter = GetPGProcByNumber(iter.cur);
6270
6271 proclist_delete(&wakeup, iter.cur, lwWaitLink);
6272
6273 /*
6274 * Guarantee that lwWaiting being unset only becomes visible once the
6275 * unlink from the list has completed. Otherwise the target backend
6276 * could be woken up for another reason and enqueue for a new lock - if
6277 * that happens before the list unlink happens, the list would end up
6278 * being corrupted.
6279 *
6280 * The barrier pairs with the LockBufHdr() when enqueuing for another
6281 * lock.
6282 */
6283 pg_write_barrier();
6284 waiter->lwWaiting = LW_WS_NOT_WAITING;
6285 PGSemaphoreUnlock(waiter->sem);
6286 }
6287}
#define pg_write_barrier()
Definition atomics.h:155
@ LW_WS_PENDING_WAKEUP
Definition lwlock.h:32
#define GetPGProcByNumber(n)
Definition proc.h:450
static void proclist_init(proclist_head *list)
Definition proclist.h:29
#define proclist_foreach_modify(iter, lhead, link_member)
Definition proclist.h:206
Definition proc.h:180
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]

References Assert, BM_LOCK_HAS_WAITERS, BM_LOCK_WAKE_IN_PROGRESS, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, proclist_mutable_iter::cur, fb(), GetPGProcByNumber, LockBufHdr(), LW_WS_NOT_WAITING, LW_WS_PENDING_WAKEUP, LW_WS_WAITING, PGPROC::lwWaiting, PGPROC::lwWaitMode, pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pg_write_barrier, PGSemaphoreUnlock(), proclist_delete, proclist_foreach_modify, proclist_init(), proclist_is_empty(), proclist_push_tail, PGPROC::sem, and wakeup.

Referenced by BufferLockProcessRelease().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 3457 of file bufmgr.c.

3458{
3460 int buf_id;
3461 int num_to_scan;
3462 int num_spaces;
3463 int num_processed;
3464 int num_written;
3466 Oid last_tsid;
3468 int i;
3469 uint64 mask = BM_DIRTY;
3471
3472 /*
3473 * Unless this is a shutdown checkpoint or we have been explicitly told,
3474 * we write only permanent, dirty buffers. But at shutdown or end of
3475 * recovery, we write all dirty buffers.
3476 */
3479 mask |= BM_PERMANENT;
3480
3481 /*
3482 * Loop over all buffers, and mark the ones that need to be written with
3483 * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
3484 * can estimate how much work needs to be done.
3485 *
3486 * This allows us to write only those pages that were dirty when the
3487 * checkpoint began, and not those that get dirtied while it proceeds.
3488 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
3489 * later in this function, or by normal backends or the bgwriter cleaning
3490 * scan, the flag is cleared. Any buffer dirtied after this point won't
3491 * have the flag set.
3492 *
3493 * Note that if we fail to write some buffer, we may leave buffers with
3494 * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
3495 * certainly need to be written for the next checkpoint attempt, too.
3496 */
3497 num_to_scan = 0;
3498 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3499 {
3501 uint64 set_bits = 0;
3502
3503 /*
3504 * Header spinlock is enough to examine BM_DIRTY, see comment in
3505 * SyncOneBuffer.
3506 */
3508
3509 if ((buf_state & mask) == mask)
3510 {
3511 CkptSortItem *item;
3512
3514
3515 item = &CkptBufferIds[num_to_scan++];
3516 item->buf_id = buf_id;
3517 item->tsId = bufHdr->tag.spcOid;
3518 item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
3519 item->forkNum = BufTagGetForkNum(&bufHdr->tag);
3520 item->blockNum = bufHdr->tag.blockNum;
3521 }
3522
3524 set_bits, 0,
3525 0);
3526
3527 /* Check for barrier events in case NBuffers is large. */
3530 }
3531
3532 if (num_to_scan == 0)
3533 return; /* nothing to do */
3534
3536
3538
3539 /*
3540 * Sort buffers that need to be written to reduce the likelihood of random
3541 * IO. The sorting is also important for the implementation of balancing
3542 * writes between tablespaces. Without balancing writes we'd potentially
3543 * end up writing to the tablespaces one-by-one; possibly overloading the
3544 * underlying system.
3545 */
3547
3548 num_spaces = 0;
3549
3550 /*
3551 * Allocate progress status for each tablespace with buffers that need to
3552 * be flushed. This requires the to-be-flushed array to be sorted.
3553 */
3555 for (i = 0; i < num_to_scan; i++)
3556 {
3557 CkptTsStatus *s;
3558 Oid cur_tsid;
3559
3561
3562 /*
3563 * Grow array of per-tablespace status structs, every time a new
3564 * tablespace is found.
3565 */
3567 {
3568 Size sz;
3569
3570 num_spaces++;
3571
3572 /*
3573 * Not worth adding grow-by-power-of-2 logic here - even with a
3574 * few hundred tablespaces this should be fine.
3575 */
3576 sz = sizeof(CkptTsStatus) * num_spaces;
3577
3578 if (per_ts_stat == NULL)
3580 else
3582
3583 s = &per_ts_stat[num_spaces - 1];
3584 memset(s, 0, sizeof(*s));
3585 s->tsId = cur_tsid;
3586
3587 /*
3588 * The first buffer in this tablespace. As CkptBufferIds is sorted
3589 * by tablespace, all (s->num_to_scan) buffers in this tablespace
3590 * will follow afterwards.
3591 */
3592 s->index = i;
3593
3594 /*
3595 * progress_slice will be determined once we know how many buffers
3596 * are in each tablespace, i.e. after this loop.
3597 */
3598
3600 }
3601 else
3602 {
3603 s = &per_ts_stat[num_spaces - 1];
3604 }
3605
3606 s->num_to_scan++;
3607
3608 /* Check for barrier events. */
3611 }
3612
3613 Assert(num_spaces > 0);
3614
3615 /*
3616 * Build a min-heap over the write-progress in the individual tablespaces,
3617 * and compute how large a portion of the total progress a single
3618 * processed buffer is.
3619 */
3622 NULL);
3623
3624 for (i = 0; i < num_spaces; i++)
3625 {
3627
3628 ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3629
3631 }
3632
3634
3635 /*
3636 * Iterate through to-be-checkpointed buffers and write the ones (still)
3637 * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3638 * tablespaces; otherwise the sorting would lead to only one tablespace
3639 * receiving writes at a time, making inefficient use of the hardware.
3640 */
3641 num_processed = 0;
3642 num_written = 0;
3643 while (!binaryheap_empty(ts_heap))
3644 {
3648
3649 buf_id = CkptBufferIds[ts_stat->index].buf_id;
3650 Assert(buf_id != -1);
3651
3652 bufHdr = GetBufferDescriptor(buf_id);
3653
3654 num_processed++;
3655
3656 /*
3657 * We don't need to acquire the lock here, because we're only looking
3658 * at a single bit. It's possible that someone else writes the buffer
3659 * and clears the flag right after we check, but that doesn't matter
3660 * since SyncOneBuffer will then do nothing. However, there is a
3661 * further race condition: it's conceivable that between the time we
3662 * examine the bit here and the time SyncOneBuffer acquires the lock,
3663 * someone else not only wrote the buffer but replaced it with another
3664 * page and dirtied it. In that improbable case, SyncOneBuffer will
3665 * write the buffer though we didn't need to. It doesn't seem worth
3666 * guarding against this, though.
3667 */
3669 {
3670 if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3671 {
3674 num_written++;
3675 }
3676 }
3677
3678 /*
3679 * Measure progress independently of actually having to flush the buffer
3680 * - otherwise writes become unbalanced.
3681 */
3682 ts_stat->progress += ts_stat->progress_slice;
3683 ts_stat->num_scanned++;
3684 ts_stat->index++;
3685
3686 /* Have all the buffers from the tablespace been processed? */
3687 if (ts_stat->num_scanned == ts_stat->num_to_scan)
3688 {
3690 }
3691 else
3692 {
3693 /* update heap with the new progress */
3695 }
3696
3697 /*
3698 * Sleep to throttle our I/O rate.
3699 *
3700 * (This will check for barrier events even if it doesn't sleep.)
3701 */
3702 CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3703 }
3704
3705 /*
3706 * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3707 * IOContext will always be IOCONTEXT_NORMAL.
3708 */
3710
3712 per_ts_stat = NULL;
3714
3715 /*
3716 * Update checkpoint statistics. As noted above, this doesn't include
3717 * buffers written by other backends or bgwriter scan.
3718 */
3720
3722}
void binaryheap_build(binaryheap *heap)
Definition binaryheap.c:136
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:253
bh_node_type binaryheap_first(binaryheap *heap)
Definition binaryheap.c:175
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition binaryheap.c:190
void binaryheap_free(binaryheap *heap)
Definition binaryheap.c:73
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition binaryheap.c:114
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition binaryheap.c:37
#define binaryheap_empty(h)
Definition binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition buf_init.c:26
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition bufmgr.c:7245
int checkpoint_flush_after
Definition bufmgr.c:208
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition bufmgr.c:7268
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition bufmgr.c:7330
double float8
Definition c.h:644
size_t Size
Definition c.h:619
void CheckpointWriteDelay(int flags, double progress)
volatile sig_atomic_t ProcSignalBarrierPending
Definition globals.c:40
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
#define InvalidOid
unsigned int Oid
void ProcessProcSignalBarrier(void)
Definition procsignal.c:499
int ckpt_bufs_written
Definition xlog.h:178
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition bufmgr.c:149
int num_to_scan
Definition bufmgr.c:152
PgStat_Counter buffers_written
Definition pgstat.h:266
CheckpointStatsData CheckpointStats
Definition xlog.c:212
#define CHECKPOINT_FLUSH_UNLOGGED
Definition xlog.h:154
#define CHECKPOINT_END_OF_RECOVERY
Definition xlog.h:151
#define CHECKPOINT_IS_SHUTDOWN
Definition xlog.h:150

References Assert, binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), fb(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u64(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), SyncOneBuffer(), ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdrExt(), and WritebackContextInit().

Referenced by CheckPointBuffers().
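
As a worked example of the balancing arithmetic: with num_to_scan = 1200 buffers overall, a tablespace holding 1000 of them gets progress_slice = 1200/1000 = 1.2, while one holding 200 gets 1200/200 = 6.0. Every written buffer advances its tablespace's progress by its slice, so both tablespaces reach the shared endpoint of 1200 with their last buffer, and the min-heap always selects the tablespace that is proportionally furthest behind.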

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag *  ba,
const BufferTag *  bb 
)
inlinestatic

Definition at line 7180 of file bufmgr.c.

7181{
7182 int ret;
7185
7188
7190
7191 if (ret != 0)
7192 return ret;
7193
7195 return -1;
7197 return 1;
7198
7199 if (ba->blockNum < bb->blockNum)
7200 return -1;
7201 if (ba->blockNum > bb->blockNum)
7202 return 1;
7203
7204 return 0;
7205}
static int rlocator_comparator(const void *p1, const void *p2)
Definition bufmgr.c:7081

References BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 6495 of file bufmgr.c.

6496{
6497 if (BufferIsLocal(buffer))
6498 {
6499 if (LocalRefCount[-buffer - 1] != 1)
6500 elog(ERROR, "incorrect local pin count: %d",
6501 LocalRefCount[-buffer - 1]);
6502 }
6503 else
6504 {
6505 if (GetPrivateRefCount(buffer) != 1)
6506 elog(ERROR, "incorrect local pin count: %d",
6507 GetPrivateRefCount(buffer));
6508 }
6509}

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 4174 of file bufmgr.c.

4175{
4176#ifdef USE_ASSERT_CHECKING
4177 int RefCountErrors = 0;
4179 int i;
4180 char *s;
4181
4182 /* check the array */
4183 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
4184 {
4186 {
4187 res = &PrivateRefCountArray[i];
4188
4190 elog(WARNING, "buffer refcount leak: %s", s);
4191 pfree(s);
4192
4194 }
4195 }
4196
4197 /* if necessary search the hash */
4199 {
4201
4203 while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
4204 {
4206 elog(WARNING, "buffer refcount leak: %s", s);
4207 pfree(s);
4209 }
4210 }
4211
4212 Assert(RefCountErrors == 0);
4213#endif
4214}
#define InvalidBuffer
Definition buf.h:25
static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:248
char * DebugPrintBufferRefcount(Buffer buffer)
Definition bufmgr.c:4300
#define REFCOUNT_ARRAY_ENTRIES
Definition bufmgr.c:130
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition bufmgr.c:249
static HTAB * PrivateRefCountHash
Definition bufmgr.c:250
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380

References Assert, PrivateRefCountEntry::buffer, DebugPrintBufferRefcount(), elog, fb(), hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4343 of file bufmgr.c.

4344{
4345 BufferSync(flags);
4346}
static void BufferSync(int flags)
Definition bufmgr.c:3457

References BufferSync().

Referenced by CheckPointGuts().

◆ CheckReadBuffersOperation()

static void CheckReadBuffersOperation ( ReadBuffersOperation *  operation,
bool  is_complete 
)
static

Definition at line 1628 of file bufmgr.c.

1629{
1630#ifdef USE_ASSERT_CHECKING
1631 Assert(operation->nblocks_done <= operation->nblocks);
1632 Assert(!is_complete || operation->nblocks == operation->nblocks_done);
1633
1634 for (int i = 0; i < operation->nblocks; i++)
1635 {
1636 Buffer buffer = operation->buffers[i];
1637 BufferDesc *buf_hdr = BufferIsLocal(buffer) ?
1638 GetLocalBufferDescriptor(-buffer - 1) :
1639 GetBufferDescriptor(buffer - 1);
1640
1641 Assert(BufferGetBlockNumber(buffer) == operation->blocknum + i);
1643
1644 if (i < operation->nblocks_done)
1646 }
1647#endif
1648}

References Assert, ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, and pg_atomic_read_u64().

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem *  a,
const CkptSortItem *  b 
)
inlinestatic

Definition at line 7214 of file bufmgr.c.

7215{
7216 /* compare tablespace */
7217 if (a->tsId < b->tsId)
7218 return -1;
7219 else if (a->tsId > b->tsId)
7220 return 1;
7221 /* compare relation */
7222 if (a->relNumber < b->relNumber)
7223 return -1;
7224 else if (a->relNumber > b->relNumber)
7225 return 1;
7226 /* compare fork */
7227 else if (a->forkNum < b->forkNum)
7228 return -1;
7229 else if (a->forkNum > b->forkNum)
7230 return 1;
7231 /* compare block number */
7232 else if (a->blockNum < b->blockNum)
7233 return -1;
7234 else if (a->blockNum > b->blockNum)
7235 return 1;
7236 /* equal page IDs are unlikely, but not impossible */
7237 return 0;
7238}
int b
Definition isn.c:74
int a
Definition isn.c:73

References a, and b.

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

Definition at line 6475 of file bufmgr.c.

6476{
6477 BufferDesc *buf;
6478
6479 Assert(BufferIsPinned(buffer));
6480 if (BufferIsLocal(buffer))
6481 return true; /* act as though we got it */
6482
6483 buf = GetBufferDescriptor(buffer - 1);
6484
6486}
static bool BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5908

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsPinned, BufferLockConditional(), and GetBufferDescriptor().

Referenced by _bt_conditionallockbuf(), BloomNewBuffer(), ConditionalLockBufferForCleanup(), GinNewBuffer(), gistNewBuffer(), RelationGetBufferForTuple(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), and SpGistUpdateMetaPage().
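
A typical opportunistic-locking sketch (buf hypothetical, assumed pinned): try the lock without blocking, and either skip the page or fall back to a blocking LockBuffer() call.

    if (ConditionalLockBuffer(buf))
    {
        /* got the exclusive content lock without waiting; do the work */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
        /* contended: either skip this page, or block if it's mandatory */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    }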

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 6701 of file bufmgr.c.

6702{
6705 refcount;
6706
6707 Assert(BufferIsValid(buffer));
6708
6709 /* see AIO related comment in LockBufferForCleanup() */
6710
6711 if (BufferIsLocal(buffer))
6712 {
6713 refcount = LocalRefCount[-buffer - 1];
6714 /* There should be exactly one pin */
6715 Assert(refcount > 0);
6716 if (refcount != 1)
6717 return false;
6718 /* Nobody else to wait for */
6719 return true;
6720 }
6721
6722 /* There should be exactly one local pin */
6723 refcount = GetPrivateRefCount(buffer);
6724 Assert(refcount);
6725 if (refcount != 1)
6726 return false;
6727
6728 /* Try to acquire lock */
6729 if (!ConditionalLockBuffer(buffer))
6730 return false;
6731
6732 bufHdr = GetBufferDescriptor(buffer - 1);
6735
6736 Assert(refcount > 0);
6737 if (refcount == 1)
6738 {
6739 /* Successfully acquired exclusive lock with pincount 1 */
6741 return true;
6742 }
6743
6744 /* Failed, so release the lock */
6747 return false;
6748}
bool ConditionalLockBuffer(Buffer buffer)
Definition bufmgr.c:6475
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
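
Sketch of the usual caller pattern (buf hypothetical, assumed pinned), in the spirit of lazy_scan_heap(): cleanup work is attempted only if the exclusive lock and single pin are obtained without waiting.

    if (ConditionalLockBufferForCleanup(buf))
    {
        /* exclusive lock with pincount 1: safe to rearrange the page */
        UnlockReleaseBuffer(buf);
    }
    else
        ReleaseBuffer(buf);     /* contended; the caller can revisit later */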

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5378 of file bufmgr.c.

5380{
5381 char relpersistence;
5382 SMgrRelation src_rel;
5383 SMgrRelation dst_rel;
5384
5385 /* Set the relpersistence. */
5386 relpersistence = permanent ?
5387 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5388
5389 src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
5390 dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
5391
5392 /*
5393 * Create and copy all forks of the relation. During create database we
5394 * have a separate cleanup mechanism which deletes complete database
5395 * directory. Therefore, each individual relation doesn't need to be
5396 * registered for cleanup.
5397 */
5398 RelationCreateStorage(dst_rlocator, relpersistence, false);
5399
5400 /* copy main fork. */
5401 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
5402 permanent);
5403
5404 /* copy those extra forks that exist */
5405 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5406 forkNum <= MAX_FORKNUM; forkNum++)
5407 {
5408 if (smgrexists(src_rel, forkNum))
5409 {
5410 smgrcreate(dst_rel, forkNum, false);
5411
5412 /*
5413 * WAL log creation if the relation is persistent, or this is the
5414 * init fork of an unlogged relation.
5415 */
5416 if (permanent || forkNum == INIT_FORKNUM)
5417 log_smgrcreate(&dst_rlocator, forkNum);
5418
5419 /* Copy a fork's data, block by block. */
5420 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
5421 permanent);
5422 }
5423 }
5424}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition bufmgr.c:5264
@ MAIN_FORKNUM
Definition relpath.h:58
#define MAX_FORKNUM
Definition relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition storage.c:187

References fb(), INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4300 of file bufmgr.c.

4301{
4302 BufferDesc *buf;
4303 int32 loccount;
4304 char *result;
4305 ProcNumber backend;
4306 uint64 buf_state;
4307
4308 Assert(BufferIsValid(buffer));
4309 if (BufferIsLocal(buffer))
4310 {
4311 buf = GetLocalBufferDescriptor(-buffer - 1);
4312 loccount = LocalRefCount[-buffer - 1];
4313 backend = MyProcNumber;
4314 }
4315 else
4316 {
4317 buf = GetBufferDescriptor(buffer - 1);
4318 loccount = GetPrivateRefCount(buffer);
4319 backend = INVALID_PROC_NUMBER;
4320 }
4321
4322 /* theoretically we should lock the bufHdr here */
4323 buf_state = pg_atomic_read_u64(&buf->state);
4324
4325 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%" PRIx64 ", refcount=%u %d)",
4326 buffer,
4327 relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
4328 BufTagGetForkNum(&buf->tag)).str,
4329 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4330 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
4331 return result;
4332}
#define BUF_FLAG_MASK
char * psprintf(const char *fmt,...)
Definition psprintf.c:43

References Assert, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u64(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBuffer().
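A minimal sketch of emitting the description from backend code; the elog() level is illustrative, and the result is palloc'd (via psprintf()), so it can be pfree'd when no longer needed:

char *desc = DebugPrintBufferRefcount(buffer);

elog(DEBUG1, "buffer refcount: %s", desc);
pfree(desc);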

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 5031 of file bufmgr.c.

5032{
5033 int i;
5034
5035 /*
5036 * We needn't consider local buffers, since by assumption the target
5037 * database isn't our own.
5038 */
5039
5040 for (i = 0; i < NBuffers; i++)
5041 {
5042 BufferDesc *bufHdr = GetBufferDescriptor(i);
5043
5044 /*
5045 * As in DropRelationBuffers, an unlocked precheck should be safe and
5046 * saves some cycles.
5047 */
5048 if (bufHdr->tag.dbOid != dbid)
5049 continue;
5050
5051 LockBufHdr(bufHdr);
5052 if (bufHdr->tag.dbOid == dbid)
5053 InvalidateBuffer(bufHdr); /* releases spinlock */
5054 else
5055 UnlockBufHdr(bufHdr);
5056 }
5057}
static void InvalidateBuffer(BufferDesc *buf)
Definition bufmgr.c:2274

References fb(), GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber forkNum,
int  nforks,
BlockNumber firstDelBlock 
)

Definition at line 4681 of file bufmgr.c.

4683{
4684 int i;
4685 int j;
4686 RelFileLocatorBackend rlocator;
4687 BlockNumber nForkBlock[MAX_FORKNUM];
4688 uint64 nBlocksToInvalidate = 0;
4689
4690 rlocator = smgr_reln->smgr_rlocator;
4691
4692 /* If it's a local relation, it's localbuf.c's problem. */
4693 if (RelFileLocatorBackendIsTemp(rlocator))
4694 {
4695 if (rlocator.backend == MyProcNumber)
4696 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4697 firstDelBlock);
4698
4699 return;
4700 }
4701
4702 /*
4703 * To remove all the pages of the specified relation forks from the buffer
4704 * pool, we need to scan the entire buffer pool but we can optimize it by
4705 * finding the buffers from BufMapping table provided we know the exact
4706 * size of each fork of the relation. The exact size is required to ensure
4707 * that we don't leave any buffer for the relation being dropped as
4708 * otherwise the background writer or checkpointer can lead to a PANIC
4709 * error while flushing buffers corresponding to files that don't exist.
4710 *
4711 * To know the exact size, we rely on the size cached for each fork by us
4712 * during recovery which limits the optimization to recovery and on
4713 * standbys but we can easily extend it once we have shared cache for
4714 * relation size.
4715 *
4716 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4717 * and the future writes keep the cached value up-to-date. See
4718 * smgrextend. It is possible that the value of the first lseek is smaller
4719 * than the actual number of existing blocks in the file due to buggy
4720 * Linux kernels that might not have accounted for the recent write. But
4721 * that should be fine because there must not be any buffers after that
4722 * file size.
4723 */
4724 for (i = 0; i < nforks; i++)
4725 {
4726 /* Get the number of blocks for a relation's fork */
4727 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4728
4729 if (nForkBlock[i] == InvalidBlockNumber)
4730 {
4731 nBlocksToInvalidate = InvalidBlockNumber;
4732 break;
4733 }
4734
4735 /* calculate the number of blocks to be invalidated */
4736 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4737 }
4738
4739 /*
4740 * We apply the optimization iff the total number of blocks to invalidate
4741 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4742 */
4743 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4744 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4745 {
4746 for (j = 0; j < nforks; j++)
4747 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4748 nForkBlock[j], firstDelBlock[j]);
4749 return;
4750 }
4751
4752 for (i = 0; i < NBuffers; i++)
4753 {
4754 BufferDesc *bufHdr = GetBufferDescriptor(i);
4755
4756 /*
4757 * We can make this a tad faster by prechecking the buffer tag before
4758 * we attempt to lock the buffer; this saves a lot of lock
4759 * acquisitions in typical cases. It should be safe because the
4760 * caller must have AccessExclusiveLock on the relation, or some other
4761 * reason to be certain that no one is loading new pages of the rel
4762 * into the buffer pool. (Otherwise we might well miss such pages
4763 * entirely.) Therefore, while the tag might be changing while we
4764 * look at it, it can't be changing *to* a value we care about, only
4765 * *away* from such a value. So false negatives are impossible, and
4766 * false positives are safe because we'll recheck after getting the
4767 * buffer lock.
4768 *
4769 * We could check forkNum and blockNum as well as the rlocator, but
4770 * the incremental win from doing so seems small.
4771 */
4772 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4773 continue;
4774
4775 LockBufHdr(bufHdr);
4776
4777 for (j = 0; j < nforks; j++)
4778 {
4779 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4780 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4781 bufHdr->tag.blockNum >= firstDelBlock[j])
4782 {
4783 InvalidateBuffer(bufHdr); /* releases spinlock */
4784 break;
4785 }
4786 }
4787 if (j >= nforks)
4788 UnlockBufHdr(bufHdr);
4789 }
4790}
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition bufmgr.c:93
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition bufmgr.c:4971
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:847

References RelFileLocatorBackend::backend, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrtruncate().
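A standalone, hedged illustration of the BUF_DROP_FULL_SCAN_THRESHOLD arithmetic; the NBuffers value below is only an example. With 8kB pages and shared_buffers = 16GB, NBuffers is 2097152, so forks totalling fewer than 65536 cached blocks are invalidated via targeted BufTable lookups rather than a full buffer-pool scan:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    uint64_t nbuffers = 2097152;        /* example: 16GB / 8kB pages */
    uint64_t threshold = nbuffers / 32; /* BUF_DROP_FULL_SCAN_THRESHOLD */

    printf("targeted lookups if blocks to invalidate < %llu\n",
           (unsigned long long) threshold);
    return 0;
}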

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation smgr_reln,
int  nlocators 
)

Definition at line 4801 of file bufmgr.c.

4802{
4803 int i;
4804 int n = 0;
4805 SMgrRelation *rels;
4806 BlockNumber (*block)[MAX_FORKNUM + 1];
4807 uint64 nBlocksToInvalidate = 0;
4808 RelFileLocator *locators;
4809 bool cached = true;
4810 bool use_bsearch;
4811
4812 if (nlocators == 0)
4813 return;
4814
4815 rels = palloc_array(SMgrRelation, nlocators); /* non-local relations */
4816
4817 /* If it's a local relation, it's localbuf.c's problem. */
4818 for (i = 0; i < nlocators; i++)
4819 {
4820 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4821 {
4822 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4823 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4824 }
4825 else
4826 rels[n++] = smgr_reln[i];
4827 }
4828
4829 /*
4830 * If there are no non-local relations, then we're done. Release the
4831 * memory and return.
4832 */
4833 if (n == 0)
4834 {
4835 pfree(rels);
4836 return;
4837 }
4838
4839 /*
4840 * This is used to remember the number of blocks for all the relations
4841 * forks.
4842 */
4843 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4844 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4845
4846 /*
4847 * We can avoid scanning the entire buffer pool if we know the exact size
4848 * of each of the given relation forks. See DropRelationBuffers.
4849 */
4850 for (i = 0; i < n && cached; i++)
4851 {
4852 for (int j = 0; j <= MAX_FORKNUM; j++)
4853 {
4854 /* Get the number of blocks for a relation's fork. */
4855 block[i][j] = smgrnblocks_cached(rels[i], j);
4856
4857 /* We need to only consider the relation forks that exist. */
4858 if (block[i][j] == InvalidBlockNumber)
4859 {
4860 if (!smgrexists(rels[i], j))
4861 continue;
4862 cached = false;
4863 break;
4864 }
4865
4866 /* calculate the total number of blocks to be invalidated */
4867 nBlocksToInvalidate += block[i][j];
4868 }
4869 }
4870
4871 /*
4872 * We apply the optimization iff the total number of blocks to invalidate
4873 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4874 */
4875 if (nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4876 {
4877 for (i = 0; i < n; i++)
4878 {
4879 for (int j = 0; j <= MAX_FORKNUM; j++)
4880 {
4881 /* ignore relation forks that don't exist */
4882 if (!BlockNumberIsValid(block[i][j]))
4883 continue;
4884
4885 /* drop all the buffers for a particular relation fork */
4886 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4887 j, block[i][j], 0);
4888 }
4889 }
4890
4891 pfree(block);
4892 pfree(rels);
4893 return;
4894 }
4895
4896 pfree(block);
4897 locators = palloc_array(RelFileLocator, n); /* non-local relations */
4898 for (i = 0; i < n; i++)
4899 locators[i] = rels[i]->smgr_rlocator.locator;
4900
4901 /*
4902 * For low number of relations to drop just use a simple walk through, to
4903 * save the bsearch overhead. The threshold to use is rather a guess than
4904 * an exactly determined value, as it depends on many factors (CPU and RAM
4905 * speeds, amount of shared buffers etc.).
4906 */
4907 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4908
4909 /* sort the list of rlocators if necessary */
4910 if (use_bsearch)
4911 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4912
4913 for (i = 0; i < NBuffers; i++)
4914 {
4915 RelFileLocator *rlocator = NULL;
4916 BufferDesc *bufHdr = GetBufferDescriptor(i);
4917
4918 /*
4919 * As in DropRelationBuffers, an unlocked precheck should be safe and
4920 * saves some cycles.
4921 */
4922
4923 if (!use_bsearch)
4924 {
4925 int j;
4926
4927 for (j = 0; j < n; j++)
4928 {
4929 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4930 {
4931 rlocator = &locators[j];
4932 break;
4933 }
4934 }
4935 }
4936 else
4937 {
4938 RelFileLocator locator;
4939
4940 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4941 rlocator = bsearch(&locator,
4942 locators, n, sizeof(RelFileLocator),
4943 rlocator_comparator);
4944 }
4945
4946 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4947 if (rlocator == NULL)
4948 continue;
4949
4950 LockBufHdr(bufHdr);
4951 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4952 InvalidateBuffer(bufHdr); /* releases spinlock */
4953 else
4954 UnlockBufHdr(bufHdr);
4955 }
4956
4957 pfree(locators);
4958 pfree(rels);
4959}
#define RELS_BSEARCH_THRESHOLD
Definition bufmgr.c:85
#define palloc_array(type, count)
Definition fe_memutils.h:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition localbuf.c:702
#define qsort(a, b, c, d)
Definition port.h:495

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), fb(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), palloc_array, pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), and UnlockBufHdr().

Referenced by smgrdounlinkall().

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7572 of file bufmgr.c.

7574{
7575 *buffers_evicted = 0;
7576 *buffers_skipped = 0;
7577 *buffers_flushed = 0;
7578
7579 for (int buf = 1; buf <= NBuffers; buf++)
7580 {
7581 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7582 uint64 buf_state;
7583 bool buffer_flushed;
7584
7585 CHECK_FOR_INTERRUPTS();
7586
7587 buf_state = pg_atomic_read_u64(&desc->state);
7588 if (!(buf_state & BM_VALID))
7589 continue;
7590
7591 ResourceOwnerEnlarge(CurrentResourceOwner);
7592 ReservePrivateRefCountEntry();
7593
7594 LockBufHdr(desc);
7595
7596 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
7597 (*buffers_evicted)++;
7598 else
7599 (*buffers_skipped)++;
7600
7601 if (buffer_flushed)
7602 (*buffers_flushed)++;
7603 }
7604}
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition bufmgr.c:7481
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
pg_atomic_uint64 state

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().
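A hedged sketch of a caller in the style of pg_buffercache_evict_all(); the report formatting is illustrative:

int32 evicted, flushed, skipped;

EvictAllUnpinnedBuffers(&evicted, &flushed, &skipped);
elog(INFO, "evicted %d buffer(s), flushed %d, skipped %d",
     evicted, flushed, skipped);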

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 buffers_evicted,
int32 buffers_flushed,
int32 buffers_skipped 
)

Definition at line 7622 of file bufmgr.c.

7624{
7625 Assert(!RelationUsesLocalBuffers(rel));
7626
7627 *buffers_skipped = 0;
7628 *buffers_evicted = 0;
7629 *buffers_flushed = 0;
7630
7631 for (int buf = 1; buf <= NBuffers; buf++)
7632 {
7633 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7634 uint64 buf_state = pg_atomic_read_u64(&(desc->state));
7635 bool buffer_flushed;
7636
7637 CHECK_FOR_INTERRUPTS();
7638
7639 /* An unlocked precheck should be safe and saves some cycles. */
7640 if ((buf_state & BM_VALID) == 0 ||
7641 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7642 continue;
7643
7644 /* Make sure we can pin the buffer. */
7645 ResourceOwnerEnlarge(CurrentResourceOwner);
7646 ReservePrivateRefCountEntry();
7647
7648 buf_state = LockBufHdr(desc);
7649
7650 /* recheck, could have changed without the lock */
7651 if ((buf_state & BM_VALID) == 0 ||
7652 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
7653 {
7654 UnlockBufHdr(desc);
7655 continue;
7656 }
7657
7658 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
7659 (*buffers_evicted)++;
7660 else
7661 (*buffers_skipped)++;
7662
7663 if (buffer_flushed)
7664 (*buffers_flushed)++;
7665 }
7666}
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
RelFileLocator rd_locator
Definition rel.h:57

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), fb(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool buffer_flushed 
)

◆ EvictUnpinnedBufferInternal()

static bool EvictUnpinnedBufferInternal ( BufferDesc desc,
bool buffer_flushed 
)
static

Definition at line 7481 of file bufmgr.c.

7482{
7483 uint64 buf_state;
7484 bool result;
7485
7486 *buffer_flushed = false;
7487
7488 buf_state = pg_atomic_read_u64(&(desc->state));
7489 Assert(buf_state & BM_LOCKED);
7490
7491 if ((buf_state & BM_VALID) == 0)
7492 {
7493 UnlockBufHdr(desc);
7494 return false;
7495 }
7496
7497 /* Check that it's not pinned already. */
7498 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
7499 {
7500 UnlockBufHdr(desc);
7501 return false;
7502 }
7503
7504 PinBuffer_Locked(desc); /* releases spinlock */
7505
7506 /* If it was dirty, try to clean it once. */
7507 if (buf_state & BM_DIRTY)
7508 {
7509 FlushUnlockedBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
7510 *buffer_flushed = true;
7511 }
7512
7513 /* This will return false if it becomes dirty or someone else pins it. */
7514 result = InvalidateVictimBuffer(desc);
7515
7516 UnpinBuffer(desc);
7517
7518 return result;
7519}
#define BM_LOCKED
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4554
static void PinBuffer_Locked(BufferDesc *buf)
Definition bufmgr.c:3293
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition bufmgr.c:2375

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, BUF_STATE_GET_REFCOUNT, fb(), FlushUnlockedBuffer(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), and EvictUnpinnedBuffer().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 965 of file bufmgr.c.

969{
970 Buffer buf;
971 uint32 extend_by = 1;
972
973 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
974 &buf, &extend_by);
975
976 return buf;
977}
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:997

References buf, ExtendBufferedRelBy(), and fb().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
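A minimal sketch, assuming a valid Relation "rel": BMR_REL() wraps the relation for the bmr argument, and EB_LOCK_FIRST returns the new page pinned and exclusively locked:

Buffer buf;

buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM,
                        NULL /* strategy */, EB_LOCK_FIRST);
/* the new page is zero-filled; initialize it, then release */
UnlockReleaseBuffer(buf);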

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer buffers,
uint32 extended_by 
)

Definition at line 997 of file bufmgr.c.

1004{
1005 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1006 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1007 Assert(extend_by > 0);
1008
1009 if (bmr.relpersistence == '\0')
1010 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1011
1012 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1013 extend_by, InvalidBlockNumber,
1014 buffers, extended_by);
1015}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2660

References Assert, ExtendBufferedRelCommon(), fb(), and InvalidBlockNumber.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().
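A hedged sketch of bulk extension in the spirit of RelationAddBlocks(); the batch size is illustrative, and with flags = 0 the returned buffers are pinned but not locked:

Buffer      buffers[8];
uint32      extended_by = 0;
BlockNumber first_block;

first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM,
                                  NULL, 0, lengthof(buffers),
                                  buffers, &extended_by);
/* extended_by may be less than requested */
for (uint32 i = 0; i < extended_by; i++)
    ReleaseBuffer(buffers[i]);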

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2660 of file bufmgr.c.

2668{
2669 BlockNumber first_block;
2670
2671 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2672 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2673 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2674 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2675 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2676 extend_by);
2677
2678 if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2679 first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2680 extend_by, extend_upto,
2681 buffers, &extend_by);
2682 else
2683 first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2684 extend_by, extend_upto,
2685 buffers, &extend_by);
2686 *extended_by = extend_by;
2687
2688 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2689 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2690 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2691 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2692 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2693 *extended_by,
2694 first_block);
2695
2696 return first_block;
2697}
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition bufmgr.c:2704
#define BMR_GET_SMGR(bmr)
Definition bufmgr.h:118
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition localbuf.c:346

References BMR_GET_SMGR, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), and fb().

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer buffers,
uint32 extended_by 
)
static

Definition at line 2704 of file bufmgr.c.

2712{
2713 BlockNumber first_block;
2714 IOContext io_context = IOContextForStrategy(strategy);
2715 instr_time io_start;
2716
2717 LimitAdditionalPins(&extend_by);
2718
2719 /*
2720 * Acquire victim buffers for extension without holding extension lock.
2721 * Writing out victim buffers is the most expensive part of extending the
2722 * relation, particularly when doing so requires WAL flushes. Zeroing out
2723 * the buffers is also quite expensive, so do that before holding the
2724 * extension lock as well.
2725 *
2726 * These pages are pinned by us and not valid. While we hold the pin they
2727 * can't be acquired as victim buffers by another backend.
2728 */
2729 for (uint32 i = 0; i < extend_by; i++)
2730 {
2731 Block buf_block;
2732
2733 buffers[i] = GetVictimBuffer(strategy, io_context);
2734 buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2735
2736 /* new buffers are zero-filled */
2737 MemSet(buf_block, 0, BLCKSZ);
2738 }
2739
2740 /*
2741 * Lock relation against concurrent extensions, unless requested not to.
2742 *
2743 * We use the same extension lock for all forks. That's unnecessarily
2744 * restrictive, but currently extensions for forks don't happen often
2745 * enough to make it worth locking more granularly.
2746 *
2747 * Note that another backend might have extended the relation by the time
2748 * we get the lock.
2749 */
2750 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2751 LockRelationForExtension(bmr.rel, ExclusiveLock);
2752
2753 /*
2754 * If requested, invalidate size cache, so that smgrnblocks asks the
2755 * kernel.
2756 */
2757 if (flags & EB_CLEAR_SIZE_CACHE)
2758 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2759
2760 first_block = smgrnblocks(BMR_GET_SMGR(bmr), fork);
2761
2762 /*
2763 * Now that we have the accurate relation size, check if the caller wants
2764 * us to extend to only up to a specific size. If there were concurrent
2765 * extensions, we might have acquired too many buffers and need to release
2766 * them.
2767 */
2768 if (extend_upto != InvalidBlockNumber)
2769 {
2770 uint32 orig_extend_by = extend_by;
2771
2772 if (first_block > extend_upto)
2773 extend_by = 0;
2774 else if ((uint64) first_block + extend_by > extend_upto)
2775 extend_by = extend_upto - first_block;
2776
2777 for (uint32 i = extend_by; i < orig_extend_by; i++)
2778 {
2779 BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2780
2781 UnpinBuffer(buf_hdr);
2782 }
2783
2784 if (extend_by == 0)
2785 {
2786 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2787 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2788
2789 return first_block;
2790 }
2791 }
2792
2793 /* Fail if relation is already at maximum possible length */
2794 if ((uint64) first_block + extend_by >= MaxBlockNumber)
2795 ereport(ERROR,
2796 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2797 errmsg("cannot extend relation %s beyond %u blocks",
2798 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str,
2799 MaxBlockNumber)));
2800
2801 /*
2802 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2803 *
2804 * This needs to happen before we extend the relation, because as soon as
2805 * we do, other backends can start to read in those pages.
2806 */
2807 for (uint32 i = 0; i < extend_by; i++)
2808 {
2809 Buffer victim_buf = buffers[i];
2810 BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2811 BufferTag tag;
2812 uint32 hash;
2813 LWLock *partition_lock;
2814 int existing_id;
2815
2816 /* in case we need to pin an existing buffer below */
2817 ResourceOwnerEnlarge(CurrentResourceOwner);
2818 ReservePrivateRefCountEntry();
2819
2820 InitBufferTag(&tag, &BMR_GET_SMGR(bmr)->smgr_rlocator.locator, fork,
2821 first_block + i);
2822 hash = BufTableHashCode(&tag);
2823 partition_lock = BufMappingPartitionLock(hash);
2824
2825 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2826
2827 existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2828
2829 /*
2830 * We get here only in the corner case where we are trying to extend
2831 * the relation but we found a pre-existing buffer. This can happen
2832 * because a prior attempt at extending the relation failed, and
2833 * because mdread doesn't complain about reads beyond EOF (when
2834 * zero_damaged_pages is ON) and so a previous attempt to read a block
2835 * beyond EOF could have left a "valid" zero-filled buffer.
2836 *
2837 * This has also been observed when relation was overwritten by
2838 * external process. Since the legitimate cases should always have
2839 * left a zero-filled buffer, complain if not PageIsNew.
2840 */
2841 if (existing_id >= 0)
2842 {
2843 BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2844 Block buf_block;
2845 bool valid;
2846
2847 /*
2848 * Pin the existing buffer before releasing the partition lock,
2849 * preventing it from being evicted.
2850 */
2851 valid = PinBuffer(existing_hdr, strategy, false);
2852
2853 LWLockRelease(partition_lock);
2854 UnpinBuffer(victim_buf_hdr);
2855
2856 buf_block = BufHdrGetBlock(existing_hdr);
2857
2858
2859 if (valid && !PageIsNew((Page) buf_block))
2860 ereport(ERROR,
2861 (errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",
2862 existing_hdr->tag.blockNum,
2863 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str)));
2864
2865 /*
2866 * We *must* do smgr[zero]extend before succeeding, else the page
2867 * will not be reserved by the kernel, and the next P_NEW call
2868 * will decide to return the same page. Clear the BM_VALID bit,
2869 * do StartBufferIO() and proceed.
2870 *
2871 * Loop to handle the very small possibility that someone re-sets
2872 * BM_VALID between our clearing it and StartBufferIO inspecting
2873 * it.
2874 */
2875 do
2876 {
2877 pg_atomic_fetch_and_u64(&existing_hdr->state, ~BM_VALID);
2878 } while (!StartBufferIO(existing_hdr, true, false));
2879 }
2880 else
2881 {
2882 uint64 buf_state;
2883 uint64 set_bits = 0;
2884
2885 buf_state = LockBufHdr(victim_buf_hdr);
2886
2887 /* some sanity checks while we hold the buffer header lock */
2888 Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY)));
2889 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2890
2891 victim_buf_hdr->tag = tag;
2892
2893 set_bits |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2894 if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2895 set_bits |= BM_PERMANENT;
2896
2897 UnlockBufHdrExt(victim_buf_hdr, buf_state,
2898 set_bits, 0,
2899 0);
2900
2901 LWLockRelease(partition_lock);
2902
2903 /* XXX: could combine the locked operations in it with the above */
2904 StartBufferIO(victim_buf_hdr, true, false);
2905 }
2906 }
2907
2908 io_start = pgstat_prepare_io_time(track_io_timing);
2909
2910 /*
2911 * Note: if smgrzeroextend fails, we will end up with buffers that are
2912 * allocated but not marked BM_VALID. The next relation extension will
2913 * still select the same block number (because the relation didn't get any
2914 * longer on disk) and so future attempts to extend the relation will find
2915 * the same buffers (if they have not been recycled) but come right back
2916 * here to try smgrzeroextend again.
2917 *
2918 * We don't need to set checksum for all-zero pages.
2919 */
2920 smgrzeroextend(BMR_GET_SMGR(bmr), fork, first_block, extend_by, false);
2921
2922 /*
2923 * Release the file-extension lock; it's now OK for someone else to extend
2924 * the relation some more.
2925 *
2926 * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2927 * take noticeable time.
2928 */
2929 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2930 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2931
2932 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2933 io_start, 1, extend_by * BLCKSZ);
2934
2935 /* Set BM_VALID, terminate IO, and wake up any waiters */
2936 for (uint32 i = 0; i < extend_by; i++)
2937 {
2938 Buffer buf = buffers[i];
2939 BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2940 bool lock = false;
2941
2942 if (flags & EB_LOCK_FIRST && i == 0)
2943 lock = true;
2944 else if (flags & EB_LOCK_TARGET)
2945 {
2946 Assert(extend_upto != InvalidBlockNumber);
2947 if (first_block + i + 1 == extend_upto)
2948 lock = true;
2949 }
2950
2951 if (lock)
2952 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2953
2954 TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
2955 }
2956
2957 pgBufferUsage.shared_blks_written += extend_by;
2958
2959 *extended_by = extend_by;
2960
2961 return first_block;
2962}
#define MaxBlockNumber
Definition block.h:35
#define BM_JUST_DIRTIED
#define BufHdrGetBlock(bufHdr)
Definition bufmgr.c:74
void LimitAdditionalPins(uint32 *additional_pins)
Definition bufmgr.c:2642
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition bufmgr.c:6890
void * Block
Definition bufmgr.h:26
@ EB_LOCK_TARGET
Definition bufmgr.h:93
@ EB_CLEAR_SIZE_CACHE
Definition bufmgr.h:90
@ EB_SKIP_EXTENSION_LOCK
Definition bufmgr.h:75
@ EB_LOCK_FIRST
Definition bufmgr.h:87
static bool PageIsNew(const PageData *page)
Definition bufpage.h:233
#define MemSet(start, val, len)
Definition c.h:1013
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:474
#define ExclusiveLock
Definition lockdefs.h:42
@ IOOP_EXTEND
Definition pgstat.h:314
#define relpath(rlocator, forknum)
Definition relpath.h:150
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition smgr.c:819
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition smgr.c:649
int64 shared_blks_written
Definition instrument.h:29

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, BMR_GET_SMGR, buf, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BUFFER_LOCK_EXCLUSIVE, BufferDescriptorGetBuffer(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, fb(), GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), LockBuffer(), LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pg_atomic_fetch_and_u64(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), relpath, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, smgrnblocks(), smgrzeroextend(), StartBufferIO(), str, TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 1026 of file bufmgr.c.

1032{
1033 BlockNumber current_size;
1034 uint32 extended_by = 0;
1035 Buffer buffer = InvalidBuffer;
1036 Buffer buffers[64];
1037
1038 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1039 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1040 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
1041
1042 if (bmr.relpersistence == '\0')
1043 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1044
1045 /*
1046 * If desired, create the file if it doesn't exist. If
1047 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
1048 * an smgrexists call.
1049 */
1050 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
1051 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
1052 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
1053 !smgrexists(BMR_GET_SMGR(bmr), fork))
1054 {
1055 LockRelationForExtension(bmr.rel, ExclusiveLock);
1056
1057 /* recheck, fork might have been created concurrently */
1058 if (!smgrexists(BMR_GET_SMGR(bmr), fork))
1059 smgrcreate(BMR_GET_SMGR(bmr), fork, false);
1060
1061 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
1062 }
1063
1064 /*
1065 * If requested, invalidate size cache, so that smgrnblocks asks the
1066 * kernel.
1067 */
1068 if (flags & EB_CLEAR_SIZE_CACHE)
1069 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1070
1071 /*
1072 * Estimate how many pages we'll need to extend by. This avoids acquiring
1073 * unnecessarily many victim buffers.
1074 */
1075 current_size = smgrnblocks(BMR_GET_SMGR(bmr), fork);
1076
1077 /*
1078 * Since no-one else can be looking at the page contents yet, there is no
1079 * difference between an exclusive lock and a cleanup-strength lock. Note
1080 * that we pass the original mode to ReadBuffer_common() below, when
1081 * falling back to reading the buffer to a concurrent relation extension.
1082 */
1083 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1084 flags |= EB_LOCK_TARGET;
1085
1086 while (current_size < extend_to)
1087 {
1088 uint32 num_pages = lengthof(buffers);
1089 BlockNumber first_block;
1090
1091 if ((uint64) current_size + num_pages > extend_to)
1092 num_pages = extend_to - current_size;
1093
1094 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1095 num_pages, extend_to,
1096 buffers, &extended_by);
1097
1098 current_size = first_block + extended_by;
1099 Assert(num_pages != 0 || current_size >= extend_to);
1100
1101 for (uint32 i = 0; i < extended_by; i++)
1102 {
1103 if (first_block + i != extend_to - 1)
1104 ReleaseBuffer(buffers[i]);
1105 else
1106 buffer = buffers[i];
1107 }
1108 }
1109
1110 /*
1111 * It's possible that another backend concurrently extended the relation.
1112 * In that case read the buffer.
1113 *
1114 * XXX: Should we control this via a flag?
1115 */
1116 if (buffer == InvalidBuffer)
1117 {
1118 Assert(extended_by == 0);
1119 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1120 fork, extend_to - 1, mode, strategy);
1121 }
1122
1123 return buffer;
1124}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:1294
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5502
@ EB_PERFORMING_RECOVERY
Definition bufmgr.h:78
@ EB_CREATE_FORK_IF_NEEDED
Definition bufmgr.h:84
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition bufmgr.h:47
#define lengthof(array)
Definition c.h:803

References Assert, BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), fb(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, ReadBuffer_common(), ReleaseBuffer(), smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
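A hedged sketch modeled on vm_extend(): grow (and, if necessary, create) a fork until a target block exists; vm_nblocks is an assumed caller-supplied target, not a bufmgr.c symbol:

Buffer buf;

buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM,
                          NULL,
                          EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE,
                          vm_nblocks, RBM_ZERO_ON_ERROR);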

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4971 of file bufmgr.c.

4974{
4975 BlockNumber curBlock;
4976
4977 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4978 {
4979 uint32 bufHash; /* hash value for tag */
4980 BufferTag bufTag; /* identity of requested block */
4981 LWLock *bufPartitionLock; /* buffer partition lock for it */
4982 int buf_id;
4983 BufferDesc *bufHdr;
4984
4985 /* create a tag so we can lookup the buffer */
4986 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4987
4988 /* determine its hash code and partition lock ID */
4989 bufHash = BufTableHashCode(&bufTag);
4990 bufPartitionLock = BufMappingPartitionLock(bufHash);
4991
4992 /* Check that it is in the buffer pool. If not, do nothing. */
4993 LWLockAcquire(bufPartitionLock, LW_SHARED);
4994 buf_id = BufTableLookup(&bufTag, bufHash);
4995 LWLockRelease(bufPartitionLock);
4996
4997 if (buf_id < 0)
4998 continue;
4999
5000 bufHdr = GetBufferDescriptor(buf_id);
5001
5002 /*
5003 * We need to lock the buffer header and recheck if the buffer is
5004 * still associated with the same block because the buffer could be
5005 * evicted by some other backend loading blocks for a different
5006 * relation after we release lock on the BufMapping table.
5007 */
5008 LockBufHdr(bufHdr);
5009
5010 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
5011 BufTagGetForkNum(&bufHdr->tag) == forkNum &&
5012 bufHdr->tag.blockNum >= firstDelBlock)
5013 InvalidateBuffer(bufHdr); /* releases spinlock */
5014 else
5016 }
5017}

References BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4417 of file bufmgr.c.

4419{
4420 XLogRecPtr recptr;
4421 ErrorContextCallback errcallback;
4422 instr_time io_start;
4423 Block bufBlock;
4424 char *bufToWrite;
4425 uint64 buf_state;
4426
4427 /*
4428 * Try to start an I/O operation. If StartBufferIO returns false, then
4429 * someone else flushed the buffer before we could, so we need not do
4430 * anything.
4431 */
4432 if (!StartBufferIO(buf, false, false))
4433 return;
4434
4435 /* Setup error traceback support for ereport() */
4436 errcallback.callback = shared_buffer_write_error_callback;
4437 errcallback.arg = buf;
4438 errcallback.previous = error_context_stack;
4439 error_context_stack = &errcallback;
4440
4441 /* Find smgr relation for buffer */
4442 if (reln == NULL)
4443 reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
4444
4445 TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
4446 buf->tag.blockNum,
4447 reln->smgr_rlocator.locator.spcOid,
4448 reln->smgr_rlocator.locator.dbOid,
4449 reln->smgr_rlocator.locator.relNumber);
4450
4451 buf_state = LockBufHdr(buf);
4452
4453 /*
4454 * Run PageGetLSN while holding header lock, since we don't have the
4455 * buffer locked exclusively in all cases.
4456 */
4457 recptr = BufferGetLSN(buf);
4458
4459 /* To check if block content changes while flushing. - vadim 01/17/97 */
4460 UnlockBufHdrExt(buf, buf_state,
4461 0, BM_JUST_DIRTIED,
4462 0);
4463
4464 /*
4465 * Force XLOG flush up to buffer's LSN. This implements the basic WAL
4466 * rule that log updates must hit disk before any of the data-file changes
4467 * they describe do.
4468 *
4469 * However, this rule does not apply to unlogged relations, which will be
4470 * lost after a crash anyway. Most unlogged relation pages do not bear
4471 * LSNs since we never emit WAL records for them, and therefore flushing
4472 * up through the buffer LSN would be useless, but harmless. However,
4473 * GiST indexes use LSNs internally to track page-splits, and therefore
4474 * unlogged GiST pages bear "fake" LSNs generated by
4475 * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
4476 * LSN counter could advance past the WAL insertion point; and if it did
4477 * happen, attempting to flush WAL through that location would fail, with
4478 * disastrous system-wide consequences. To make sure that can't happen,
4479 * skip the flush if the buffer isn't permanent.
4480 */
4481 if (buf_state & BM_PERMANENT)
4482 XLogFlush(recptr);
4483
4484 /*
4485 * Now it's safe to write the buffer to disk. Note that no one else should
4486 * have been able to write it, while we were busy with log flushing,
4487 * because we got the exclusive right to perform I/O by setting the
4488 * BM_IO_IN_PROGRESS bit.
4489 */
4490 bufBlock = BufHdrGetBlock(buf);
4491
4492 /*
4493 * Update page checksum if desired. Since we have only shared lock on the
4494 * buffer, other processes might be updating hint bits in it, so we must
4495 * copy the page to private storage if we do checksumming.
4496 */
4497 bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
4498
4499 io_start = pgstat_prepare_io_time(track_io_timing);
4500
4501 /*
4502 * bufToWrite is either the shared buffer or a copy, as appropriate.
4503 */
4504 smgrwrite(reln,
4505 BufTagGetForkNum(&buf->tag),
4506 buf->tag.blockNum,
4507 bufToWrite,
4508 false);
4509
4510 /*
4511 * When a strategy is in use, only flushes of dirty buffers already in the
4512 * strategy ring are counted as strategy writes (IOCONTEXT
4513 * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
4514 * statistics tracking.
4515 *
4516 * If a shared buffer initially added to the ring must be flushed before
4517 * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
4518 *
4519 * If a shared buffer which was added to the ring later because the
4520 * current strategy buffer is pinned or in use or because all strategy
4521 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
4522 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
4523 * (from_ring will be false).
4524 *
4525 * When a strategy is not in use, the write can only be a "regular" write
4526 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
4527 */
4528 pgstat_count_io_op_time(io_object, io_context, IOOP_WRITE,
4529 io_start, 1, BLCKSZ);
4530
4531 pgBufferUsage.shared_blks_written++;
4532
4533 /*
4534 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
4535 * end the BM_IO_IN_PROGRESS state.
4536 */
4537 TerminateBufferIO(buf, true, 0, true, false);
4538
4539 TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
4540 buf->tag.blockNum,
4541 reln->smgr_rlocator.locator.spcOid,
4542 reln->smgr_rlocator.locator.dbOid,
4543 reln->smgr_rlocator.locator.relNumber);
4544
4545 /* Pop the error context stack */
4546 error_context_stack = errcallback.previous;
4547}
#define BufferGetLSN(bufHdr)
Definition bufmgr.c:75
static void shared_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7049
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition bufpage.c:1509
ErrorContextCallback * error_context_stack
Definition elog.c:95
@ IOOP_WRITE
Definition pgstat.h:316
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.h:131
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2783

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, error_context_stack, fb(), INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), smgropen(), smgrwrite(), StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), and XLogFlush().

Referenced by FlushOneBuffer(), FlushUnlockedBuffer(), and GetVictimBuffer().

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5442 of file bufmgr.c.

5443{
5444 int i;
5445 BufferDesc *bufHdr;
5446
5447 for (i = 0; i < NBuffers; i++)
5448 {
5449 uint64 buf_state;
5450
5451 bufHdr = GetBufferDescriptor(i);
5452
5453 /*
5454 * As in DropRelationBuffers, an unlocked precheck should be safe and
5455 * saves some cycles.
5456 */
5457 if (bufHdr->tag.dbOid != dbid)
5458 continue;
5459
5460 /* Make sure we can handle the pin */
5461 ReservePrivateRefCountEntry();
5462 ResourceOwnerEnlarge(CurrentResourceOwner);
5463
5464 buf_state = LockBufHdr(bufHdr);
5465 if (bufHdr->tag.dbOid == dbid &&
5466 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5467 {
5468 PinBuffer_Locked(bufHdr);
5469 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5470 UnpinBuffer(bufHdr);
5471 }
5472 else
5473 UnlockBufHdr(bufHdr);
5474 }
5475}

References BM_DIRTY, BM_VALID, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5482 of file bufmgr.c.

5483{
5484 BufferDesc *bufHdr;
5485
5486 /* currently not needed, but no fundamental reason not to support */
5487 Assert(!BufferIsLocal(buffer));
5488
5489 Assert(BufferIsPinned(buffer));
5490
5491 bufHdr = GetBufferDescriptor(buffer - 1);
5492
5493 Assert(BufferIsLockedByMe(buffer));
5494
5495 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5496}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition bufmgr.c:4417
bool BufferIsLockedByMe(Buffer buffer)
Definition bufmgr.c:2972

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, fb(), FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().
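A hedged sketch of the redo-time pattern (cf. XLogReadBufferForRedoExtended()): the buffer must be pinned and content-locked before the call:

LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* ... rebuild the page contents ... */
MarkBufferDirty(buffer);
FlushOneBuffer(buffer);     /* write it out immediately */
UnlockReleaseBuffer(buffer);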

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 5078 of file bufmgr.c.

5079{
5080 int i;
5081 BufferDesc *bufHdr;
5082 SMgrRelation srel = RelationGetSmgr(rel);
5083
5084 if (RelationUsesLocalBuffers(rel))
5085 {
5086 for (i = 0; i < NLocBuffer; i++)
5087 {
5088 uint64 buf_state;
5089
5090 bufHdr = GetLocalBufferDescriptor(i);
5091 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5092 ((buf_state = pg_atomic_read_u64(&bufHdr->state)) &
5093 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5094 {
5095 ErrorContextCallback errcallback;
5096
5097 /* Setup error traceback support for ereport() */
5098 errcallback.callback = local_buffer_write_error_callback;
5099 errcallback.arg = bufHdr;
5100 errcallback.previous = error_context_stack;
5101 error_context_stack = &errcallback;
5102
5103 /* Make sure we can handle the pin */
5104 ReservePrivateRefCountEntry();
5105 ResourceOwnerEnlarge(CurrentResourceOwner);
5106
5107 /*
5108 * Pin/unpin mostly to make valgrind work, but it also seems
5109 * like the right thing to do.
5110 */
5111 PinLocalBuffer(bufHdr, false);
5112
5113
5114 FlushLocalBuffer(bufHdr, srel);
5115
5117
5118 /* Pop the error context stack */
5119 error_context_stack = errcallback.previous;
5120 }
5121 }
5122
5123 return;
5124 }
5125
5126 for (i = 0; i < NBuffers; i++)
5127 {
5128 uint64 buf_state;
5129
5130 bufHdr = GetBufferDescriptor(i);
5131
5132 /*
5133 * As in DropRelationBuffers, an unlocked precheck should be safe and
5134 * saves some cycles.
5135 */
5136 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
5137 continue;
5138
5139 /* Make sure we can handle the pin */
5140 ReservePrivateRefCountEntry();
5141 ResourceOwnerEnlarge(CurrentResourceOwner);
5142
5143 buf_state = LockBufHdr(bufHdr);
5144 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5145 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5146 {
5147 PinBuffer_Locked(bufHdr);
5148 FlushUnlockedBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5149 UnpinBuffer(bufHdr);
5150 }
5151 else
5152 UnlockBufHdr(bufHdr);
5153 }
5154}
static void local_buffer_write_error_callback(void *arg)
Definition bufmgr.c:7065
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition localbuf.c:805
int NLocBuffer
Definition localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition rel.h:576

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, fb(), FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u64(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
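A one-line sketch of the typical call site, as in heapam_relation_copy_data(): write out every dirty page of the relation (local buffers for temp relations, shared buffers otherwise) before operating on the underlying files:

FlushRelationBuffers(rel);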

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation smgrs,
int  nrels 
)

Definition at line 5166 of file bufmgr.c.

5167{
5168 int i;
5169 SMgrSortArray *srels;
5170 bool use_bsearch;
5171
5172 if (nrels == 0)
5173 return;
5174
5175 /* fill-in array for qsort */
5176 srels = palloc_array(SMgrSortArray, nrels);
5177
5178 for (i = 0; i < nrels; i++)
5179 {
5180 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5181
5182 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5183 srels[i].srel = smgrs[i];
5184 }
5185
5186 /*
5187 * Save the bsearch overhead for low number of relations to sync. See
5188 * DropRelationsAllBuffers for details.
5189 */
5190 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
5191
5192 /* sort the list of SMgrRelations if necessary */
5193 if (use_bsearch)
5194 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5195
5196 for (i = 0; i < NBuffers; i++)
5197 {
5198 SMgrSortArray *srelent = NULL;
5199 BufferDesc *bufHdr = GetBufferDescriptor(i);
5200 uint64 buf_state;
5201
5202 /*
5203 * As in DropRelationBuffers, an unlocked precheck should be safe and
5204 * saves some cycles.
5205 */
5206
5207 if (!use_bsearch)
5208 {
5209 int j;
5210
5211 for (j = 0; j < nrels; j++)
5212 {
5213 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5214 {
5215 srelent = &srels[j];
5216 break;
5217 }
5218 }
5219 }
5220 else
5221 {
5222 RelFileLocator rlocator;
5223
5224 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5225 srelent = bsearch(&rlocator,
5226 srels, nrels, sizeof(SMgrSortArray),
5228 }
5229
5230 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5231 if (srelent == NULL)
5232 continue;
5233
5234 /* Make sure we can handle the pin */
5235 ReservePrivateRefCountEntry();
5236 ResourceOwnerEnlarge(CurrentResourceOwner);
5237
5238 buf_state = LockBufHdr(bufHdr);
5239 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5240 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5241 {
5242 PinBuffer_Locked(bufHdr);
5243 FlushUnlockedBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5244 UnpinBuffer(bufHdr);
5245 }
5246 else
5247 UnlockBufHdr(bufHdr);
5248 }
5249
5250 pfree(srels);
5251}

References Assert, BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, LockBufHdr(), NBuffers, palloc_array, pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), rlocator_comparator(), UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FlushUnlockedBuffer()

static void FlushUnlockedBuffer ( BufferDesc buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4554 of file bufmgr.c.

4556{
4557 Buffer buffer = BufferDescriptorGetBuffer(buf);
4558
4559 BufferLockAcquire(buffer, buf, BUFFER_LOCK_SHARE);
4560 FlushBuffer(buf, reln, io_object, io_context);
4561 BufferLockUnlock(buffer, buf);
4562}
static void BufferLockAcquire(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
Definition bufmgr.c:5756
static void BufferLockUnlock(Buffer buffer, BufferDesc *buf_hdr)
Definition bufmgr.c:5872

References buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_SHARE, BufferDescriptorGetBuffer(), BufferLockAcquire(), BufferLockUnlock(), fb(), FlushBuffer(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), and SyncOneBuffer().

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry ref)
static

Definition at line 552 of file bufmgr.c.

553{
554 Assert(ref->data.refcount == 0);
555 Assert(ref->data.lockmode == BUFFER_LOCK_UNLOCK);
556
557 if (ref >= &PrivateRefCountArray[0] &&
558 ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
559 {
560 ref->buffer = InvalidBuffer;
561 PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer;
562
563
564 /*
565 * Mark the just used entry as reserved - in many scenarios that
566 * allows us to avoid ever having to search the array/hash for free
567 * entries.
568 */
569 ReservedRefCountSlot = ref - PrivateRefCountArray;
570 }
571 else
572 {
573 bool found;
574 Buffer buffer = ref->buffer;
575
576 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
577 Assert(found);
578 Assert(PrivateRefCountOverflowed > 0);
579 PrivateRefCountOverflowed--;
580 }
581}
static int ReservedRefCountSlot
Definition bufmgr.c:253
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_REMOVE
Definition hsearch.h:115

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by UnpinBufferNoOwner().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2616 of file bufmgr.c.

2617{
2618 uint32 estimated_pins_held;
2619
2620 /*
2621 * We get the number of "overflowed" pins for free, but don't know the
2622 * number of pins in PrivateRefCountArray. The cost of calculating that
2623 * exactly doesn't seem worth it, so just assume the max.
2624 */
2625 estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2626
2627 /* Is this backend already holding more than its fair share? */
2628 if (estimated_pins_held > MaxProportionalPins)
2629 return 0;
2630
2631 return MaxProportionalPins - estimated_pins_held;
2632}
static uint32 MaxProportionalPins
Definition bufmgr.c:256

References fb(), MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().
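A hedged sketch of how a batch reader such as read_stream_start_pending_read() can combine the two limits; the variable names (including "wanted") are illustrative:

uint32 fair_share = GetPinLimit();          /* static per-backend cap */
uint32 headroom = GetAdditionalPinLimit();  /* what we may still pin now */
uint32 batch = Min(wanted, Max(headroom, 1));   /* always allow one pin */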

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2604 of file bufmgr.c.

2605{
2606 return MaxProportionalPins;
2607}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 529 of file bufmgr.c.

530{
531 PrivateRefCountEntry *ref;
532
533 Assert(BufferIsValid(buffer));
534 Assert(!BufferIsLocal(buffer));
535
536 /*
537 * Not moving the entry - that's ok for the current users, but we might
538 * want to change this one day.
539 */
540 ref = GetPrivateRefCountEntry(buffer, false);
541
542 if (ref == NULL)
543 return 0;
544 return ref->data.refcount;
545}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), and GetPrivateRefCountEntry().

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), and MarkBufferDirtyHint().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
inlinestatic

Definition at line 494 of file bufmgr.c.

495{
496 Assert(BufferIsValid(buffer));
497 Assert(!BufferIsLocal(buffer));
498
499 /*
500 * It's very common to look up the same buffer repeatedly. To make that
501 * fast, we have a one-entry cache.
502 *
503 * In contrast to the loop in GetPrivateRefCountEntrySlow(), here it
504 * faster to check PrivateRefCountArray[].buffer, as in the case of a hit
505 * fewer addresses are computed and fewer cachelines are accessed. Whereas
506 * in GetPrivateRefCountEntrySlow()'s case, checking
507 * PrivateRefCountArrayKeys saves a lot of memory accesses.
508 */
509 if (likely(PrivateRefCountEntryLast != -1) &&
510 PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)
511 {
512 return &PrivateRefCountArray[PrivateRefCountEntryLast];
513 }
514
515 /*
516 * The code for the cached lookup is small enough to be worth inlining
517 * into the caller. In the miss case however, that empirically doesn't
518 * seem worth it.
519 */
520 return GetPrivateRefCountEntrySlow(buffer, do_move);
521}
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move)
Definition bufmgr.c:405
static int PrivateRefCountEntryLast
Definition bufmgr.c:254

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), fb(), GetPrivateRefCountEntrySlow(), likely, PrivateRefCountArray, and PrivateRefCountEntryLast.

Referenced by BufferLockAcquire(), BufferLockConditional(), BufferLockDisownInternal(), BufferLockHeldByMe(), BufferLockHeldByMeInMode(), GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), ResOwnerReleaseBuffer(), and UnpinBufferNoOwner().

◆ GetPrivateRefCountEntrySlow()

static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 405 of file bufmgr.c.

406{
407 PrivateRefCountEntry *res;
408 int match = -1;
409 int i;
410
411 /*
412 * First search for references in the array, that'll be sufficient in the
413 * majority of cases.
414 */
415 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
416 {
417 if (PrivateRefCountArrayKeys[i] == buffer)
418 {
419 match = i;
420 /* see ReservePrivateRefCountEntry() for why we don't return */
421 }
422 }
423
424 if (likely(match != -1))
425 {
426 /* update cache for the next lookup */
427 PrivateRefCountEntryLast = match;
428
429 return &PrivateRefCountArray[match];
430 }
431
432 /*
433 * By here we know that the buffer, if already pinned, isn't residing in
434 * the array.
435 *
436 * Only look up the buffer in the hashtable if we've previously overflowed
437 * into it.
438 */
440 return NULL;
441
443
444 if (res == NULL)
445 return NULL;
446 else if (!do_move)
447 {
448 /* caller doesn't want us to move the hash entry into the array */
449 return res;
450 }
451 else
452 {
453 /* move buffer from hashtable into the free array slot */
454 bool found;
455 PrivateRefCountEntry *free;
456
457 /* Ensure there's a free array slot */
458 ReservePrivateRefCountEntry();
459
460 /* Use up the reserved slot */
461 Assert(ReservedRefCountSlot != -1);
462 free = &PrivateRefCountArray[ReservedRefCountSlot];
463 ReservedRefCountSlot = -1;
464 Assert(free->buffer == InvalidBuffer);
465
466 /* and fill it */
467 free->buffer = buffer;
468 free->data = res->data;
469 PrivateRefCountArrayKeys[free - PrivateRefCountArray] = buffer;
470 /* update cache for the next lookup */
471 PrivateRefCountEntryLast = free - PrivateRefCountArray;
472
474
475
476 /* delete from hashtable */
477 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
478 Assert(found);
479 Assert(PrivateRefCountOverflowed > 0);
480 PrivateRefCountOverflowed--;
481
482 return free;
483 }
484}
@ HASH_FIND
Definition hsearch.h:113
#define free(a)

References Assert, PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, fb(), free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, likely, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountSlot, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCountEntry().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 2452 of file bufmgr.c.

2453{
2454 BufferDesc *buf_hdr;
2455 Buffer buf;
2456 uint64 buf_state;
2457 bool from_ring;
2458
2459 /*
2460 * Ensure, before we pin a victim buffer, that there's a free refcount
2461 * entry and resource owner slot for the pin.
2462 */
2463 ReservePrivateRefCountEntry();
2464 ResourceOwnerEnlarge(CurrentResourceOwner);
2465
2466 /* we return here if a prospective victim buffer gets used concurrently */
2467again:
2468
2469 /*
2470 * Select a victim buffer. The buffer is returned pinned and owned by
2471 * this backend.
2472 */
2473 buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
2474 buf = BufferDescriptorGetBuffer(buf_hdr);
2475
2476 /*
2477 * We shouldn't have any other pins for this buffer.
2478 */
2479 CheckBufferIsPinnedOnce(buf);
2480
2481 /*
2482 * If the buffer was dirty, try to write it out. There is a race
2483 * condition here, in that someone might dirty it after we released the
2484 * buffer header lock above, or even while we are writing it out (since
2485 * our share-lock won't prevent hint-bit updates). We will recheck the
2486 * dirty bit after re-locking the buffer header.
2487 */
2488 if (buf_state & BM_DIRTY)
2489 {
2492
2493 /*
2494 * We need a share-lock on the buffer contents to write it out (else
2495 * we might write invalid data, eg because someone else is compacting
2496 * the page contents while we write). We must use a conditional lock
2497 * acquisition here to avoid deadlock. Even though the buffer was not
2498 * pinned (and therefore surely not locked) when StrategyGetBuffer
2499 * returned it, someone else could have pinned and exclusive-locked it
2500 * by the time we get here. If we try to get the lock unconditionally,
2501 * we'd block waiting for them; if they later block waiting for us,
2502 * deadlock ensues. (This has been observed to happen when two
2503 * backends are both trying to split btree index pages, and the second
2504 * one just happens to be trying to split the page the first one got
2505 * from StrategyGetBuffer.)
2506 */
2508 {
2509 /*
2510 * Someone else has locked the buffer, so give it up and loop back
2511 * to get another one.
2512 */
2514 goto again;
2515 }
2516
2517 /*
2518 * If using a nondefault strategy, and writing the buffer would
2519 * require a WAL flush, let the strategy decide whether to go ahead
2520 * and write/reuse the buffer or to choose another victim. We need a
2521 * lock to inspect the page LSN, so this can't be done inside
2522 * StrategyGetBuffer.
2523 */
2524 if (strategy != NULL)
2525 {
2526 XLogRecPtr lsn;
2527
2528 /* Read the LSN while holding buffer header lock */
2530 lsn = BufferGetLSN(buf_hdr);
2532
2533 if (XLogNeedsFlush(lsn)
2534 && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2535 {
2538 goto again;
2539 }
2540 }
2541
2542 /* OK, do the I/O */
2545
2547 &buf_hdr->tag);
2548 }
2549
2550
2551 if (buf_state & BM_VALID)
2552 {
2553 /*
2554 * When a BufferAccessStrategy is in use, blocks evicted from shared
2555 * buffers are counted as IOOP_EVICT in the corresponding context
2556 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2557 * strategy in two cases: 1) while initially claiming buffers for the
2558 * strategy ring 2) to replace an existing strategy ring buffer
2559 * because it is pinned or in use and cannot be reused.
2560 *
2561 * Blocks evicted from buffers already in the strategy ring are
2562 * counted as IOOP_REUSE in the corresponding strategy context.
2563 *
2564 * At this point, we can accurately count evictions and reuses,
2565 * because we have successfully claimed the valid buffer. Previously,
2566 * we may have been forced to release the buffer due to concurrent
2567 * pinners or erroring out.
2568 */
2570 from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
2571 }
2572
2573 /*
2574 * If the buffer has an entry in the buffer mapping table, delete it. This
2575 * can fail because another backend could have pinned or dirtied the
2576 * buffer.
2577 */
2579 {
2581 goto again;
2582 }
2583
2584 /* a final set of sanity checks */
2585#ifdef USE_ASSERT_CHECKING
2587
2590
2592#endif
2593
2594 return buf;
2595}
WritebackContext BackendWritebackContext
Definition buf_init.c:25
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6495
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition bufmgr.c:7280
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
Definition freelist.c:174
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition freelist.c:787
@ IOOP_EVICT
Definition pgstat.h:307
@ IOOP_REUSE
Definition pgstat.h:310
bool XLogNeedsFlush(XLogRecPtr record)
Definition xlog.c:3145

References Assert, BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetBuffer(), BufferGetLSN, BufferLockConditional(), CheckBufferIsPinnedOnce(), CurrentResourceOwner, fb(), FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBuffer(), LockBufHdr(), pg_atomic_read_u64(), pgstat_count_io_op(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), StrategyGetBuffer(), StrategyRejectBuffer(), UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().
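
The conditional-lock-or-retry idiom used for dirty victims also appears at the public API level. A hedged sketch using ConditionalLockBuffer(), which likewise refuses to wait (hypothetical caller; error handling omitted):

#include "postgres.h"
#include "storage/bufmgr.h"

/* Returns true if we got the exclusive lock and did our work. */
static bool
modify_if_uncontended(Buffer buf)
{
	if (!ConditionalLockBuffer(buf))	/* exclusive mode, but never blocks */
		return false;					/* contended: caller picks another page */

	/* ... modify the page, MarkBufferDirty(buf), WAL-log the change ... */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	return true;
}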

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 6675 of file bufmgr.c.

6676{
6677 int bufid = GetStartupBufferPinWaitBufId();
6678
6679 /*
6680 * If we get woken slowly then it's possible that the Startup process was
6681 * already woken by other backends before we got here. Also possible that
6682 * we get here by multiple interrupts or interrupts at inappropriate
6683 * times, so make sure we do nothing if the bufid is not set.
6684 */
6685 if (bufid < 0)
6686 return false;
6687
6688 if (GetPrivateRefCount(bufid + 1) > 0)
6689 return true;
6690
6691 return false;
6692}
int GetStartupBufferPinWaitBufId(void)
Definition proc.c:769

References fb(), GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

Definition at line 5534 of file bufmgr.c.

5535{
5536 Assert(BufferIsPinned(buffer));
5537 ResourceOwnerEnlarge(CurrentResourceOwner);
5538 if (BufferIsLocal(buffer))
5539 LocalRefCount[-buffer - 1]++;
5540 else
5541 {
5542 PrivateRefCountEntry *ref;
5543
5544 ref = GetPrivateRefCountEntry(buffer, true);
5545 Assert(ref != NULL);
5546 ref->data.refcount++;
5547 }
5548 ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer);
5549}
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), LocalRefCount, ResourceOwnerEnlarge(), and ResourceOwnerRememberBuffer().

Referenced by _bt_steppage(), btrestrpos(), entryLoadMoreItems(), ReadBufferBI(), RelationAddBlocks(), scanPostingTree(), startScanEntry(), and tts_buffer_heap_store_tuple().
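
A usage sketch (hypothetical caller): the second reference obtained via IncrBufferRefCount() must be released separately, exactly as if it had come from its own ReadBuffer() call.

#include "postgres.h"
#include "storage/bufmgr.h"

static void
share_pin(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);	/* first pin */
	Buffer		stashed;

	IncrBufferRefCount(buf);	/* second pin on the same buffer */
	stashed = buf;				/* e.g. remembered in another data structure */

	ReleaseBuffer(buf);			/* drops one pin ... */
	ReleaseBuffer(stashed);		/* ... the buffer stays usable until here */
}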

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4121 of file bufmgr.c.

4122{
4124
4125 /*
4126 * An advisory limit on the number of pins each backend should hold, based
4127 * on shared_buffers and the maximum number of connections possible.
4128 * That's very pessimistic, but outside toy-sized shared_buffers it should
4129 * allow plenty of pins. LimitAdditionalPins() and
4130 * GetAdditionalPinLimit() can be used to check the remaining balance.
4131 */
4133
4136
4137 hash_ctl.keysize = sizeof(Buffer);
4138 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4139
4140 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
4142
4143 /*
4144 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
4145 * the corresponding phase of backend shutdown.
4146 */
4147 Assert(MyProc != NULL);
4149}
static void AtProcExit_Buffers(int code, Datum arg)
Definition bufmgr.c:4156
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
int MaxBackends
Definition globals.c:146
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_BLOBS
Definition hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:372
#define NUM_AUXILIARY_PROCS
Definition proc.h:473

References Assert, AtProcExit_Buffers(), fb(), HASH_BLOBS, hash_create(), HASH_ELEM, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, PrivateRefCountArrayKeys, and PrivateRefCountHash.

Referenced by BaseInit().

◆ InvalidateBuffer()

static void InvalidateBuffer(BufferDesc *buf)

Definition at line 2274 of file bufmgr.c.

2275{
2277 uint32 oldHash; /* hash value for oldTag */
2278 LWLock *oldPartitionLock; /* buffer partition lock for it */
2281
2282 /* Save the original buffer tag before dropping the spinlock */
2283 oldTag = buf->tag;
2284
2286
2287 /*
2288 * Need to compute the old tag's hashcode and partition lock ID. XXX is it
2289 * worth storing the hashcode in BufferDesc so we need not recompute it
2290 * here? Probably not.
2291 */
2294
2295retry:
2296
2297 /*
2298 * Acquire exclusive mapping lock in preparation for changing the buffer's
2299 * association.
2300 */
2302
2303 /* Re-lock the buffer header */
2305
2306 /* If it's changed while we were waiting for lock, do nothing */
2307 if (!BufferTagsEqual(&buf->tag, &oldTag))
2308 {
2311 return;
2312 }
2313
2314 /*
2315 * We assume the reason for it to be pinned is that either we were
2316 * asynchronously reading the page in before erroring out or someone else
2317 * is flushing the page out. Wait for the IO to finish. (This could be
2318 * an infinite loop if the refcount is messed up... it would be nice to
2319 * time out after awhile, but there seems no way to be sure how many loops
2320 * may be needed. Note that if the other guy has pinned the buffer but
2321 * not yet done StartBufferIO, WaitIO will fall through and we'll
2322 * effectively be busy-looping here.)
2323 */
2325 {
2328 /* safety check: should definitely not be our *own* pin */
2330 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2331 WaitIO(buf);
2332 goto retry;
2333 }
2334
2335 /*
2336 * An invalidated buffer should not have any backends waiting to lock the
2337 * buffer, therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2338 */
2340
2341 /*
2342 * Clear out the buffer's tag and flags. We must do this to ensure that
2343 * linear scans of the buffer array don't think the buffer is valid.
2344 */
2346 ClearBufferTag(&buf->tag);
2347
2349 0,
2351 0);
2352
2353 /*
2354 * Remove the buffer from the lookup hashtable, if it was in there.
2355 */
2356 if (oldFlags & BM_TAG_VALID)
2358
2359 /*
2360 * Done with mapping lock.
2361 */
2363}
#define BUF_USAGECOUNT_MASK
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition bufmgr.c:6811

References Assert, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, fb(), GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)

Definition at line 2375 of file bufmgr.c.

2376{
2378 uint32 hash;
2380 BufferTag tag;
2381
2383
2384 /* have buffer pinned, so it's safe to read tag without lock */
2385 tag = buf_hdr->tag;
2386
2387 hash = BufTableHashCode(&tag);
2389
2391
2392 /* lock the buffer header */
2394
2395 /*
2396 * We have the buffer pinned, so nobody else should have been able to
2397 * unset this concurrently.
2398 */
2401 Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
2402
2403 /*
2404 * If somebody else pinned the buffer since, or even worse, dirtied it,
2405 * give up on this buffer: It's clearly in use.
2406 */
2408 {
2410
2413
2414 return false;
2415 }
2416
2417 /*
2418 * An invalidated buffer should not have any backends waiting to lock the
2419 * buffer, therefore BM_LOCK_WAKE_IN_PROGRESS should not be set.
2420 */
2422
2423 /*
2424 * Clear out the buffer's tag and flags and usagecount. This is not
2425 * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
2426 * doing anything with the buffer. But currently it's beneficial, as the
2427 * cheaper pre-check used by several linear scans of shared buffers relies
2428 * on the tag (see e.g. FlushDatabaseBuffers()).
2429 */
2430 ClearBufferTag(&buf_hdr->tag);
2432 0,
2434 0);
2435
2437
2438 /* finally delete buffer from the buffer mapping table */
2439 BufTableDelete(&tag, hash);
2440
2442
2447
2448 return true;
2449}

References Assert, BM_DIRTY, BM_LOCK_WAKE_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), fb(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u64(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 6759 of file bufmgr.c.

6760{
6761 BufferDesc *bufHdr;
6762 uint64 buf_state;
6763
6764 Assert(BufferIsValid(buffer));
6765
6766 /* see AIO related comment in LockBufferForCleanup() */
6767
6768 if (BufferIsLocal(buffer))
6769 {
6770 /* There should be exactly one pin */
6771 if (LocalRefCount[-buffer - 1] != 1)
6772 return false;
6773 /* Nobody else to wait for */
6774 return true;
6775 }
6776
6777 /* There should be exactly one local pin */
6778 if (GetPrivateRefCount(buffer) != 1)
6779 return false;
6780
6781 bufHdr = GetBufferDescriptor(buffer - 1);
6782
6783 /* caller must hold exclusive lock on buffer */
6785
6787
6790 {
6791 /* pincount is OK. */
6793 return true;
6794 }
6795
6797 return false;
6798}

References Assert, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), fb(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
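
The hash-index callers all follow the same shape: take the exclusive content lock first, then ask whether the pin situation permits cleanup. A minimal sketch (hypothetical; modelled on the hashbucketcleanup()-style callers):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
maybe_cleanup(Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	if (IsBufferCleanupOK(buf))
	{
		/* only our pin plus the exclusive lock: safe to shuffle tuples */
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}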

◆ IssuePendingWritebacks()

void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)

Definition at line 7330 of file bufmgr.c.

7331{
7333 int i;
7334
7335 if (wb_context->nr_pending == 0)
7336 return;
7337
7338 /*
7339 * Executing the writes in order can make them a lot faster, and allows us
7340 * to merge writeback requests to consecutive blocks into larger writebacks.
7341 */
7342 sort_pending_writebacks(wb_context->pending_writebacks,
7343 wb_context->nr_pending);
7344
7346
7347 /*
7348 * Coalesce neighbouring writes, but nothing else. For that we iterate
7349 * through the now-sorted array of pending flushes, and look forward to
7350 * find all neighbouring (or identical) writes.
7351 */
7352 for (i = 0; i < wb_context->nr_pending; i++)
7353 {
7357 int ahead;
7358 BufferTag tag;
7360 Size nblocks = 1;
7361
7362 cur = &wb_context->pending_writebacks[i];
7363 tag = cur->tag;
7365
7366 /*
7367 * Peek ahead, into following writeback requests, to see if they can
7368 * be combined with the current one.
7369 */
7370 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
7371 {
7372
7373 next = &wb_context->pending_writebacks[i + ahead + 1];
7374
7375 /* different file, stop */
7377 BufTagGetRelFileLocator(&next->tag)) ||
7378 BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
7379 break;
7380
7381 /* ok, block queued twice, skip */
7382 if (cur->tag.blockNum == next->tag.blockNum)
7383 continue;
7384
7385 /* only merge consecutive writes */
7386 if (cur->tag.blockNum + 1 != next->tag.blockNum)
7387 break;
7388
7389 nblocks++;
7390 cur = next;
7391 }
7392
7393 i += ahead;
7394
7395 /* and finally tell the kernel to write the data to storage */
7397 smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
7398 }
7399
7400 /*
7401 * Assume that writeback requests are only issued for buffers containing
7402 * blocks of permanent relations.
7403 */
7405 IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
7406
7407 wb_context->nr_pending = 0;
7408}
static int32 next
Definition blutils.c:225
struct cursor * cur
Definition ecpg.c:29
@ IOOP_WRITEBACK
Definition pgstat.h:311
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition smgr.c:805

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, fb(), i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
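
The merge loop above only coalesces runs of identical or strictly consecutive block numbers within a single relation and fork. A self-contained toy version of that merge (illustrative only, not the bufmgr.c code):

#include <stdio.h>

int
main(void)
{
	/* already sorted, as sort_pending_writebacks() guarantees */
	int			blocks[] = {10, 11, 11, 12, 40, 41};
	int			n = sizeof(blocks) / sizeof(blocks[0]);

	for (int i = 0; i < n; i++)
	{
		int			start = blocks[i];
		int			end = start;

		/* absorb duplicates and direct successors */
		while (i + 1 < n && (blocks[i + 1] == end || blocks[i + 1] == end + 1))
			end = blocks[++i];

		printf("writeback %d..%d (%d blocks)\n", start, end, end - start + 1);
	}
	return 0;
}

This prints two ranges, 10..12 and 40..41, matching the two smgrwriteback() calls the real loop would issue for that input.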

◆ LimitAdditionalPins()

void LimitAdditionalPins(uint32 *additional_pins)

Definition at line 2642 of file bufmgr.c.

2643{
2644 uint32 limit;
2645
2646 if (*additional_pins <= 1)
2647 return;
2648
2649 limit = GetAdditionalPinLimit();
2650 limit = Max(limit, 1);
2651 if (limit < *additional_pins)
2652 *additional_pins = limit;
2653}
uint32 GetAdditionalPinLimit(void)
Definition bufmgr.c:2616
#define Max(x, y)
Definition c.h:991

References fb(), GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().
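
A hedged sketch of the intended call pattern when sizing a bulk extension (hypothetical numbers):

#include "postgres.h"
#include "storage/bufmgr.h"

static uint32
clamp_extension(void)
{
	uint32		extend_by = 64;		/* what the caller would like */

	/*
	 * Scaled down if this backend is near its proportional share of pins;
	 * the result is always at least 1, so the caller still makes progress.
	 */
	LimitAdditionalPins(&extend_by);

	return extend_by;
}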

◆ local_buffer_readv_complete()

static PgAioResult local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)

Definition at line 8514 of file bufmgr.c.

8516{
8518}
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
Definition bufmgr.c:8259

References buffer_readv_complete(), and fb().

◆ local_buffer_readv_stage()

static void local_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)

Definition at line 8508 of file bufmgr.c.

8509{
8510 buffer_stage_common(ioh, false, true);
8511}
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
Definition bufmgr.c:7871

References buffer_stage_common(), and fb().

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback(void *arg)

Definition at line 7065 of file bufmgr.c.

7066{
7068
7069 if (bufHdr != NULL)
7070 errcontext("writing block %u of relation \"%s\"",
7071 bufHdr->tag.blockNum,
7074 BufTagGetForkNum(&bufHdr->tag)).str);
7075}
#define errcontext
Definition elog.h:198
void * arg

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), MyProcNumber, and relpathbackend.

Referenced by FlushRelationBuffers().

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 6528 of file bufmgr.c.

6529{
6531 TimestampTz waitStart = 0;
6532 bool waiting = false;
6533 bool logged_recovery_conflict = false;
6534
6535 Assert(BufferIsPinned(buffer));
6537
6539
6540 /*
6541 * We do not yet need to be worried about in-progress AIOs holding a pin,
6542 * as we, so far, only support doing reads via AIO and this function can
6543 * only be called once the buffer is valid (i.e. no read can be in
6544 * flight).
6545 */
6546
6547 /* Nobody else to wait for */
6548 if (BufferIsLocal(buffer))
6549 return;
6550
6551 bufHdr = GetBufferDescriptor(buffer - 1);
6552
6553 for (;;)
6554 {
6556 uint64 unset_bits = 0;
6557
6558 /* Try to acquire lock */
6561
6564 {
6565 /* Successfully acquired exclusive lock with pincount 1 */
6567
6568 /*
6569 * Emit the log message if recovery conflict on buffer pin was
6570 * resolved but the startup process waited longer than
6571 * deadlock_timeout for it.
6572 */
6575 waitStart, GetCurrentTimestamp(),
6576 NULL, false);
6577
6578 if (waiting)
6579 {
6580 /* reset ps display to remove the suffix if we added one */
6582 waiting = false;
6583 }
6584 return;
6585 }
6586 /* Failed, so mark myself as waiting for pincount 1 */
6588 {
6591 elog(ERROR, "multiple backends attempting to wait for pincount 1");
6592 }
6593 bufHdr->wait_backend_pgprocno = MyProcNumber;
6597 0);
6599
6600 /* Wait to be signaled by UnpinBuffer() */
6601 if (InHotStandby)
6602 {
6603 if (!waiting)
6604 {
6605 /* adjust the process title to indicate that it's waiting */
6606 set_ps_display_suffix("waiting");
6607 waiting = true;
6608 }
6609
6610 /*
6611 * Emit the log message if the startup process is waiting longer
6612 * than deadlock_timeout for recovery conflict on buffer pin.
6613 *
6614 * Skip this if first time through because the startup process has
6615 * not started waiting yet in this case. So, the wait start
6616 * timestamp is set after this logic.
6617 */
6618 if (waitStart != 0 && !logged_recovery_conflict)
6619 {
6621
6622 if (TimestampDifferenceExceeds(waitStart, now,
6624 {
6626 waitStart, now, NULL, true);
6628 }
6629 }
6630
6631 /*
6632 * Set the wait start timestamp if logging is enabled and first
6633 * time through.
6634 */
6635 if (log_recovery_conflict_waits && waitStart == 0)
6636 waitStart = GetCurrentTimestamp();
6637
6638 /* Publish the bufid that Startup process waits on */
6639 SetStartupBufferPinWaitBufId(buffer - 1);
6640 /* Set alarm and then wait to be signaled by UnpinBuffer() */
6642 /* Reset the published bufid */
6644 }
6645 else
6647
6648 /*
6649 * Remove flag marking us as waiter. Normally this will not be set
6650 * anymore, but ProcWaitForSignal() can return for other signals as
6651 * well. We take care to only reset the flag if we're the waiter, as
6652 * theoretically another backend could have started waiting. That's
6653 * impossible with the current usages due to table level locking, but
6654 * better be safe.
6655 */
6657 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
6658 bufHdr->wait_backend_pgprocno == MyProcNumber)
6660
6662 0, unset_bits,
6663 0);
6664
6666 /* Loop back and try again */
6667 }
6668}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1609
#define BM_PIN_COUNT_WAITER
static BufferDesc * PinCountWaitBuf
Definition bufmgr.c:213
int64 TimestampTz
Definition timestamp.h:39
void set_ps_display_remove_suffix(void)
Definition ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition ps_status.c:387
int DeadlockTimeout
Definition proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition proc.c:757
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:1984
void ResolveRecoveryConflictWithBufferPin(void)
Definition standby.c:793
bool log_recovery_conflict_waits
Definition standby.c:42
void LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition standby.c:274
@ RECOVERY_CONFLICT_BUFFERPIN
Definition standby.h:46
static volatile sig_atomic_t waiting
#define InHotStandby
Definition xlogutils.h:60

References Assert, BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, fb(), GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, ProcWaitForSignal(), RECOVERY_CONFLICT_BUFFERPIN, ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
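
A minimal sketch of the vacuum-style call pattern (hypothetical caller). The buffer must already be pinned exactly once by this backend; on return we hold the exclusive lock and are the sole pinner:

#include "postgres.h"
#include "storage/bufmgr.h"

static void
cleanup_one_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);

	LockBufferForCleanup(buf);	/* may wait for other backends to unpin */

	/* ... prune or defragment the page: no other pins can exist here ... */

	UnlockReleaseBuffer(buf);
}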

◆ LockBufferInternal()

void LockBufferInternal(Buffer buffer, BufferLockMode mode)

Definition at line 6432 of file bufmgr.c.

6433{
6435
6436 /*
6437 * We can't wait if we haven't got a PGPROC. This should only occur
6438 * during bootstrap or shared memory initialization. Put an Assert here
6439 * to catch unsafe coding practices.
6440 */
6442
6443 /* handled in LockBuffer() wrapper */
6445
6446 Assert(BufferIsPinned(buffer));
6447 if (BufferIsLocal(buffer))
6448 return; /* local buffers need no lock */
6449
6450 buf_hdr = GetBufferDescriptor(buffer - 1);
6451
6452 /*
6453 * Test the most frequent lock modes first. While a switch (mode) would be
6454 * nice, at least gcc generates considerably worse code for it.
6455 *
6456 * Call BufferLockAcquire() with a constant argument for mode, to generate
6457 * more efficient code for the different lock modes.
6458 */
6459 if (mode == BUFFER_LOCK_SHARE)
6461 else if (mode == BUFFER_LOCK_EXCLUSIVE)
6465 else
6466 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
6467}
bool IsUnderPostmaster
Definition globals.c:120

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_SHARE_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, BufferLockAcquire(), elog, ERROR, fb(), GetBufferDescriptor(), IsUnderPostmaster, mode, and MyProc.

Referenced by LockBuffer().
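
The public wrapper is LockBuffer(); a hedged sketch of the usual mode sequence on a pinned buffer (hypothetical caller):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
read_then_modify(Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_SHARE);			/* concurrent readers allowed */
	/* ... inspect BufferGetPage(buf) ... */
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);		/* sole writer */
	/* ... change the page, then MarkBufferDirty(buf) ... */
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}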

◆ LockBufHdr()

uint64 LockBufHdr(BufferDesc *desc)

Definition at line 7108 of file bufmgr.c.

7109{
7111
7113
7114 while (true)
7115 {
7116 /*
7117 * Always try once to acquire the lock directly, without setting up
7118 * the spin-delay infrastructure. The work necessary for that shows up
7119 * in profiles and is rarely necessary.
7120 */
7122 if (likely(!(old_buf_state & BM_LOCKED)))
7123 break; /* got lock */
7124
7125 /* and then spin without atomic operations until lock is released */
7126 {
7128
7130
7131 while (old_buf_state & BM_LOCKED)
7132 {
7135 }
7137 }
7138
7139 /*
7140 * Retry. The lock may well already have been re-acquired by the time
7141 * we attempt to get it again.
7142 */
7143 }
7144
7145 return old_buf_state | BM_LOCKED;
7146}
void perform_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition s_lock.c:186
#define init_local_spin_delay(status)
Definition s_lock.h:757

References Assert, BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, fb(), finish_spin_delay(), init_local_spin_delay, likely, perform_spin_delay(), pg_atomic_fetch_or_u64(), pg_atomic_read_u64(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), buffer_stage_common(), BufferAlloc(), BufferGetLSNAtomic(), BufferLockDequeueSelf(), BufferLockQueueSelf(), BufferLockWakeup(), BufferSync(), ConditionalLockBufferForCleanup(), create_toy_buffer(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), pg_buffercache_os_pages_internal(), pg_buffercache_pages(), StartBufferIO(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), WaitIO(), and WakePinCountWaiter().
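
Every caller follows the same discipline: take the header spinlock, inspect or adjust the state word, and release it promptly without sleeping in between. A minimal sketch (assumes storage/buf_internals.h; illustrative only):

#include "postgres.h"
#include "storage/buf_internals.h"

static bool
buffer_is_dirty(BufferDesc *desc)
{
	uint64		buf_state;
	bool		dirty;

	buf_state = LockBufHdr(desc);	/* returns the state word, BM_LOCKED set */
	dirty = (buf_state & BM_DIRTY) != 0;
	UnlockBufHdr(desc);

	return dirty;
}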

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 3057 of file bufmgr.c.

3058{
3062
3063 if (!BufferIsValid(buffer))
3064 elog(ERROR, "bad buffer ID: %d", buffer);
3065
3066 if (BufferIsLocal(buffer))
3067 {
3068 MarkLocalBufferDirty(buffer);
3069 return;
3070 }
3071
3072 bufHdr = GetBufferDescriptor(buffer - 1);
3073
3074 Assert(BufferIsPinned(buffer));
3076
3077 /*
3078 * NB: We have to wait for the buffer header spinlock not to be held, as
3079 * TerminateBufferIO() relies on the spinlock.
3080 */
3082 for (;;)
3083 {
3086
3088
3091
3093 buf_state))
3094 break;
3095 }
3096
3097 /*
3098 * If the buffer was not dirty already, do vacuum accounting.
3099 */
3100 if (!(old_buf_state & BM_DIRTY))
3101 {
3103 if (VacuumCostActive)
3105 }
3106}
pg_noinline uint64 WaitBufHdrUnlocked(BufferDesc *buf)
Definition bufmgr.c:7156
int VacuumCostPageDirty
Definition globals.c:153
void MarkLocalBufferDirty(Buffer buffer)
Definition localbuf.c:491
int64 shared_blks_dirtied
Definition instrument.h:28

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, fb(), GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), pgBufferUsage, BufferUsage::shared_blks_dirtied, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), identify_and_fix_vm_corruption(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().
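
The canonical modification sequence, sketched with the WAL details elided (hypothetical caller; see src/backend/access/transam/README for the full protocol):

#include "postgres.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"

static void
update_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	START_CRIT_SECTION();
	/* ... apply the change to BufferGetPage(buf) ... */
	MarkBufferDirty(buf);		/* before the WAL record is inserted */
	/* ... XLogInsert() the change and PageSetLSN() with its result ... */
	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);
}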

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)

Definition at line 5566 of file bufmgr.c.

5567{
5569 Page page = BufferGetPage(buffer);
5570
5571 if (!BufferIsValid(buffer))
5572 elog(ERROR, "bad buffer ID: %d", buffer);
5573
5574 if (BufferIsLocal(buffer))
5575 {
5576 MarkLocalBufferDirty(buffer);
5577 return;
5578 }
5579
5580 bufHdr = GetBufferDescriptor(buffer - 1);
5581
5582 Assert(GetPrivateRefCount(buffer) > 0);
5583 /* here, either share or exclusive lock is OK */
5584 Assert(BufferIsLockedByMe(buffer));
5585
5586 /*
5587 * This routine might get called many times on the same page, if we are
5588 * making the first scan after commit of an xact that added/deleted many
5589 * tuples. So, be as quick as we can if the buffer is already dirty. We
5590 * do this by not acquiring the spinlock if it looks like the status bits
5591 * are already set. Since we make this test unlocked, there's a chance we
5592 * might fail to notice that the flags have just been cleared, and fail
5593 * to reset them, due to memory-ordering issues. But since this function
5594 * is only intended to be used in cases where failing to write out the
5595 * data would be harmless anyway, it doesn't really matter.
5596 */
5597 if ((pg_atomic_read_u64(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
5599 {
5601 bool dirtied = false;
5602 bool delayChkptFlags = false;
5604
5605 /*
5606 * If we need to protect hint bit updates from torn writes, WAL-log a
5607 * full page image of the page. This full page image is only necessary
5608 * if the hint bit update is the first change to the page since the
5609 * last checkpoint.
5610 *
5611 * We don't check full_page_writes here because that logic is included
5612 * when we call XLogInsert() since the value changes dynamically.
5613 */
5614 if (XLogHintBitIsNeeded() &&
5616 {
5617 /*
5618 * If we must not write WAL, due to a relfilelocator-specific
5619 * condition or being in recovery, don't dirty the page. We can
5621 * set the hint, just not dirty the page as a result, so the hint
5622 * is lost when we evict the page or shut down.
5622 *
5623 * See src/backend/storage/page/README for longer discussion.
5624 */
5625 if (RecoveryInProgress() ||
5627 return;
5628
5629 /*
5630 * If the block is already dirty because we either made a change
5631 * or set a hint already, then we don't need to write a full page
5632 * image. Note that aggressive cleaning of blocks dirtied by hint
5633 * bit setting would increase the call rate. Bulk setting of hint
5634 * bits would reduce the call rate...
5635 *
5636 * We must issue the WAL record before we mark the buffer dirty.
5637 * Otherwise we might write the page before we write the WAL. That
5638 * causes a race condition, since a checkpoint might occur between
5639 * writing the WAL record and marking the buffer dirty. We solve
5640 * that with a kluge, but one that is already in use during
5641 * transaction commit to prevent race conditions. Basically, we
5642 * simply prevent the checkpoint WAL record from being written
5643 * until we have marked the buffer dirty. We don't start the
5644 * checkpoint flush until we have marked dirty, so our checkpoint
5645 * must flush the change to disk successfully or the checkpoint
5646 * never gets written, in which case crash recovery will fix things up.
5647 *
5648 * It's possible we may enter here without an xid, so it is
5649 * essential that CreateCheckPoint waits for virtual transactions
5650 * rather than full transactionids.
5651 */
5654 delayChkptFlags = true;
5655 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5656 }
5657
5659
5661
5662 if (!(buf_state & BM_DIRTY))
5663 {
5664 dirtied = true; /* Means "will be dirtied by this action" */
5665
5666 /*
5667 * Set the page LSN if we wrote a backup block. We aren't supposed
5668 * to set this when only holding a share lock but as long as we
5669 * serialise it somehow we're OK. We choose to set LSN while
5670 * holding the buffer header lock, which causes any reader of an
5671 * LSN who holds only a share lock to also obtain a buffer header
5672 * lock before using PageGetLSN(), which is enforced in
5673 * BufferGetLSNAtomic().
5674 *
5675 * If checksums are enabled, you might think we should reset the
5676 * checksum here. That will happen when the page is written
5677 * sometime later in this checkpoint cycle.
5678 */
5679 if (XLogRecPtrIsValid(lsn))
5680 PageSetLSN(page, lsn);
5681 }
5682
5685 0, 0);
5686
5687 if (delayChkptFlags)
5689
5690 if (dirtied)
5691 {
5693 if (VacuumCostActive)
5695 }
5696 }
5697}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
#define DELAY_CHKPT_START
Definition proc.h:136
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition storage.c:573
int delayChkptFlags
Definition proc.h:267
bool RecoveryInProgress(void)
Definition xlog.c:6460
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)

References Assert, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u64(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
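
Hint-bit setters such as SetHintBits() need only a pin and a content lock of either strength. A hedged sketch (hypothetical caller):

#include "postgres.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static void
set_tuple_hint(Buffer buf, HeapTupleHeader tuple, uint16 infomask)
{
	tuple->t_infomask |= infomask;	/* the hint itself */

	/*
	 * May dirty the buffer, or quietly do nothing when WAL rules forbid it;
	 * 'true' declares a standard page layout, letting any full-page image
	 * omit the hole between pd_lower and pd_upper.
	 */
	MarkBufferDirtyHint(buf, true);
}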

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers(int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)

Definition at line 7822 of file bufmgr.c.

7825{
7826 *buffers_dirtied = 0;
7828 *buffers_skipped = 0;
7829
7830 for (int buf = 1; buf <= NBuffers; buf++)
7831 {
7832 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7835
7837
7839 if (!(buf_state & BM_VALID))
7840 continue;
7841
7844
7845 LockBufHdr(desc);
7846
7848 (*buffers_dirtied)++;
7849 else if (buffer_already_dirty)
7850 (*buffers_already_dirty)++;
7851 else
7852 (*buffers_skipped)++;
7853 }
7854}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition bufmgr.c:7673

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers(Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)

Definition at line 7765 of file bufmgr.c.

7769{
7771
7772 *buffers_dirtied = 0;
7774 *buffers_skipped = 0;
7775
7776 for (int buf = 1; buf <= NBuffers; buf++)
7777 {
7778 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7781
7783
7784 /* An unlocked precheck should be safe and saves some cycles. */
7785 if ((buf_state & BM_VALID) == 0 ||
7787 continue;
7788
7789 /* Make sure we can pin the buffer. */
7792
7793 buf_state = LockBufHdr(desc);
7794
7795 /* recheck, could have changed without the lock */
7796 if ((buf_state & BM_VALID) == 0 ||
7798 {
7799 UnlockBufHdr(desc);
7800 continue;
7801 }
7802
7804 (*buffers_dirtied)++;
7805 else if (buffer_already_dirty)
7806 (*buffers_already_dirty)++;
7807 else
7808 (*buffers_skipped)++;
7809 }
7810}

References Assert, BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u64(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer(Buffer buf, bool *buffer_already_dirty)

Definition at line 7729 of file bufmgr.c.

7730{
7731 BufferDesc *desc;
7732 bool buffer_dirtied = false;
7733
7735
7736 /* Make sure we can pin the buffer. */
7739
7740 desc = GetBufferDescriptor(buf - 1);
7741 LockBufHdr(desc);
7742
7744 /* Both cannot be true at the same time */
7746
7747 return buffer_dirtied;
7748}

References Assert, buf, BufferIsLocal, CurrentResourceOwner, fb(), GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().

◆ MarkDirtyUnpinnedBufferInternal()

static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)

Definition at line 7673 of file bufmgr.c.

7675{
7677 bool result = false;
7678
7679 *buffer_already_dirty = false;
7680
7683
7684 if ((buf_state & BM_VALID) == 0)
7685 {
7686 UnlockBufHdr(desc);
7687 return false;
7688 }
7689
7690 /* Check that it's not pinned already. */
7692 {
7693 UnlockBufHdr(desc);
7694 return false;
7695 }
7696
7697 /* Pin the buffer and then release the buffer spinlock */
7698 PinBuffer_Locked(desc);
7699
7700 /* If it was not already dirty, mark it as dirty. */
7701 if (!(buf_state & BM_DIRTY))
7702 {
7705 result = true;
7706 BufferLockUnlock(buf, desc);
7707 }
7708 else
7709 *buffer_already_dirty = true;
7710
7711 UnpinBuffer(desc);
7712
7713 return result;
7714}
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3057

References Assert, BM_DIRTY, BM_LOCKED, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BUFFER_LOCK_EXCLUSIVE, BufferLockAcquire(), BufferLockUnlock(), fb(), MarkBufferDirty(), pg_atomic_read_u64(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), and MarkDirtyUnpinnedBuffer().

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer)

Definition at line 374 of file bufmgr.c.

375{
377
378 /* only allowed to be called when a reservation has been made */
380
381 /* use up the reserved entry */
383
384 /* and fill it */
386 res->buffer = buffer;
387 res->data.refcount = 0;
389
390 /* update cache for the next lookup */
392
394
395 return res;
396}

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, PrivateRefCountEntry::data, PrivateRefCountData::lockmode, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountData::refcount, and ReservedRefCountSlot.

Referenced by TrackNewBufferPin().

◆ PinBuffer()

static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)

Definition at line 3182 of file bufmgr.c.

3184{
3186 bool result;
3188
3191
3192 ref = GetPrivateRefCountEntry(b, true);
3193
3194 if (ref == NULL)
3195 {
3198
3200 for (;;)
3201 {
3203 return false;
3204
3205 /*
3206 * We're not allowed to increase the refcount while the buffer
3207 * header spinlock is held. Wait for the lock to be released.
3208 */
3211
3213
3214 /* increase refcount */
3216
3217 if (strategy == NULL)
3218 {
3219 /* Default case: increase usagecount unless already max. */
3222 }
3223 else
3224 {
3225 /*
3226 * Ring buffers shouldn't evict others from the pool. Thus we
3227 * don't make usagecount more than 1.
3228 */
3231 }
3232
3234 buf_state))
3235 {
3236 result = (buf_state & BM_VALID) != 0;
3237
3239 break;
3240 }
3241 }
3242 }
3243 else
3244 {
3245 /*
3246 * If we previously pinned the buffer, it is likely to be valid, but
3247 * it may not be if StartReadBuffers() was called and
3248 * WaitReadBuffers() hasn't been called yet. We'll check by loading
3249 * the flags without locking. This is racy, but it's OK to return
3250 * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
3251 * it'll see that it's now valid.
3252 *
3253 * Note: We deliberately avoid a Valgrind client request here.
3254 * Individual access methods can optionally superimpose buffer page
3255 * client requests on top of our client requests to enforce that
3256 * buffers are only accessed while locked (and pinned). It's possible
3257 * that the buffer page is legitimately non-accessible here. We
3258 * cannot meddle with that.
3259 */
3260 result = (pg_atomic_read_u64(&buf->state) & BM_VALID) != 0;
3261
3262 Assert(ref->data.refcount > 0);
3263 ref->data.refcount++;
3265 }
3266
3267 return result;
3268}
#define BM_MAX_USAGE_COUNT
#define BUF_STATE_GET_USAGECOUNT(state)
void TrackNewBufferPin(Buffer buf)
Definition bufmgr.c:3417

References Assert, b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, CurrentResourceOwner, fb(), GetPrivateRefCountEntry(), pg_atomic_compare_exchange_u64(), pg_atomic_read_u64(), ReservedRefCountSlot, ResourceOwnerRememberBuffer(), TrackNewBufferPin(), unlikely, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked(BufferDesc *buf)

Definition at line 3293 of file bufmgr.c.

3294{
3296
3297 /*
3298 * As explained, we don't expect any preexisting pins. That allows us to
3299 * manipulate the PrivateRefCount after releasing the spinlock.
3300 */
3302
3303 /*
3304 * Since we hold the buffer spinlock, we can update the buffer state and
3305 * release the lock in one operation.
3306 */
3308
3310 0, 0, 1);
3311
3313}

References Assert, buf, BufferDescriptorGetBuffer(), fb(), GetPrivateRefCountEntry(), pg_atomic_read_u64(), TrackNewBufferPin(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), MarkDirtyUnpinnedBufferInternal(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)

Definition at line 1211 of file bufmgr.c.

1218{
1222
1223 Assert(blockNum != P_NEW);
1224
1225 /* Persistence should be set before */
1226 Assert((persistence == RELPERSISTENCE_TEMP ||
1227 persistence == RELPERSISTENCE_PERMANENT ||
1228 persistence == RELPERSISTENCE_UNLOGGED));
1229
1230 if (persistence == RELPERSISTENCE_TEMP)
1231 {
1234 }
1235 else
1236 {
1237 io_context = IOContextForStrategy(strategy);
1239 }
1240
1241 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1245 smgr->smgr_rlocator.backend);
1246
1247 if (persistence == RELPERSISTENCE_TEMP)
1248 {
1249 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1250 if (*foundPtr)
1252 }
1253 else
1254 {
1255 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1256 strategy, foundPtr, io_context);
1257 if (*foundPtr)
1259 }
1260 if (rel)
1261 {
1262 /*
1263 * While pgBufferUsage's "read" counter isn't bumped unless we reach
1264 * WaitReadBuffers() (so, not for hits, and not for buffers that are
1265 * zeroed instead), the per-relation stats always count them.
1266 */
1268 if (*foundPtr)
1270 }
1271 if (*foundPtr)
1272 {
1274 if (VacuumCostActive)
1276
1277 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1281 smgr->smgr_rlocator.backend,
1282 true);
1283 }
1284
1286}
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition bufmgr.c:2101
#define P_NEW
Definition bufmgr.h:198
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition localbuf.c:119
#define pgstat_count_buffer_read(rel)
Definition pgstat.h:715

References Assert, RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, fb(), IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, and VacuumCostPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)

Definition at line 773 of file bufmgr.c.

774{
776 Assert(BlockNumberIsValid(blockNum));
777
779 {
780 /* see comments in ReadBufferExtended */
784 errmsg("cannot access temporary tables of other sessions")));
785
786 /* pass it off to localbuf.c */
787 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
788 }
789 else
790 {
791 /* pass it to the shared buffer version */
792 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
793 }
794}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:683
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:667
#define RelationIsValid(relation)
Definition rel.h:489

References Assert, BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, fb(), PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().
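
A hedged usage sketch: stay a fixed distance ahead of a sequential read loop (hypothetical caller and look-ahead distance):

#include "postgres.h"
#include "storage/bufmgr.h"

static void
scan_with_prefetch(Relation rel, BlockNumber nblocks)
{
	for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;

		/* hint the I/O layer a few blocks ahead of the scan position */
		if (blkno + 8 < nblocks)
			(void) PrefetchBuffer(rel, MAIN_FORKNUM, blkno + 8);

		buf = ReadBuffer(rel, blkno);
		/* ... process the page ... */
		ReleaseBuffer(buf);
	}
}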

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)

Definition at line 683 of file bufmgr.c.

686{
687 PrefetchBufferResult result = {InvalidBuffer, false};
688 BufferTag newTag; /* identity of requested block */
689 uint32 newHash; /* hash value for newTag */
690 LWLock *newPartitionLock; /* buffer partition lock for it */
691 int buf_id;
692
693 Assert(BlockNumberIsValid(blockNum));
694
695 /* create a tag so we can lookup the buffer */
696 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
697 forkNum, blockNum);
698
699 /* determine its hash code and partition lock ID */
702
703 /* see if the block is in the buffer pool already */
705 buf_id = BufTableLookup(&newTag, newHash);
707
708 /* If not in buffers, initiate prefetch */
709 if (buf_id < 0)
710 {
711#ifdef USE_PREFETCH
712 /*
713 * Try to initiate an asynchronous read. This returns false in
714 * recovery if the relation file doesn't exist.
715 */
716 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
717 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
718 {
719 result.initiated_io = true;
720 }
721#endif /* USE_PREFETCH */
722 }
723 else
724 {
725 /*
726 * Report the buffer it was in at that time. The caller may be able
727 * to avoid a buffer table lookup, but it's not pinned and it must be
728 * rechecked!
729 */
730 result.recent_buffer = buf_id + 1;
731 }
732
733 /*
734 * If the block *is* in buffers, we do nothing. This is not really ideal:
735 * the block might be just about to be evicted, which would be stupid
736 * since we know we are going to need it soon. But the only easy answer
737 * is to bump the usage_count, which does not seem like a great solution:
738 * when the caller does ultimately touch the block, usage_count would get
739 * bumped again, resulting in too much favoritism for blocks that are
740 * involved in a prefetch sequence. A real fix would involve some
741 * additional per-buffer state, and it's not clear that there's enough of
742 * a problem to justify that.
743 */
744
745 return result;
746}
int io_direct_flags
Definition fd.c:171
#define IO_DIRECT_DATA
Definition fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition smgr.c:678
Buffer recent_buffer
Definition bufmgr.h:61

References Assert, BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), fb(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().

◆ ProcessReadBuffersResult()

static void ProcessReadBuffersResult(ReadBuffersOperation *operation)

Definition at line 1694 of file bufmgr.c.

1695{
1696 PgAioReturn *aio_ret = &operation->io_return;
1698 int newly_read_blocks = 0;
1699
1700 Assert(pgaio_wref_valid(&operation->io_wref));
1701 Assert(aio_ret->result.status != PGAIO_RS_UNKNOWN);
1702
1703 /*
1704 * SMGR reports the number of blocks successfully read as the result of
1705 * the IO operation. Thus we can simply add that to ->nblocks_done.
1706 */
1707
1708 if (likely(rs != PGAIO_RS_ERROR))
1709 newly_read_blocks = aio_ret->result.result;
1710
1711 if (rs == PGAIO_RS_ERROR || rs == PGAIO_RS_WARNING)
1712 pgaio_result_report(aio_ret->result, &aio_ret->target_data,
1713 rs == PGAIO_RS_ERROR ? ERROR : WARNING);
1714 else if (aio_ret->result.status == PGAIO_RS_PARTIAL)
1715 {
1716 /*
1717 * We'll retry, so we just emit a debug message to the server log (or
1718 * not even that in prod scenarios).
1719 */
1720 pgaio_result_report(aio_ret->result, &aio_ret->target_data, DEBUG1);
1721 elog(DEBUG3, "partial read, will retry");
1722 }
1723
1726
1727 operation->nblocks_done += newly_read_blocks;
1728
1729 Assert(operation->nblocks_done <= operation->nblocks);
1730}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition aio.c:971
PgAioResultStatus
Definition aio_types.h:79
@ PGAIO_RS_UNKNOWN
Definition aio_types.h:80
@ PGAIO_RS_PARTIAL
Definition aio_types.h:82
#define DEBUG3
Definition elog.h:28
PgAioResult result
Definition aio_types.h:132

References Assert, DEBUG1, DEBUG3, elog, ERROR, fb(), ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, pgaio_result_report(), PGAIO_RS_ERROR, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, PGAIO_RS_WARNING, pgaio_wref_valid(), PgAioReturn::result, PgAioResult::status, and WARNING.

Referenced by WaitReadBuffers().

◆ ReadBuffer()

Buffer ReadBuffer(Relation reln, BlockNumber blockNum)

Definition at line 865 of file bufmgr.c.

866{
868}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:912
@ RBM_NORMAL
Definition bufmgr.h:46

References fb(), MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)

Definition at line 1294 of file bufmgr.c.

1298{
1299 ReadBuffersOperation operation;
1300 Buffer buffer;
1301 int flags;
1302 char persistence;
1303
1304 /*
1305 * Backward-compatibility path; most code should use ExtendBufferedRel()
1306 * instead, as acquiring the extension lock inside ExtendBufferedRel()
1307 * scales a lot better.
1308 */
1309 if (unlikely(blockNum == P_NEW))
1310 {
1312
1313 /*
1314 * Since no-one else can be looking at the page contents yet, there is
1315 * no difference between an exclusive lock and a cleanup-strength
1316 * lock.
1317 */
1318 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1319 flags |= EB_LOCK_FIRST;
1320
1321 return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1322 }
1323
1324 if (rel)
1325 persistence = rel->rd_rel->relpersistence;
1326 else
1327 persistence = smgr_persistence;
1328
1329 if (mode == RBM_ZERO_AND_CLEANUP_LOCK ||
1330 mode == RBM_ZERO_AND_LOCK)
1331 {
1332 bool found;
1333
1334 buffer = PinBufferForBlock(rel, smgr, persistence,
1335 forkNum, blockNum, strategy, &found);
1336 ZeroAndLockBuffer(buffer, mode, found);
1337 return buffer;
1338 }
1339
1340 /*
1341 * Signal that we are going to immediately wait. If we're immediately
1342 * waiting, there is no benefit in actually executing the IO
1343 * asynchronously, it would just add dispatch overhead.
1344 */
1345 flags = READ_BUFFERS_SYNCHRONOUSLY;
1346 if (mode == RBM_ZERO_ON_ERROR)
1347 flags |= READ_BUFFERS_ZERO_ON_ERROR;
1348 operation.smgr = smgr;
1349 operation.rel = rel;
1350 operation.persistence = persistence;
1351 operation.forknum = forkNum;
1352 operation.strategy = strategy;
1353 if (StartReadBuffer(&operation,
1354 &buffer,
1355 blockNum,
1356 flags))
1357 WaitReadBuffers(&operation);
1358
1359 return buffer;
1360}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition bufmgr.c:965
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition bufmgr.c:1132
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition bufmgr.c:1211
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition bufmgr.c:1733
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition bufmgr.c:1609
@ RBM_ZERO_ON_ERROR
Definition bufmgr.h:51
#define BMR_REL(p_rel)
Definition bufmgr.h:114
Form_pg_class rd_rel
Definition rel.h:111

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), fb(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 912 of file bufmgr.c.

914{
915 Buffer buf;
916
917 /*
918 * Reject attempts to read non-local temporary relations; we would be
919 * likely to get wrong data since we have no visibility into the owning
920 * session's local buffers.
921 */
922 if (RELATION_IS_OTHER_TEMP(reln))
923 ereport(ERROR,
924 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
925 errmsg("cannot access temporary tables of other sessions")));
926
927 /*
928 * Read the buffer, and update pgstat counters to reflect a cache hit or
929 * miss.
930 */
931 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
932 forkNum, blockNum, mode, strategy);
933
934 return buf;
935}

References buf, ereport, errcode(), errmsg(), ERROR, fb(), mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().

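A sketch of the strategy-aware path (helper name hypothetical): bulk scans pass a BAS_BULKREAD ring strategy so the pages they pull in are recycled within a small ring instead of evicting the rest of shared buffers:

#include "postgres.h"

#include "storage/bufmgr.h"

/* Hypothetical bulk scan over the main fork using a ring strategy. */
static void
example_scan_relation(Relation rel)
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);

    for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                             RBM_NORMAL, strategy);

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... inspect BufferGetPage(buf) here ... */
        UnlockReleaseBuffer(buf);
    }

    FreeAccessStrategy(strategy);
}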
◆ ReadBuffersCanStartIO()

static bool ReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1665 of file bufmgr.c.

1666{
1667 /*
1668 * If this backend currently has staged IO, we need to submit the pending
1669 * IO before waiting for the right to issue IO, to avoid the potential for
1670 * deadlocks (and, more commonly, unnecessary delays for other backends).
1671 */
1672 if (!nowait && pgaio_have_staged())
1673 {
1674 if (ReadBuffersCanStartIOOnce(buffer, true))
1675 return true;
1676
1677 /*
1678 * Unfortunately StartBufferIO() returning false doesn't let us
1679 * distinguish between the buffer already being valid and IO already
1680 * being in progress. Since IO already being in progress is quite
1681 * rare, this approach seems fine.
1682 */
1683 pgaio_submit_staged();
1684 }
1685
1686 return ReadBuffersCanStartIOOnce(buffer, nowait);
1687}
bool pgaio_have_staged(void)
Definition aio.c:1107
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
Definition bufmgr.c:1652

References PrivateRefCountEntry::buffer, pgaio_have_staged(), pgaio_submit_staged(), and ReadBuffersCanStartIOOnce().

Referenced by AsyncReadBuffers().

◆ ReadBuffersCanStartIOOnce()

static bool ReadBuffersCanStartIOOnce ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1652 of file bufmgr.c.

1653{
1654 if (BufferIsLocal(buffer))
1655 return StartLocalBufferIO(GetLocalBufferDescriptor(-buffer - 1),
1656 true, nowait);
1657 else
1658 return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1659}
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
Definition localbuf.c:523

References PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), StartBufferIO(), and StartLocalBufferIO().

Referenced by ReadBuffersCanStartIO().

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 949 of file bufmgr.c.

952{
953 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
954
955 return ReadBuffer_common(NULL, smgr,
956 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
957 forkNum, blockNum,
958 mode, strategy);
959}

References fb(), INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().

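A hypothetical sketch of the relcache-free path, as used during recovery and database copying, where only a RelFileLocator is at hand rather than an open Relation:

#include "postgres.h"

#include "storage/bufmgr.h"

/* Hypothetical: read a block identified only by a RelFileLocator. */
static Buffer
example_read_without_relcache(RelFileLocator rlocator, BlockNumber blkno)
{
    /* "true" asserts the relation is permanent (affects persistence) */
    return ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                     RBM_NORMAL, NULL, true);
}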
◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 804 of file bufmgr.c.

806{
807 BufferDesc *bufHdr;
808 BufferTag tag;
809 uint64 buf_state;
810
811 Assert(BufferIsValid(recent_buffer));
812
813 ResourceOwnerEnlarge(CurrentResourceOwner);
814 ReservePrivateRefCountEntry();
815 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
816
817 if (BufferIsLocal(recent_buffer))
818 {
819 int b = -recent_buffer - 1;
820
821 bufHdr = GetLocalBufferDescriptor(b);
822 buf_state = pg_atomic_read_u64(&bufHdr->state);
823
824 /* Is it still valid and holding the right tag? */
825 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
826 {
827 PinLocalBuffer(bufHdr, true);
828
830
831 return true;
832 }
833 }
834 else
835 {
836 bufHdr = GetBufferDescriptor(recent_buffer - 1);
837
838 /*
839 * Is it still valid and holding the right tag? We do an unlocked tag
840 * comparison first, to make it unlikely that we'll increment the
841 * usage counter of the wrong buffer, if someone calls us with a very
842 * out of date recent_buffer. Then we'll check it again if we get the
843 * pin.
844 */
845 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
846 PinBuffer(bufHdr, NULL, true))
847 {
848 if (BufferTagsEqual(&tag, &bufHdr->tag))
849 {
850 pgBufferUsage.shared_blks_hit++;
851 return true;
852 }
853 UnpinBuffer(bufHdr);
854 }
855 }
856
857 return false;
858}

References Assert, b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u64(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().

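A hypothetical sketch of the intended pattern: remember which buffer held a block, retry that buffer optimistically, and fall back to the normal mapping-table lookup when the hint has gone stale:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical: re-read a block via a cached buffer hint. */
static Buffer
example_reread_block(Relation rel, BlockNumber blkno, Buffer *recent)
{
    if (BufferIsValid(*recent) &&
        ReadRecentBuffer(rel->rd_locator, MAIN_FORKNUM, blkno, *recent))
        return *recent;                 /* fast path: pinned, no lookup */

    *recent = ReadBuffer(rel, blkno);   /* slow path; refresh the hint */
    return *recent;
}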
◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 5264 of file bufmgr.c.

5267{
5268 Buffer srcBuf;
5269 Buffer dstBuf;
5270 Page srcPage;
5271 Page dstPage;
5272 bool use_wal;
5273 BlockNumber nblocks;
5274 BlockNumber blkno;
5275 PGIOAlignedBlock buf;
5276 BufferAccessStrategy bstrategy_src;
5277 BufferAccessStrategy bstrategy_dst;
5278 BlockRangeReadStreamPrivate p;
5279 ReadStream *src_stream;
5280 SMgrRelation src_smgr;
5281
5282 /*
5283 * In general, we want to write WAL whenever wal_level > 'minimal', but we
5284 * can skip it when copying any fork of an unlogged relation other than
5285 * the init fork.
5286 */
5287 use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
5288
5289 /* Get number of blocks in the source relation. */
5290 nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
5291 forkNum);
5292
5293 /* Nothing to copy; just return. */
5294 if (nblocks == 0)
5295 return;
5296
5297 /*
5298 * Bulk extend the destination relation of the same size as the source
5299 * relation before starting to copy block by block.
5300 */
5301 memset(buf.data, 0, BLCKSZ);
5302 smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
5303 buf.data, true);
5304
5305 /* This is a bulk operation, so use buffer access strategies. */
5306 bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
5307 bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
5308
5309 /* Initialize streaming read */
5310 p.current_blocknum = 0;
5311 p.last_exclusive = nblocks;
5312 src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER);
5313
5314 /*
5315 * It is safe to use batchmode as block_range_read_stream_cb takes no
5316 * locks.
5317 */
5318 src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL |
5319 READ_STREAM_USE_BATCHING,
5320 bstrategy_src,
5321 src_smgr,
5322 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
5323 forkNum,
5324 block_range_read_stream_cb,
5325 &p,
5326 0);
5327
5328 /* Iterate over each block of the source relation file. */
5329 for (blkno = 0; blkno < nblocks; blkno++)
5330 {
5331 CHECK_FOR_INTERRUPTS();
5332
5333 /* Read block from source relation. */
5334 srcBuf = read_stream_next_buffer(src_stream, NULL);
5335 LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
5336 srcPage = BufferGetPage(srcBuf);
5337
5338 dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum,
5339 BufferGetBlockNumber(srcBuf),
5340 RBM_ZERO_AND_LOCK, bstrategy_dst,
5341 permanent);
5342 dstPage = BufferGetPage(dstBuf);
5343
5344 START_CRIT_SECTION();
5345
5346 /* Copy page data from the source to the destination. */
5347 memcpy(dstPage, srcPage, BLCKSZ);
5348 MarkBufferDirty(dstBuf);
5349
5350 /* WAL-log the copied page. */
5351 if (use_wal)
5352 log_newpage_buffer(dstBuf, true);
5353
5354 END_CRIT_SECTION();
5355
5356 UnlockReleaseBuffer(dstBuf);
5357 UnlockReleaseBuffer(srcBuf);
5358 }
5359 Assert(read_stream_next_buffer(src_stream, NULL) == InvalidBuffer);
5360 read_stream_end(src_stream);
5361
5362 FreeAccessStrategy(bstrategy_src);
5363 FreeAccessStrategy(bstrategy_dst);
5364}
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5519
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition bufmgr.c:949
@ BAS_BULKREAD
Definition bufmgr.h:37
@ BAS_BULKWRITE
Definition bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
void read_stream_end(ReadStream *stream)
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define READ_STREAM_FULL
Definition read_stream.h:43
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition smgr.c:620
#define XLogIsNeeded()
Definition xlog.h:111
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

References Assert, BAS_BULKREAD, BAS_BULKWRITE, block_range_read_stream_cb(), buf, BUFFER_LOCK_SHARE, BufferGetBlockNumber(), BufferGetPage(), CHECK_FOR_INTERRUPTS, BlockRangeReadStreamPrivate::current_blocknum, END_CRIT_SECTION, fb(), FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_ZERO_AND_LOCK, read_stream_begin_smgr_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4573 of file bufmgr.c.

4574{
4575 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4576 {
4577 /*
4578 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4579 * tableam returns the size in bytes - but for the purpose of this
4580 * routine, we want the number of blocks. Therefore divide, rounding
4581 * up.
4582 */
4584
4585 szbytes = table_relation_size(relation, forkNum);
4586
4587 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4588 }
4589 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4590 {
4591 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4592 }
4593 else
4594 Assert(false);
4595
4596 return 0; /* keep compiler quiet */
4597}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition tableam.h:1859

References Assert, fb(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().

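A hypothetical sketch; note that for table AMs the value is derived from a byte size rounded up to whole BLCKSZ pages, so the call is also meaningful for AMs whose internal blocks are not BLCKSZ-sized:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical: log the sizes of a relation's main and FSM forks. */
static void
example_report_fork_sizes(Relation rel)
{
    BlockNumber main_blocks = RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);
    BlockNumber fsm_blocks = RelationGetNumberOfBlocksInFork(rel, FSM_FORKNUM);

    elog(DEBUG1, "\"%s\": %u main blocks, %u FSM blocks",
         RelationGetRelationName(rel), main_blocks, fsm_blocks);
}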
◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3122 of file bufmgr.c.

3125{
3126 ForkNumber forkNum = MAIN_FORKNUM;
3127 BufferDesc *bufHdr;
3128
3129 if (BufferIsValid(buffer))
3130 {
3131 Assert(BufferIsPinned(buffer));
3132 if (BufferIsLocal(buffer))
3133 {
3134 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3135 if (bufHdr->tag.blockNum == blockNum &&
3136 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3137 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3138 return buffer;
3139 UnpinLocalBuffer(buffer);
3140 }
3141 else
3142 {
3143 bufHdr = GetBufferDescriptor(buffer - 1);
3144 /* we have pin, so it's ok to examine tag without spinlock */
3145 if (bufHdr->tag.blockNum == blockNum &&
3146 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3147 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3148 return buffer;
3149 UnpinBuffer(bufHdr);
3150 }
3151 }
3152
3153 return ReadBuffer(relation, blockNum);
3154}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:865

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().

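A hypothetical sketch of the access pattern this is built for: when consecutive probes often land on the block that is already pinned (as in repeated btree descents), ReleaseAndReadBuffer() turns the unpin/re-pin round trip into a no-op:

#include "postgres.h"

#include "storage/bufmgr.h"

/* Hypothetical: visit a sequence of blocks, reusing the pin on repeats. */
static void
example_visit_blocks(Relation rel, const BlockNumber *blocks, int nblocks)
{
    Buffer      buf = InvalidBuffer;

    for (int i = 0; i < nblocks; i++)
    {
        /* keeps the existing pin when blocks[i] is already pinned */
        buf = ReleaseAndReadBuffer(buf, rel, blocks[i]);
        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... examine BufferGetPage(buf) ... */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    }

    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
}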
◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5502 of file bufmgr.c.

5503{
5504 if (!BufferIsValid(buffer))
5505 elog(ERROR, "bad buffer ID: %d", buffer);
5506
5507 if (BufferIsLocal(buffer))
5508 UnpinLocalBuffer(buffer);
5509 else
5510 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5511}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 294 of file bufmgr.c.

295{
296 /* Already reserved (or freed), nothing to do */
297 if (ReservedRefCountSlot != -1)
298 return;
299
300 /*
301 * First search for a free entry the array, that'll be sufficient in the
302 * majority of cases.
303 */
304 {
305 int i;
306
307 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
308 {
309 if (PrivateRefCountArrayKeys[i] == InvalidBuffer)
310 {
311 ReservedRefCountSlot = i;
312
313 /*
314 * We could return immediately, but iterating till the end of
315 * the array allows compiler-autovectorization.
316 */
317 }
318 }
319
320 if (ReservedRefCountSlot != -1)
321 return;
322 }
323
324 /*
325 * No luck. All array entries are full. Move one array entry into the hash
326 * table.
327 */
328 {
329 /*
330 * Move entry from the current clock position in the array into the
331 * hashtable. Use that slot.
332 */
333 int victim_slot;
336 bool found;
337
338 /* select victim slot */
342
343 /* Better be used, otherwise we shouldn't get here. */
347
348 /* enter victim array entry into hashtable */
352 &found);
353 Assert(!found);
354 /* move data from the entry in the array to the hash entry */
355 hashent->data = victim_entry->data;
356
357 /* clear the now free array slot */
359 victim_entry->buffer = InvalidBuffer;
360
361 /* clear the whole data member, just for future proofing */
362 memset(&victim_entry->data, 0, sizeof(victim_entry->data));
363 victim_entry->data.refcount = 0;
364 victim_entry->data.lockmode = BUFFER_LOCK_UNLOCK;
365
366 PrivateRefCountOverflowed++;
367 }
368}
static uint32 PrivateRefCountClock
Definition bufmgr.c:252
@ HASH_ENTER
Definition hsearch.h:114

References Assert, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, fb(), HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by BufferAlloc(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntrySlow(), GetVictimBuffer(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), ReadRecentBuffer(), and SyncOneBuffer().

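All of the callers above follow the same convention, sketched below (illustrative only: ReservePrivateRefCountEntry() and its companions are static to bufmgr.c, so this fragment describes the file-local protocol rather than a public API): do everything that might allocate memory before taking a buffer header spinlock, so the pin itself cannot fail midway.

/* Inside bufmgr.c, before pinning a buffer under its header spinlock. */
static void
example_prepare_then_pin(BufferDesc *buf_hdr)
{
    uint64      buf_state;

    /* steps that may allocate memory happen first, outside any spinlock */
    ResourceOwnerEnlarge(CurrentResourceOwner);
    ReservePrivateRefCountEntry();

    buf_state = LockBufHdr(buf_hdr);    /* from here on, nothing may fail */
    /* ... real callers pin here, e.g. via PinBuffer_Locked() ... */
    UnlockBufHdr(buf_hdr);              /* sketch only; PinBuffer_Locked()
                                         * releases the header lock itself */
    (void) buf_state;
}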
◆ ResOwnerPrintBuffer()

static char * ResOwnerPrintBuffer ( Datum  res)
static

Definition at line 7471 of file bufmgr.c.

7472{
7473 return DebugPrintBufferRefcount(DatumGetInt32(res));
7474}
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212

References DatumGetInt32(), and DebugPrintBufferRefcount().

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 7421 of file bufmgr.c.

7422{
7423 Buffer buffer = DatumGetInt32(res);
7424
7425 return psprintf("lost track of buffer IO on buffer %d", buffer);
7426}

References PrivateRefCountEntry::buffer, DatumGetInt32(), and psprintf().

◆ ResOwnerReleaseBuffer()

static void ResOwnerReleaseBuffer ( Datum  res)
static

Definition at line 7435 of file bufmgr.c.

7436{
7437 Buffer buffer = DatumGetInt32(res);
7438
7439 /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
7440 if (!BufferIsValid(buffer))
7441 elog(ERROR, "bad buffer ID: %d", buffer);
7442
7443 if (BufferIsLocal(buffer))
7444 UnpinLocalBufferNoOwner(buffer);
7445 else
7446 {
7447 PrivateRefCountEntry *ref;
7448
7449 ref = GetPrivateRefCountEntry(buffer, false);
7450
7451 /* not having a private refcount would imply resowner corruption */
7452 Assert(ref != NULL);
7453
7454 /*
7455 * If the buffer was locked at the time of the resowner release,
7456 * release the lock now. This should only happen after errors.
7457 */
7458 if (ref->data.lockmode != BUFFER_LOCK_UNLOCK)
7459 {
7460 BufferDesc *buf = GetBufferDescriptor(buffer - 1);
7461
7462 HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
7463 BufferLockUnlock(buffer, buf);
7464 }
7465
7466 UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
7467 }
7468}
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition bufmgr.c:3370
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition localbuf.c:848

References Assert, buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), BufferLockUnlock(), DatumGetInt32(), elog, ERROR, fb(), GetBufferDescriptor(), GetPrivateRefCountEntry(), HOLD_INTERRUPTS, UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 7413 of file bufmgr.c.

7414{
7415 Buffer buffer = DatumGetInt32(res);
7416
7417 AbortBufferIO(buffer);
7418}
static void AbortBufferIO(Buffer buffer)
Definition bufmgr.c:7010

References AbortBufferIO(), PrivateRefCountEntry::buffer, and DatumGetInt32().

◆ rlocator_comparator()

static int rlocator_comparator ( const void p1,
const void p2 
)
static

Definition at line 7081 of file bufmgr.c.

7082{
7083 RelFileLocator n1 = *(const RelFileLocator *) p1;
7084 RelFileLocator n2 = *(const RelFileLocator *) p2;
7085
7086 if (n1.relNumber < n2.relNumber)
7087 return -1;
7088 else if (n1.relNumber > n2.relNumber)
7089 return 1;
7090
7091 if (n1.dbOid < n2.dbOid)
7092 return -1;
7093 else if (n1.dbOid > n2.dbOid)
7094 return 1;
7095
7096 if (n1.spcOid < n2.spcOid)
7097 return -1;
7098 else if (n1.spcOid > n2.spcOid)
7099 return 1;
7100 else
7101 return 0;
7102}

References fb().

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 7280 of file bufmgr.c.

7282{
7283 PendingWriteback *pending;
7284
7285 /*
7286 * As pg_flush_data() doesn't do anything with fsync disabled, there's no
7287 * point in tracking in that case.
7288 */
7290 !enableFsync)
7291 return;
7292
7293 /*
7294 * Add buffer to the pending writeback array, unless writeback control is
7295 * disabled.
7296 */
7297 if (*wb_context->max_pending > 0)
7298 {
7299 Assert(*wb_context->max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
7300
7301 pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
7302
7303 pending->tag = *tag;
7304 }
7305
7306 /*
7307 * Perform pending flushes if the writeback limit is exceeded. This
7308 * includes the case where previously an item has been added, but control
7309 * is now disabled.
7310 */
7311 if (wb_context->nr_pending >= *wb_context->max_pending)
7312 IssuePendingWritebacks(wb_context, io_context);
7313}
bool enableFsync
Definition globals.c:129
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert, enableFsync, fb(), IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().

◆ shared_buffer_readv_complete()

static PgAioResult shared_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8463 of file bufmgr.c.

8465{
8466 return buffer_readv_complete(ioh, prior_result, cb_data, false);
8467}

References buffer_readv_complete(), and fb().

◆ shared_buffer_readv_complete_local()

static PgAioResult shared_buffer_readv_complete_local ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 8477 of file bufmgr.c.

◆ shared_buffer_readv_stage()

static void shared_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 8457 of file bufmgr.c.

8458{
8459 buffer_stage_common(ioh, false, false);
8460}

References buffer_stage_common(), and fb().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void arg)
static

Definition at line 7049 of file bufmgr.c.

7050{
7051 BufferDesc *bufHdr = (BufferDesc *) arg;
7052
7053 /* Buffer is pinned, so we can read the tag without locking the spinlock */
7054 if (bufHdr != NULL)
7055 errcontext("writing block %u of relation \"%s\"",
7056 bufHdr->tag.blockNum,
7057 relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
7058 BufTagGetForkNum(&bufHdr->tag)).str);
7059}

References arg, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, fb(), and relpathperm.

Referenced by FlushBuffer().

◆ StartBufferIO()

bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)

Definition at line 6890 of file bufmgr.c.

6891{
6892 uint64 buf_state;
6893
6894 ResourceOwnerEnlarge(CurrentResourceOwner);
6895
6896 for (;;)
6897 {
6898 buf_state = LockBufHdr(buf);
6899
6900 if (!(buf_state & BM_IO_IN_PROGRESS))
6901 break;
6902 UnlockBufHdr(buf);
6903 if (nowait)
6904 return false;
6905 WaitIO(buf);
6906 }
6907
6908 /* Once we get here, there is definitely no I/O active on this buffer */
6909
6910 /* Check if someone else already did the I/O */
6911 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6912 {
6914 return false;
6915 }
6916
6919 0);
6920
6923
6924 return true;
6925}
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, fb(), LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by buffer_call_start_io(), ExtendBufferedRelShared(), FlushBuffer(), read_rel_block_ll(), ReadBuffersCanStartIOOnce(), and ZeroAndLockBuffer().

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1609 of file bufmgr.c.

1613{
1614 int nblocks = 1;
1615 bool result;
1616
1617 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1618 false /* single block, no forwarding */ );
1619 Assert(nblocks == 1); /* single block can't be short */
1620
1621 return result;
1622}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition bufmgr.c:1363

References Assert, PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

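A hypothetical sketch of the two-step API, mirroring ReadBuffer_common() above: populate the operation, let StartReadBuffer() pin the buffer (and possibly begin I/O), and call WaitReadBuffers() only when the return value reports that I/O was started:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical: single-block read via the Start/Wait API. */
static Buffer
example_start_wait_read(Relation rel, BlockNumber blkno)
{
    ReadBuffersOperation operation;
    Buffer      buf;

    operation.rel = rel;
    operation.smgr = RelationGetSmgr(rel);
    operation.persistence = rel->rd_rel->relpersistence;
    operation.forknum = MAIN_FORKNUM;
    operation.strategy = NULL;

    if (StartReadBuffer(&operation, &buf, blkno, 0))
        WaitReadBuffers(&operation);    /* a miss: finish the I/O */

    return buf;                         /* pinned, not yet locked */
}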
◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags 
)

Definition at line 1590 of file bufmgr.c.

1595{
1596 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1597 true /* expect forwarded buffers */ );
1598}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int nblocks,
int  flags,
bool  allow_forwarding 
)
static

Definition at line 1363 of file bufmgr.c.

1369{
1370 int actual_nblocks = *nblocks;
1371 int maxcombine = 0;
1372 bool did_start_io;
1373
1374 Assert(*nblocks == 1 || allow_forwarding);
1375 Assert(*nblocks > 0);
1376 Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1377
1378 for (int i = 0; i < actual_nblocks; ++i)
1379 {
1380 bool found;
1381
1382 if (allow_forwarding && buffers[i] != InvalidBuffer)
1383 {
1384 BufferDesc *bufHdr;
1385
1386 /*
1387 * This is a buffer that was pinned by an earlier call to
1388 * StartReadBuffers(), but couldn't be handled in one operation at
1389 * that time. The operation was split, and the caller has passed
1390 * an already pinned buffer back to us to handle the rest of the
1391 * operation. It must continue at the expected block number.
1392 */
1393 Assert(BufferGetBlockNumber(buffers[i]) == blockNum + i);
1394
1395 /*
1396 * It might be an already valid buffer (a hit) that followed the
1397 * final contiguous block of an earlier I/O (a miss) marking the
1398 * end of it, or a buffer that some other backend has since made
1399 * valid by performing the I/O for us, in which case we can handle
1400 * it as a hit now. It is safe to check for a BM_VALID flag with
1401 * a relaxed load, because we got a fresh view of it while pinning
1402 * it in the previous call.
1403 *
1404 * On the other hand if we don't see BM_VALID yet, it must be an
1405 * I/O that was split by the previous call and we need to try to
1406 * start a new I/O from this block. We're also racing against any
1407 * other backend that might start the I/O or even manage to mark
1408 * it BM_VALID after this check, but StartBufferIO() will handle
1409 * those cases.
1410 */
1411 if (BufferIsLocal(buffers[i]))
1412 bufHdr = GetLocalBufferDescriptor(-buffers[i] - 1);
1413 else
1414 bufHdr = GetBufferDescriptor(buffers[i] - 1);
1415 Assert(pg_atomic_read_u64(&bufHdr->state) & BM_TAG_VALID);
1416 found = pg_atomic_read_u64(&bufHdr->state) & BM_VALID;
1417 }
1418 else
1419 {
1420 buffers[i] = PinBufferForBlock(operation->rel,
1421 operation->smgr,
1422 operation->persistence,
1423 operation->forknum,
1424 blockNum + i,
1425 operation->strategy,
1426 &found);
1427 }
1428
1429 if (found)
1430 {
1431 /*
1432 * We have a hit. If it's the first block in the requested range,
1433 * we can return it immediately and report that WaitReadBuffers()
1434 * does not need to be called. If the initial value of *nblocks
1435 * was larger, the caller will have to call again for the rest.
1436 */
1437 if (i == 0)
1438 {
1439 *nblocks = 1;
1440
1441#ifdef USE_ASSERT_CHECKING
1442
1443 /*
1444 * Initialize enough of ReadBuffersOperation to make
1445 * CheckReadBuffersOperation() work. Outside of assertions
1446 * that's not necessary when no IO is issued.
1447 */
1448 operation->buffers = buffers;
1449 operation->blocknum = blockNum;
1450 operation->nblocks = 1;
1451 operation->nblocks_done = 1;
1452 CheckReadBuffersOperation(operation, true);
1453#endif
1454 return false;
1455 }
1456
1457 /*
1458 * Otherwise we already have an I/O to perform, but this block
1459 * can't be included as it is already valid. Split the I/O here.
1460 * There may or may not be more blocks requiring I/O after this
1461 * one, we haven't checked, but they can't be contiguous with this
1462 * one in the way. We'll leave this buffer pinned, forwarding it
1463 * to the next call, avoiding the need to unpin it here and re-pin
1464 * it in the next call.
1465 */
1466 actual_nblocks = i;
1467 break;
1468 }
1469 else
1470 {
1471 /*
1472 * Check how many blocks we can cover with the same IO. The smgr
1473 * implementation might e.g. be limited due to a segment boundary.
1474 */
1475 if (i == 0 && actual_nblocks > 1)
1476 {
1477 maxcombine = smgrmaxcombine(operation->smgr,
1478 operation->forknum,
1479 blockNum);
1480 if (unlikely(maxcombine < actual_nblocks))
1481 {
1482 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1483 blockNum, actual_nblocks, maxcombine);
1484 actual_nblocks = maxcombine;
1485 }
1486 }
1487 }
1488 }
1489 *nblocks = actual_nblocks;
1490
1491 /* Populate information needed for I/O. */
1492 operation->buffers = buffers;
1493 operation->blocknum = blockNum;
1494 operation->flags = flags;
1495 operation->nblocks = actual_nblocks;
1496 operation->nblocks_done = 0;
1497 pgaio_wref_clear(&operation->io_wref);
1498
1499 /*
1500 * When using AIO, start the IO in the background. If not, issue prefetch
1501 * requests if desired by the caller.
1502 *
1503 * The reason we have a dedicated path for IOMETHOD_SYNC here is to
1504 * de-risk the introduction of AIO somewhat. It's a large architectural
1505 * change, with lots of chances for unanticipated performance effects.
1506 *
1507 * Use of IOMETHOD_SYNC already leads to not actually performing IO
1508 * asynchronously, but without the check here we'd execute IO earlier than
1509 * we used to. Eventually this IOMETHOD_SYNC specific path should go away.
1510 */
1511 if (io_method != IOMETHOD_SYNC)
1512 {
1513 /*
1514 * Try to start IO asynchronously. It's possible that no IO needs to
1515 * be started, if another backend already performed the IO.
1516 *
1517 * Note that if an IO is started, it might not cover the entire
1518 * requested range, e.g. because an intermediary block has been read
1519 * in by another backend. In that case any "trailing" buffers we
1520 * already pinned above will be "forwarded" by read_stream.c to the
1521 * next call to StartReadBuffers().
1522 *
1523 * This is signalled to the caller by decrementing *nblocks *and*
1524 * reducing operation->nblocks. The latter is done here, but not below
1525 * WaitReadBuffers(), as in WaitReadBuffers() we can't "shorten" the
1526 * overall read size anymore, we need to retry until done in its
1527 * entirety or until failed.
1528 */
1529 did_start_io = AsyncReadBuffers(operation, nblocks);
1530
1531 operation->nblocks = *nblocks;
1532 }
1533 else
1534 {
1535 operation->flags |= READ_BUFFERS_SYNCHRONOUSLY;
1536
1537 if (flags & READ_BUFFERS_ISSUE_ADVICE)
1538 {
1539 /*
1540 * In theory we should only do this if PinBufferForBlock() had to
1541 * allocate new buffers above. That way, if two calls to
1542 * StartReadBuffers() were made for the same blocks before
1543 * WaitReadBuffers(), only the first would issue the advice.
1544 * That'd be a better simulation of true asynchronous I/O, which
1545 * would only start the I/O once, but isn't done here for
1546 * simplicity.
1547 */
1548 smgrprefetch(operation->smgr,
1549 operation->forknum,
1550 blockNum,
1551 actual_nblocks);
1552 }
1553
1554 /*
1555 * Indicate that WaitReadBuffers() should be called. WaitReadBuffers()
1556 * will initiate the necessary IO.
1557 */
1558 did_start_io = true;
1559 }
1560
1562
1563 return did_start_io;
1564}
int io_method
Definition aio.c:74
@ IOMETHOD_SYNC
Definition aio.h:34
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition bufmgr.c:1628
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition bufmgr.c:1865
#define READ_BUFFERS_ISSUE_ADVICE
Definition bufmgr.h:124
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition smgr.c:697

References Assert, AsyncReadBuffers(), ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, CheckReadBuffersOperation(), DEBUG2, elog, fb(), ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, InvalidBuffer, io_method, ReadBuffersOperation::io_wref, IOMETHOD_SYNC, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, pg_atomic_read_u64(), pgaio_wref_clear(), PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, READ_BUFFERS_SYNCHRONOUSLY, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, smgrmaxcombine(), smgrprefetch(), ReadBuffersOperation::strategy, and unlikely.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 4034 of file bufmgr.c.

4035{
4036 BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
4037 int result = 0;
4038 uint64 buf_state;
4039 BufferTag tag;
4040
4041 /* Make sure we can handle the pin */
4042 ReservePrivateRefCountEntry();
4043 ResourceOwnerEnlarge(CurrentResourceOwner);
4044
4045 /*
4046 * Check whether buffer needs writing.
4047 *
4048 * We can make this check without taking the buffer content lock so long
4049 * as we mark pages dirty in access methods *before* logging changes with
4050 * XLogInsert(): if someone marks the buffer dirty just after our check we
4051 * don't worry because our checkpoint.redo points before log record for
4052 * upcoming changes and so we are not required to write such dirty buffer.
4053 */
4054 buf_state = LockBufHdr(bufHdr);
4055
4056 if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
4057 BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
4058 {
4059 result |= BUF_REUSABLE;
4060 }
4061 else if (skip_recently_used)
4062 {
4063 /* Caller told us not to write recently-used buffers */
4064 UnlockBufHdr(bufHdr);
4065 return result;
4066 }
4067
4068 if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4069 {
4070 /* It's clean, so nothing to do */
4071 UnlockBufHdr(bufHdr);
4072 return result;
4073 }
4074
4075 /*
4076 * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
4077 * buffer is clean by the time we've locked it.)
4078 */
4079 PinBuffer_Locked(bufHdr);
4080
4081 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4082
4083 tag = bufHdr->tag;
4084
4085 UnpinBuffer(bufHdr);
4086
4087 /*
4088 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
4089 * IOContext will always be IOCONTEXT_NORMAL.
4090 */
4091 ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
4092
4093 return result | BUF_WRITTEN;
4094}

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, CurrentResourceOwner, fb(), FlushUnlockedBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().

◆ TerminateBufferIO()

void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint64  set_flag_bits,
bool  forget_owner,
bool  release_aio 
)

Definition at line 6948 of file bufmgr.c.

6950{
6953 int refcount_change = 0;
6954
6956
6959
6960 /* Clear earlier errors, if this IO failed, it'll be marked again */
6962
6965
6966 if (release_aio)
6967 {
6968 /* release ownership by the AIO subsystem */
6970 refcount_change = -1;
6971 pgaio_wref_clear(&buf->io_wref);
6972 }
6973
6977
6978 if (forget_owner)
6981
6983
6984 /*
6985 * Support LockBufferForCleanup()
6986 *
6987 * We may have just released the last pin other than the waiter's. In most
6988 * cases, this backend holds another pin on the buffer. But, if, for
6989 * example, this backend is completing an IO issued by another backend, it
6990 * may be time to wake the waiter.
6991 */
6994}
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void WakePinCountWaiter(BufferDesc *buf)
Definition bufmgr.c:3325
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, fb(), LockBufHdr(), pgaio_wref_clear(), ResourceOwnerForgetBufferIO(), UnlockBufHdrExt(), and WakePinCountWaiter().

Referenced by AbortBufferIO(), buffer_call_terminate_io(), buffer_readv_complete_one(), ExtendBufferedRelShared(), FlushBuffer(), and ZeroAndLockBuffer().

◆ TrackNewBufferPin()

void TrackNewBufferPin ( Buffer  buf)
inline

Definition at line 3417 of file bufmgr.c.

3418{
3419 PrivateRefCountEntry *ref;
3420
3421 ref = NewPrivateRefCountEntry(buf);
3422 ref->data.refcount++;
3423
3424 ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
3425
3426 /*
3427 * This is the first pin for this page by this backend, mark its page as
3428 * defined to valgrind. While the page contents might not actually be
3429 * valid yet, we don't currently guarantee that such pages are marked
3430 * undefined or non-accessible.
3431 *
3432 * It's not necessarily the prettiest to do this here, but otherwise we'd
3433 * need this block of code in multiple places.
3434 */
3435 VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(GetBufferDescriptor(buf - 1)),
3436 BLCKSZ);
3437}
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition bufmgr.c:374

References buf, BufHdrGetBlock, CurrentResourceOwner, fb(), GetBufferDescriptor(), NewPrivateRefCountEntry(), ResourceOwnerRememberBuffer(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by GetBufferFromRing(), PinBuffer(), PinBuffer_Locked(), and StrategyGetBuffer().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void arg 
)
static

Definition at line 7245 of file bufmgr.c.

7246{
7247 CkptTsStatus *sa = (CkptTsStatus *) DatumGetPointer(a);
7248 CkptTsStatus *sb = (CkptTsStatus *) DatumGetPointer(b);
7249
7250 /* we want a min-heap, so return 1 for the a < b */
7251 if (sa->progress < sb->progress)
7252 return 1;
7253 else if (sa->progress == sb->progress)
7254 return 0;
7255 else
7256 return -1;
7257}

References a, b, DatumGetPointer(), and fb().

Referenced by BufferSync().

◆ UnlockBuffer()

void UnlockBuffer ( Buffer  buffer)

Definition at line 6416 of file bufmgr.c.

6417{
6418 BufferDesc *buf_hdr;
6419
6420 Assert(BufferIsPinned(buffer));
6421 if (BufferIsLocal(buffer))
6422 return; /* local buffers need no lock */
6423
6424 buf_hdr = GetBufferDescriptor(buffer - 1);
6425 BufferLockUnlock(buffer, buf_hdr);
6426}

References Assert, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferLockUnlock(), fb(), and GetBufferDescriptor().

Referenced by LockBuffer().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5710 of file bufmgr.c.

5711{
5712 BufferDesc *buf = PinCountWaitBuf;
5713
5714 if (buf)
5715 {
5716 uint64 buf_state;
5717 uint64 unset_bits = 0;
5718
5719 buf_state = LockBufHdr(buf);
5720
5721 /*
5722 * Don't complain if flag bit not set; it could have been reset but we
5723 * got a cancel/die interrupt before getting the signal.
5724 */
5725 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5726 buf->wait_backend_pgprocno == MyProcNumber)
5727 unset_bits = BM_PIN_COUNT_WAITER;
5728
5729 UnlockBufHdrExt(buf, buf_state,
5730 0, unset_bits,
5731 0);
5732
5733 PinCountWaitBuf = NULL;
5734 }
5735}

References BM_PIN_COUNT_WAITER, buf, fb(), LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5519 of file bufmgr.c.

5520{
5522 ReleaseBuffer(buffer);
5523}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), 
spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

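For clarity, a hypothetical sketch of what this convenience wrapper expands to:

#include "postgres.h"

#include "storage/bufmgr.h"

/* Hypothetical: equivalent to calling UnlockReleaseBuffer(buffer). */
static void
example_unlock_release(Buffer buffer)
{
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);     /* drop the content lock */
    ReleaseBuffer(buffer);                      /* then drop the pin */
}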
◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc buf)
static

Definition at line 3370 of file bufmgr.c.

3371{
3372 PrivateRefCountEntry *ref;
3373 Buffer b = BufferDescriptorGetBuffer(buf);
3374
3375 Assert(!BufferIsLocal(b));
3376
3377 /* not moving as we're likely deleting it soon anyway */
3378 ref = GetPrivateRefCountEntry(b, false);
3379 Assert(ref != NULL);
3380 Assert(ref->data.refcount > 0);
3381 ref->data.refcount--;
3382 if (ref->data.refcount == 0)
3383 {
3384 uint64 buf_state;
3385
3386 /*
3387 * Mark buffer non-accessible to Valgrind.
3388 *
3389 * Note that the buffer may have already been marked non-accessible
3390 * within access method code that enforces that buffers are only
3391 * accessed while a buffer lock is held.
3392 */
3393 VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
3394
3395 /*
3396 * I'd better not still hold the buffer content lock. Can't use
3397 * BufferIsLockedByMe(), as that asserts the buffer is pinned.
3398 */
3399 Assert(!BufferLockHeldByMe(buf));
3400
3401 /* decrement the shared reference count */
3402 buf_state = pg_atomic_fetch_sub_u64(&buf->state, BUF_REFCOUNT_ONE);
3403
3404 /* Support LockBufferForCleanup() */
3405 if (buf_state & BM_PIN_COUNT_WAITER)
3406 WakePinCountWaiter(buf);
3407
3408 ForgetPrivateRefCountEntry(ref);
3409 }
3410}
static uint64 pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
Definition atomics.h:541
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
Definition bufmgr.c:552

References Assert, b, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, BufferLockHeldByMe(), BufHdrGetBlock, fb(), ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), pg_atomic_fetch_sub_u64(), VALGRIND_MAKE_MEM_NOACCESS, and WakePinCountWaiter().

Referenced by ResOwnerReleaseBuffer(), and UnpinBuffer().

◆ WaitBufHdrUnlocked()

pg_noinline uint64 WaitBufHdrUnlocked ( BufferDesc buf)

◆ WaitIO()

static void WaitIO ( BufferDesc buf)
static

Definition at line 6811 of file bufmgr.c.

6812{
6813 ConditionVariable *cv = BufferDescriptorGetIOCV(buf);
6814
6815 ConditionVariablePrepareToSleep(cv);
6816 for (;;)
6817 {
6818 uint64 buf_state;
6819 PgAioWaitRef iow;
6820
6821 /*
6822 * It may not be necessary to acquire the spinlock to check the flag
6823 * here, but since this test is essential for correctness, we'd better
6824 * play it safe.
6825 */
6826 buf_state = LockBufHdr(buf);
6827
6828 /*
6829 * Copy the wait reference while holding the spinlock. This protects
6830 * against a concurrent TerminateBufferIO() in another backend from
6831 * clearing the wref while it's being read.
6832 */
6833 iow = buf->io_wref;
6834 UnlockBufHdr(buf);
6835
6836 /* no IO in progress, we don't need to wait */
6837 if (!(buf_state & BM_IO_IN_PROGRESS))
6838 break;
6839
6840 /*
6841 * The buffer has asynchronous IO in progress, wait for it to
6842 * complete.
6843 */
6844 if (pgaio_wref_valid(&iow))
6845 {
6846 pgaio_wref_wait(&iow);
6847
6848 /*
6849 * The AIO subsystem internally uses condition variables and thus
6850 * might remove this backend from the BufferDesc's CV. While that
6851 * wouldn't cause a correctness issue (the first CV sleep just
6852 * immediately returns if not already registered), it seems worth
6853 * avoiding unnecessary loop iterations, given that we take care
6854 * to do so at the start of the function.
6855 */
6856 ConditionVariableCancelSleep();
6857 continue;
6858 }
6859
6860 /* wait on BufferDesc->cv, e.g. for concurrent synchronous IO */
6861 ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
6862 }
6863 ConditionVariableCancelSleep();
6864}
void pgaio_wref_wait(PgAioWaitRef *iow)
Definition aio.c:991
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), fb(), LockBufHdr(), pgaio_wref_valid(), pgaio_wref_wait(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation operation)

Definition at line 1733 of file bufmgr.c.

1734{
1735 PgAioReturn *aio_ret = &operation->io_return;
1736 IOContext io_context;
1737 IOObject io_object;
1738
1739 if (operation->persistence == RELPERSISTENCE_TEMP)
1740 {
1741 io_context = IOCONTEXT_NORMAL;
1742 io_object = IOOBJECT_TEMP_RELATION;
1743 }
1744 else
1745 {
1746 io_context = IOContextForStrategy(operation->strategy);
1747 io_object = IOOBJECT_RELATION;
1748 }
1749
1750 /*
1751 * If we get here without an IO operation having been issued, the
1752 * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
1753 * caller should not have called WaitReadBuffers().
1754 *
1755 * In the case of IOMETHOD_SYNC, we start - as we used to before the
1756 * introduction of AIO - the IO in WaitReadBuffers(). This is done as part
1757 * of the retry logic below, no extra code is required.
1758 *
1759 * This path is expected to eventually go away.
1760 */
1761 if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
1762 elog(ERROR, "waiting for read operation that didn't read");
1763
1764 /*
1765 * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
1766 * done. We may need multiple retries, not just because we could get
1767 * multiple partial reads, but also because some of the remaining
1768 * to-be-read buffers may have been read in by other backends, limiting
1769 * the IO size.
1770 */
1771 while (true)
1772 {
1773 int ignored_nblocks_progress;
1774
1775 CheckReadBuffersOperation(operation, false);
1776
1777 /*
1778 * If there is an IO associated with the operation, we may need to
1779 * wait for it.
1780 */
1781 if (pgaio_wref_valid(&operation->io_wref))
1782 {
1783 /*
1784 * Track the time spent waiting for the IO to complete. As
1785 * tracking a wait even if we don't actually need to wait
1786 *
1787 * a) is not cheap, due to the timestamping overhead
1788 *
1789 * b) reports some time as waiting, even if we never waited
1790 *
1791 * we first check if we already know the IO is complete.
1792 */
1793 if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
1794 !pgaio_wref_check_done(&operation->io_wref))
1795 {
1796 instr_time io_start = pgstat_prepare_io_time(track_io_timing);
1797
1798 pgaio_wref_wait(&operation->io_wref);
1799
1800 /*
1801 * The IO operation itself was already counted earlier, in
1802 * AsyncReadBuffers(), this just accounts for the wait time.
1803 */
1804 pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
1805 io_start, 0, 0);
1806 }
1807 else
1808 {
1809 Assert(pgaio_wref_check_done(&operation->io_wref));
1810 }
1811
1812 /*
1813 * We now are sure the IO completed. Check the results. This
1814 * includes reporting on errors if there were any.
1815 */
1816 ProcessReadBuffersResult(operation);
1817 }
1818
1819 /*
1820 * Most of the time, the one IO we already started, will read in
1821 * everything. But we need to deal with partial reads and buffers not
1822 * needing IO anymore.
1823 */
1824 if (operation->nblocks_done == operation->nblocks)
1825 break;
1826
1827 CHECK_FOR_INTERRUPTS();
1828
1829 /*
1830 * This may only complete the IO partially, either because some
1831 * buffers were already valid, or because of a partial read.
1832 *
1833 * NB: In contrast to after the AsyncReadBuffers() call in
1834 * StartReadBuffers(), we do *not* reduce
1835 * ReadBuffersOperation->nblocks here, callers expect the full
1836 * operation to be completed at this point (as more operations may
1837 * have been queued).
1838 */
1839 AsyncReadBuffers(operation, &ignored_nblocks_progress);
1840 }
1841
1842 CheckReadBuffersOperation(operation, true);
1843
1844 /* NB: READ_DONE tracepoint was already executed in completion callback */
1845}
bool pgaio_wref_check_done(PgAioWaitRef *iow)
Definition aio.c:1005
static void ProcessReadBuffersResult(ReadBuffersOperation *operation)
Definition bufmgr.c:1694

References Assert, AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, fb(), io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ WakePinCountWaiter()

static void WakePinCountWaiter ( BufferDesc buf)
static

Definition at line 3325 of file bufmgr.c.

3326{
3327 /*
3328 * Acquire the buffer header lock, re-check that there's a waiter. Another
3329 * backend could have unpinned this buffer, and already woken up the
3330 * waiter.
3331 *
3332 * There's no danger of the buffer being replaced after we unpinned it
3333 * above, as it's pinned by the waiter. The waiter removes
3334 * BM_PIN_COUNT_WAITER if it stops waiting for a reason other than this
3335 * backend waking it up.
3336 */
3337 uint64 buf_state = LockBufHdr(buf);
3338
3339 if ((buf_state & BM_PIN_COUNT_WAITER) &&
3340 BUF_STATE_GET_REFCOUNT(buf_state) == 1)
3341 {
3342 /* we just released the last pin other than the waiter's */
3343 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
3344
3345 UnlockBufHdrExt(buf, buf_state,
3346 0, BM_PIN_COUNT_WAITER,
3347 0);
3348 ProcSendSignal(wait_backend_pgprocno);
3349 }
3350 else
3351 UnlockBufHdr(buf);
3352}
void ProcSendSignal(ProcNumber procNumber)
Definition proc.c:1996

References BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, fb(), LockBufHdr(), ProcSendSignal(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by TerminateBufferIO(), and UnpinBufferNoOwner().

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext context,
int max_pending 
)

Definition at line 7268 of file bufmgr.c.

7269{
7270 Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
7271
7272 context->max_pending = max_pending;
7273 context->nr_pending = 0;
7274}

References Assert, WritebackContext::max_pending, WritebackContext::nr_pending, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferManagerShmemInit(), and BufferSync().

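A hypothetical setup sketch mirroring the callers above: the second argument is a pointer to the flush-after variable (here the checkpoint_flush_after GUC) rather than its value, so configuration changes are picked up without re-initializing the context:

#include "postgres.h"

#include "storage/buf_internals.h"
#include "storage/bufmgr.h"

/* Hypothetical: set up writeback control the way BufferSync() does. */
static void
example_writeback_setup(WritebackContext *wb_context)
{
    /* pointer, not value: later GUC changes take effect automatically */
    WritebackContextInit(wb_context, &checkpoint_flush_after);
}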
◆ ZeroAndLockBuffer()

static void ZeroAndLockBuffer ( Buffer  buffer,
ReadBufferMode  mode,
bool  already_valid 
)
static

Definition at line 1132 of file bufmgr.c.

1133{
1134 BufferDesc *bufHdr;
1135 bool need_to_zero;
1136 bool isLocalBuf = BufferIsLocal(buffer);
1137
1138 Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
1139
1140 if (already_valid)
1141 {
1142 /*
1143 * If the caller already knew the buffer was valid, we can skip some
1144 * header interaction. The caller just wants to lock the buffer.
1145 */
1146 need_to_zero = false;
1147 }
1148 else if (isLocalBuf)
1149 {
1150 /* Simple case for non-shared buffers. */
1151 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1152 need_to_zero = StartLocalBufferIO(bufHdr, true, false);
1153 }
1154 else
1155 {
1156 /*
1157 * Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
1158 * concurrently. Even though we aren't doing I/O, that ensures that
1159 * we don't zero a page that someone else has pinned. An exclusive
1160 * content lock wouldn't be enough, because readers are allowed to
1161 * drop the content lock after determining that a tuple is visible
1162 * (see buffer access rules in README).
1163 */
1164 bufHdr = GetBufferDescriptor(buffer - 1);
1165 need_to_zero = StartBufferIO(bufHdr, true, false);
1166 }
1167
1168 if (need_to_zero)
1169 {
1170 memset(BufferGetPage(buffer), 0, BLCKSZ);
1171
1172 /*
1173 * Grab the buffer content lock before marking the page as valid, to
1174 * make sure that no other backend sees the zeroed page before the
1175 * caller has had a chance to initialize it.
1176 *
1177 * Since no-one else can be looking at the page contents yet, there is
1178 * no difference between an exclusive lock and a cleanup-strength
1179 * lock. (Note that we cannot use LockBuffer() or
1180 * LockBufferForCleanup() here, because they assert that the buffer is
1181 * already valid.)
1182 */
1183 if (!isLocalBuf)
1184 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1185
1186 /* Set BM_VALID, terminate IO, and wake up any waiters */
1187 if (isLocalBuf)
1188 TerminateLocalBufferIO(bufHdr, false, BM_VALID, false);
1189 else
1190 TerminateBufferIO(bufHdr, false, BM_VALID, true, false);
1191 }
1192 else if (!isLocalBuf)
1193 {
1194 /*
1195 * The buffer is valid, so we can't zero it. The caller still expects
1196 * the page to be locked on return.
1197 */
1198 if (mode == RBM_ZERO_AND_LOCK)
1199 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1200 else
1201 LockBufferForCleanup(buffer);
1202 }
1203}

References Assert, BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsLocal, fb(), GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, StartBufferIO(), StartLocalBufferIO(), TerminateBufferIO(), and TerminateLocalBufferIO().

Referenced by ReadBuffer_common().
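
A hedged caller-side sketch: RBM_ZERO_AND_LOCK is what routes ReadBuffer_common() into ZeroAndLockBuffer(). The page comes back zeroed and exclusively locked, so no other backend can see it before the caller initializes it:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

static Buffer
get_zeroed_locked_block(Relation rel, BlockNumber blkno)
{
    Buffer      buf;

    /* no physical read is performed for RBM_ZERO_AND_LOCK */
    buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                             RBM_ZERO_AND_LOCK, NULL);

    /* the caller must set up page headers before releasing the lock */
    PageInit(BufferGetPage(buf), BufferGetPageSize(buf), 0);

    return buf;
}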

Variable Documentation

◆ aio_local_buffer_readv_cb

const PgAioHandleCallbacks aio_local_buffer_readv_cb
Initial value:
= {
.stage = local_buffer_readv_stage,
.complete_local = local_buffer_readv_complete,
.report = buffer_readv_report,
}

Definition at line 8530 of file bufmgr.c.

8530 {
8531 .stage = local_buffer_readv_stage,
8532
8533 /*
8534 * Note that this, in contrast to the shared_buffers case, uses
8535 * complete_local, as only the issuing backend has access to the required
8536 * datastructures. This is important in case the IO completion may be
8537 * consumed incidentally by another backend.
8538 */
8539 .complete_local = local_buffer_readv_complete,
8540 .report = buffer_readv_report,
8541};

◆ aio_shared_buffer_readv_cb

const PgAioHandleCallbacks aio_shared_buffer_readv_cb
Initial value:
= {
.stage = shared_buffer_readv_stage,
.complete_shared = shared_buffer_readv_complete,
.complete_local = shared_buffer_readv_complete_local,
.report = buffer_readv_report,
}

Definition at line 8521 of file bufmgr.c.

8521 {
8522 .stage = shared_buffer_readv_stage,
8523 .complete_shared = shared_buffer_readv_complete,
8524 /* need a local callback to report checksum failures */
8525 .complete_local = shared_buffer_readv_complete_local,
8526 .report = buffer_readv_report,
8527};
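
A hedged illustration of the shape both variables follow, using hypothetical my_readv_* callbacks (signatures as in storage/aio.h). complete_shared may run in whichever backend reaps the IO and must therefore touch only shared memory; complete_local runs only in the issuing backend:

#include "postgres.h"

#include "storage/aio.h"

static void
my_readv_stage(PgAioHandle *ioh, uint8 cb_data)
{
    /* runs in the issuing backend, before the IO is submitted */
}

static PgAioResult
my_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
{
    /* runs once the IO completes, possibly in another backend */
    return prior_result;
}

static const PgAioHandleCallbacks my_readv_cb = {
    .stage = my_readv_stage,
    .complete_shared = my_readv_complete,
};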

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 210 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 209 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 175 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 176 of file bufmgr.c.

Referenced by BgBufferSync().
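
Taken together, these two GUCs bound the background writer's work per round. A hedged, simplified sketch of the arithmetic BgBufferSync() applies (the real code also smooths the allocation rate across cycles and follows the clock-sweep position):

#include "postgres.h"

#include "storage/bufmgr.h"

static int
bgwriter_cleaning_goal(double smoothed_alloc)
{
    /* stay ahead of predicted buffer demand by the configured multiplier */
    int         upcoming_alloc_est = (int) (smoothed_alloc *
                                            bgwriter_lru_multiplier);

    /* but never write more than bgwriter_lru_maxpages per round */
    return Min(upcoming_alloc_est, bgwriter_lru_maxpages);
}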

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
.name = "buffer io",
.release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
.release_priority = RELEASE_PRIO_BUFFER_IOS,
.ReleaseResource = ResOwnerReleaseBufferIO,
.DebugPrint = ResOwnerPrintBufferIO
}

Definition at line 270 of file bufmgr.c.

271{
272 .name = "buffer io",
273 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
274 .release_priority = RELEASE_PRIO_BUFFER_IOS,
275 .ReleaseResource = ResOwnerReleaseBufferIO,
276 .DebugPrint = ResOwnerPrintBufferIO
277};

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_resowner_desc

const ResourceOwnerDesc buffer_resowner_desc
Initial value:
=
{
.name = "buffer",
.release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
.release_priority = RELEASE_PRIO_BUFFER_PINS,
.ReleaseResource = ResOwnerReleaseBuffer,
.DebugPrint = ResOwnerPrintBuffer
}

Definition at line 279 of file bufmgr.c.

280{
281 .name = "buffer",
282 .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
283 .release_priority = RELEASE_PRIO_BUFFER_PINS,
284 .ReleaseResource = ResOwnerReleaseBuffer,
285 .DebugPrint = ResOwnerPrintBuffer
286};

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().
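
Both descriptors follow the generic ResourceOwnerDesc pattern, which extensions can reuse for their own resource kinds. A hedged sketch with a hypothetical "my handle" kind (entries would later be registered with ResourceOwnerRemember() using this descriptor):

#include "postgres.h"

#include "utils/resowner.h"

static void
my_handle_release(Datum res)
{
    /* close/free the underlying resource identified by 'res' */
}

static char *
my_handle_print(Datum res)
{
    return psprintf("my handle %d", DatumGetInt32(res));
}

static const ResourceOwnerDesc my_handle_resowner_desc =
{
    .name = "my handle",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_FIRST, /* before the built-in kinds */
    .ReleaseResource = my_handle_release,
    .DebugPrint = my_handle_print
};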

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 208 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

◆ io_combine_limit

◆ io_combine_limit_guc

int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT

Definition at line 201 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ maintenance_io_concurrency

◆ MaxProportionalPins

uint32 MaxProportionalPins
static

Definition at line 256 of file bufmgr.c.

Referenced by GetAdditionalPinLimit(), GetPinLimit(), and InitBufferManagerAccess().

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 213 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountArrayKeys

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 252 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountEntryLast

int PrivateRefCountEntryLast = -1
static

◆ PrivateRefCountHash

◆ PrivateRefCountOverflowed

◆ ReservedRefCountSlot

int ReservedRefCountSlot = -1
static

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 174 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().
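
A hedged sketch of the pattern those readers implement; page_is_valid() is a hypothetical stand-in for the real page-header and checksum verification:

#include "postgres.h"

#include "storage/block.h"
#include "storage/bufmgr.h"

extern bool page_is_valid(const char *page, BlockNumber blkno);

static void
verify_or_zero_page(char *page, BlockNumber blkno)
{
    if (page_is_valid(page, blkno))
        return;

    if (zero_damaged_pages)
    {
        /* salvage: warn and carry on with an all-zeroes page */
        ereport(WARNING,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("invalid page in block %u; zeroing out page",
                        blkno)));
        memset(page, 0, BLCKSZ);
    }
    else
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("invalid page in block %u", blkno)));
}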