68 #define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
69 #define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr)))
72 #define LocalBufHdrGetBlock(bufHdr) \
73 LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
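/*
 * Worked example of the negative-index encoding above (assuming the
 * localbuf.c convention that local descriptor slot N is initialized with
 * buf_id = -N - 2): slot 0 carries buf_id = -2, BufferDescriptorGetBuffer()
 * turns that into Buffer -1, and LocalBufHdrGetBlock() maps it back to
 * LocalBufferBlockPointers[-(-2 + 2)] = LocalBufferBlockPointers[0].
 * Shared buffers use non-negative ids, which keeps the two namespaces
 * disjoint.
 */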
76 #define BUF_WRITTEN 0x01
77 #define BUF_REUSABLE 0x02
79 #define RELS_BSEARCH_THRESHOLD 20
87 #define BUF_DROP_FULL_SCAN_THRESHOLD (uint64) (NBuffers / 32)
96 #define REFCOUNT_ARRAY_ENTRIES 8
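/*
 * REFCOUNT_ARRAY_ENTRIES sizes the small backend-local array that tracks how
 * many times this backend has pinned each buffer; once more distinct buffers
 * than that are pinned, entries spill into the PrivateRefCountHash table (as
 * the refcount hand-off at line 398 below suggests).
 */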
237 .name = "buffer pin",
398 free->refcount = res->refcount;
474 #define BufferIsPinned(bufnum) \
476 !BufferIsValid(bufnum) ? \
479 BufferIsLocal(bufnum) ? \
480 (LocalRefCount[-(bufnum) - 1] > 0) \
482 (GetPrivateRefCount(bufnum) > 0) \
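/*
 * Note that BufferIsPinned() only answers "does *this backend* hold a pin?":
 * local buffers consult LocalRefCount[], shared buffers the backend-private
 * refcount, and the arms elided from this listing simply yield false for an
 * invalid buffer number.  It says nothing about pins held by other backends.
 */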
517 uint32 set_flag_bits, bool forget_owner);
649 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
650 errmsg("cannot access temporary tables of other sessions")));
676 bool have_private_ref;
686 int b = -recent_buffer - 1;
711 if (have_private_ref)
723 if (have_private_ref)
734 if (!have_private_ref)
805 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
806 errmsg("cannot access temporary tables of other sessions")));
813 forkNum, blockNum, mode, strategy);
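/*
 * Caller-side sketch of the ReadBufferExtended() path dispatched above (an
 * illustrative example, not code from this file; the scan details are
 * assumed):
 *
 *     Buffer  buf;
 *
 *     buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL);
 *     LockBuffer(buf, BUFFER_LOCK_SHARE);
 *     ... examine the page via BufferGetPage(buf) ...
 *     UnlockReleaseBuffer(buf);
 *
 * RBM_NORMAL reads and validates the page; other modes such as
 * RBM_ZERO_AND_LOCK skip the read for pages the caller intends to overwrite.
 */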
837 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
890 if (bmr.smgr == NULL)
898 buffers, extended_by);
926 if (bmr.smgr == NULL)
982 num_pages, extend_to,
983 buffers, &extended_by);
990 if (first_block + i != extend_to - 1)
1005 Assert(extended_by == 0);
1007 fork, extend_to - 1, mode, strategy);
1033 need_to_zero = false;
1035 else if (isLocalBuf)
1087 else if (!isLocalBuf)
1121 Assert((persistence == RELPERSISTENCE_TEMP ||
1122 persistence == RELPERSISTENCE_PERMANENT ||
1123 persistence == RELPERSISTENCE_UNLOGGED));
1125 if (persistence == RELPERSISTENCE_TEMP)
1136 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1142 if (persistence == RELPERSISTENCE_TEMP)
1150 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1151 strategy, foundPtr, io_context);
1172 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1220 persistence = rel->rd_rel->relpersistence;
1222 persistence = smgr_persistence;
1230 forkNum, blockNum, strategy, &found);
1239 operation.smgr = smgr;
1240 operation.rel = rel;
1260 int actual_nblocks = *nblocks;
1261 int io_buffers_len = 0;
1267 for (int i = 0; i < actual_nblocks; ++i)
1287 actual_nblocks = i + 1;
1299 if (i == 0 && actual_nblocks > 1)
1304 if (unlikely(maxcombine < actual_nblocks))
1306 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1307 blockNum, actual_nblocks, maxcombine);
1308 actual_nblocks = maxcombine;
1313 *nblocks = actual_nblocks;
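/*
 * The loop above builds at most one contiguous run of blocks that actually
 * need I/O: a cache hit truncates the request (actual_nblocks = i + 1), and
 * when the first block misses the run is apparently clamped (the i == 0
 * check) to what smgrmaxcombine() says can be issued as a single vectored
 * read.  *nblocks tells the caller how many of the requested blocks this
 * operation ended up covering; any remainder must be requested again.
 */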
1315 if (likely(io_buffers_len == 0))
1321 operation->flags = flags;
1322 operation->nblocks = actual_nblocks;
1433 buffers = &operation->
buffers[0];
1438 if (persistence == RELPERSISTENCE_TEMP)
1457 if (persistence == RELPERSISTENCE_TEMP)
1462 for (int i = 0; i < nblocks; ++i)
1482 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i,
1492 io_buffers[0] = buffers[i];
1494 io_first_block = blocknum + i;
1504 while ((i + 1) < nblocks &&
1511 io_buffers[io_buffers_len] = buffers[++i];
1516 smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
1518 1, io_buffers_len * BLCKSZ);
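/*
 * A single smgrreadv() call covers the whole contiguous run collected in
 * io_buffers[], and the I/O statistics call above charges it as one read of
 * io_buffers_len * BLCKSZ bytes.  The loop below only validates and
 * completes each page that was just read; it issues no further I/O.
 */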
1521 for (int j = 0; j < io_buffers_len; ++j)
1526 if (persistence == RELPERSISTENCE_TEMP)
1545 errmsg("invalid page in block %u of relation %s; zeroing out page",
1548 memset(bufBlock, 0, BLCKSZ);
1553 errmsg("invalid page in block %u of relation %s",
1559 if (persistence == RELPERSISTENCE_TEMP)
1573 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, io_first_block + j,
1613 LWLock *newPartitionLock;
1614 int existing_buf_id;
1633 if (existing_buf_id >= 0)
1686 if (existing_buf_id >= 0)
1713 valid = PinBuffer(existing_buf_hdr, strategy);
1730 return existing_buf_hdr;
1736 victim_buf_state = LockBufHdr(victim_buf_hdr);
1742 victim_buf_hdr->tag = newTag;
1751 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
1763 return victim_buf_hdr;
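/*
 * Sketch of the path above: if the tag lookup finds an existing entry, that
 * buffer is pinned and returned (whether or not its contents are valid);
 * otherwise a victim buffer is taken, its header is locked, the new tag is
 * installed, and BM_PERMANENT is presumably set for permanent relations and
 * init forks (line 1751) so checkpoints and WAL flushing treat those pages
 * as permanent data.
 */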
1788 LWLock *oldPartitionLock;
1841 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2030 if (strategy != NULL)
2091 #ifdef USE_ASSERT_CHECKING
2119 int max_proportional_pins;
2121 if (*additional_pins <= 1)
2125 max_proportional_pins = NBuffers / max_backends;
2135 if (max_proportional_pins <= 0)
2136 max_proportional_pins = 1;
2138 if (*additional_pins > max_proportional_pins)
2139 *additional_pins = max_proportional_pins;
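/*
 * Worked example of the clamp above: with NBuffers = 16384 and
 * max_backends = 100, each backend may acquire at most 16384 / 100 = 163
 * additional pins for a multi-buffer operation; the floor of 1 keeps tiny
 * buffer pools from refusing pins entirely.
 */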
2158 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2167 extend_by, extend_upto,
2168 buffers, &extend_by);
2171 extend_by, extend_upto,
2172 buffers, &extend_by);
2173 *extended_by = extend_by;
2175 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2224 MemSet((char *) buf_block, 0, BLCKSZ);
2257 uint32 orig_extend_by = extend_by;
2259 if (first_block > extend_upto)
2261 else if ((uint64) first_block + extend_by > extend_upto)
2262 extend_by = extend_upto - first_block;
2264 for (uint32 i = extend_by; i < orig_extend_by; i++)
2280 *extended_by = extend_by;
2288 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2289 errmsg("cannot extend relation %s beyond %u blocks",
2301 Buffer victim_buf = buffers[i];
2334 if (existing_id >= 0)
2344 valid = PinBuffer(existing_hdr, strategy);
2360 (errmsg("unexpected data beyond EOF in block %u of relation %s",
2362 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
2378 buf_state &= ~BM_VALID;
2392 victim_buf_hdr->tag = tag;
2432 io_start, 1, extend_by * BLCKSZ);
2446 if (first_block + i + 1 == extend_upto)
2458 *extended_by = extend_by;
2559 buf_state = old_buf_state;
2676 buf_state = old_buf_state;
2681 if (strategy == NULL)
2700 result = (buf_state &
BM_VALID) != 0;
2856 buf_state = old_buf_state;
2881 int wait_backend_pgprocno = buf->wait_backend_pgprocno;
2883 buf_state &= ~BM_PIN_COUNT_WAITER;
2894 #define ST_SORT sort_checkpoint_bufferids
2895 #define ST_ELEMENT_TYPE CkptSortItem
2896 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
2897 #define ST_SCOPE static
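/*
 * These ST_* macros instantiate a specialized sort routine,
 * sort_checkpoint_bufferids(), from lib/sort_template.h (the remaining
 * template lines are elided from this listing).  BufferSync() sorts the
 * to-be-written buffers by tablespace, relation and block number so that
 * checkpoint writes reach the disks in a roughly sequential order.
 */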
2953 for (buf_id = 0; buf_id < NBuffers; buf_id++)
2963 if ((buf_state & mask) == mask)
2984 if (num_to_scan == 0)
2989 TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
3007 for (i = 0; i < num_to_scan; i++)
3018 if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3030 if (per_ts_stat == NULL)
3035 s = &per_ts_stat[num_spaces - 1];
3036 memset(s, 0, sizeof(*s));
3051 last_tsid = cur_tsid;
3055 s = &per_ts_stat[num_spaces - 1];
3076 for (i = 0; i < num_spaces; i++)
3124 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3173 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3191 int strategy_buf_id;
3199 static bool saved_info_valid = false;
3200 static int prev_strategy_buf_id;
3201 static uint32 prev_strategy_passes;
3202 static int next_to_clean;
3203 static uint32 next_passes;
3206 static float smoothed_alloc = 0;
3207 static float smoothed_density = 10.0;
3210 float smoothing_samples = 16;
3211 float scan_whole_pool_milliseconds = 120000.0;
3214 long strategy_delta;
3217 float scans_per_alloc;
3218 int reusable_buffers_est;
3219 int upcoming_alloc_est;
3220 int min_scan_buffers;
3225 int reusable_buffers;
3228 long new_strategy_delta;
3247 saved_info_valid = false;
3259 if (saved_info_valid)
3261 int32 passes_delta = strategy_passes - prev_strategy_passes;
3263 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3264 strategy_delta += (long) passes_delta * NBuffers;
3266 Assert(strategy_delta >= 0);
3268 if ((int32) (next_passes - strategy_passes) > 0)
3271 bufs_to_lap = strategy_buf_id - next_to_clean;
3273 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3274 next_passes, next_to_clean,
3275 strategy_passes, strategy_buf_id,
3276 strategy_delta, bufs_to_lap);
3279 else if (next_passes == strategy_passes &&
3280 next_to_clean >= strategy_buf_id)
3283 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3285 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3286 next_passes, next_to_clean,
3287 strategy_passes, strategy_buf_id,
3288 strategy_delta, bufs_to_lap);
3298 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3299 next_passes, next_to_clean,
3300 strategy_passes, strategy_buf_id,
3303 next_to_clean = strategy_buf_id;
3304 next_passes = strategy_passes;
3315 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3316 strategy_passes, strategy_buf_id);
3319 next_to_clean = strategy_buf_id;
3320 next_passes = strategy_passes;
3325 prev_strategy_buf_id = strategy_buf_id;
3326 prev_strategy_passes = strategy_passes;
3327 saved_info_valid = true;
3335 if (strategy_delta > 0 && recent_alloc > 0)
3337 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3338 smoothed_density += (scans_per_alloc - smoothed_density) /
3347 bufs_ahead = NBuffers - bufs_to_lap;
3348 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
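/*
 * Worked example of the estimate above (illustrative numbers): if the
 * bgwriter is 3000 buffers ahead of the clock sweep (bufs_ahead) and the
 * smoothed density says the strategy point advances about 10 buffers per
 * allocation, then roughly 3000 / 10 = 300 of the already-cleaned buffers
 * should still be reusable by the time the clock sweep reaches them.
 */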
3355 if (smoothed_alloc <= (float) recent_alloc)
3356 smoothed_alloc = recent_alloc;
3358 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3372 if (upcoming_alloc_est == 0)
3387 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3390 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3391 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3393 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3403 num_to_scan = bufs_to_lap;
3405 reusable_buffers = reusable_buffers_est;
3408 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3436 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3437 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3438 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3439 bufs_to_lap - num_to_scan,
3441 reusable_buffers - reusable_buffers_est);
3452 new_strategy_delta = bufs_to_lap - num_to_scan;
3453 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3454 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3456 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3457 smoothed_density += (scans_per_alloc - smoothed_density) /
3461 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3462 new_recent_alloc, new_strategy_delta,
3463 scans_per_alloc, smoothed_density);
3468 return (bufs_to_lap == 0 && recent_alloc == 0);
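/*
 * The return value above is the "may hibernate" signal: true only when there
 * is nothing left to scan ahead of the clock sweep (bufs_to_lap == 0) and no
 * buffers were allocated since the last call (recent_alloc == 0), i.e. the
 * system currently looks idle to the bgwriter.
 */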
3513 else if (skip_recently_used)
3621 #ifdef USE_ASSERT_CHECKING
3622 int RefCountErrors = 0;
3657 Assert(RefCountErrors == 0);
3693 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3827 buf_state &= ~BM_JUST_DIRTIED;
3925 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3937 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
3939 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4038 uint64 nBlocksToInvalidate = 0;
4047 for (j = 0; j < nforks; j++)
4076 for (i = 0; i < nforks; i++)
4088 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4098 for (j = 0; j < nforks; j++)
4100 nForkBlock[j], firstDelBlock[j]);
4130 for (j = 0; j < nforks; j++)
4160 uint64 nBlocksToInvalidate = 0;
4171 for (i = 0; i < nlocators; i++)
4179 rels[n++] = smgr_reln[i];
4203 for (i = 0; i < n && cached; i++)
4220 nBlocksToInvalidate += block[i][j];
4230 for (i = 0; i < n; i++)
4251 for (i = 0; i < n; i++)
4252 locators[i] = rels[i]->smgr_rlocator.locator;
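/*
 * Two lookup strategies are used when deciding whether a buffer belongs to
 * one of the dropped relations: for small relation counts a plain linear
 * scan over the locator array filled here, and above RELS_BSEARCH_THRESHOLD
 * (20, defined near the top of the file) the array is sorted so each buffer
 * tag can instead be probed with bsearch() (line 4295).
 */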
4281 for (j = 0; j < n; j++)
4285 rlocator = &locators[j];
4295 rlocator = bsearch(&locator,
4301 if (rlocator == NULL)
4331 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4335 LWLock *bufPartitionLock;
4424 PrintBufferDescs(void)
4435 "[%02d] (freeNext=%d, rel=%s, "
4436 "blockNum=%u, flags=0x%x, refcount=%u %d)",
4440 buf->tag.blockNum, buf->flags,
4448 PrintPinnedBufs(void)
4461 "[%02d] (freeNext=%d, rel=%s, "
4462 "blockNum=%u, flags=0x%x, refcount=%u %d)",
4466 buf->tag.blockNum, buf->flags,
4517 errcallback.arg = bufHdr;
4533 io_start, 1, BLCKSZ);
4602 for (i = 0; i < nrels; i++)
4607 srels[i].srel = smgrs[i];
4635 for (j = 0; j < nrels; j++)
4639 srelent = &srels[j];
4649 srelent = bsearch(&rlocator,
4655 if (srelent == NULL)
4727 memset(buf.data, 0, BLCKSZ);
4742 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
4749 for (blkno = 0; blkno < nblocks; blkno++)
4767 memcpy(dstPage, srcPage, BLCKSZ);
4801 char relpersistence;
4806 relpersistence = permanent ?
4807 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5023 bool dirtied = false;
5024 bool delayChkptFlags = false;
5076 delayChkptFlags = true;
5108 if (delayChkptFlags)
5146 buf_state &= ~BM_PIN_COUNT_WAITER;
5210 elog(ERROR, "incorrect local pin count: %d",
5216 elog(ERROR, "incorrect local pin count: %d",
5243 bool logged_recovery_conflict = false;
5275 if (logged_recovery_conflict)
5293 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5319 if (waitStart != 0 && !logged_recovery_conflict)
5327 waitStart, now, NULL, true);
5328 logged_recovery_conflict = true;
5360 buf_state &= ~BM_PIN_COUNT_WAITER;