/*
 * BufHdrGetBlock: map a shared-buffer descriptor to the start of its data
 * block.  BufferBlocks is the base of the shared buffer pool; the block for
 * a descriptor lives at byte offset buf_id * BLCKSZ.  The (Size) cast keeps
 * the multiplication from overflowing a 32-bit int with large buffer pools.
 */
68 #define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
/* BufferGetLSN: page LSN of the block described by a shared-buffer header. */
69 #define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr)))

/*
 * LocalBufHdrGetBlock: same mapping for local (temp-relation) buffers.
 * The -((buf_id) + 2) expression converts the descriptor's buf_id into an
 * index into LocalBufferBlockPointers; this implies local descriptors carry
 * negative buf_id values (encoding defined in localbuf.c, not shown here).
 */
72 #define LocalBufHdrGetBlock(bufHdr) \
73 LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
/*
 * Result bits for SyncOneBuffer(): BUF_WRITTEN is set when the buffer was
 * actually flushed; BUF_REUSABLE when it is a candidate for reuse
 * (unpinned/clean).  NOTE(review): SyncOneBuffer's definition is outside
 * this excerpt — confirm the exact meaning there.
 */
76 #define BUF_WRITTEN 0x01
77 #define BUF_REUSABLE 0x02

/*
 * When matching buffers against a list of relations, fall back from a
 * linear scan to bsearch() only once the list exceeds this many entries
 * (small lists are faster to scan linearly).  Used by the DropRelations /
 * FlushRelations bsearch call sites visible further down in this file.
 */
79 #define RELS_BSEARCH_THRESHOLD 20

/*
 * If the number of to-be-invalidated blocks is below this fraction
 * (1/32nd) of shared_buffers, per-block lookup is presumed cheaper than a
 * full scan of the buffer pool.  NOTE(review): consuming logic is elided
 * here — verify at the drop/truncate call sites.
 */
87 #define BUF_DROP_FULL_SCAN_THRESHOLD (uint64) (NBuffers / 32)

/*
 * Size of the backend-private fixed array used for tracking per-buffer
 * reference counts before spilling to a hash table (see the
 * GetPrivateRefCount machinery referenced elsewhere in this file).
 */
96 #define REFCOUNT_ARRAY_ENTRIES 8
237 .
name =
"buffer pin",
398 free->refcount =
res->refcount;
/*
 * BufferIsPinned: does this backend hold at least one pin on the buffer?
 * Three-way dispatch: invalid buffer numbers yield false (that branch's
 * continuation line is elided in this excerpt); local buffers consult
 * LocalRefCount[] (negative buffer numbers map via -(bufnum) - 1); shared
 * buffers consult the backend-private refcount tracking via
 * GetPrivateRefCount().  NOTE(review): some continuation lines of this
 * macro fall outside the visible excerpt — code below left untouched.
 */
474 #define BufferIsPinned(bufnum) \
476 !BufferIsValid(bufnum) ? \
479 BufferIsLocal(bufnum) ? \
480 (LocalRefCount[-(bufnum) - 1] > 0) \
482 (GetPrivateRefCount(bufnum) > 0) \
512 static int SyncOneBuffer(
int buf_id,
bool skip_recently_used,
517 uint32 set_flag_bits,
bool forget_owner);
649 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
650 errmsg(
"cannot access temporary tables of other sessions")));
676 bool have_private_ref;
686 int b = -recent_buffer - 1;
711 if (have_private_ref)
723 if (have_private_ref)
734 if (!have_private_ref)
805 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
806 errmsg(
"cannot access temporary tables of other sessions")));
813 forkNum, blockNum,
mode, strategy);
837 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
890 if (bmr.
smgr == NULL)
898 buffers, extended_by);
926 if (bmr.
smgr == NULL)
982 num_pages, extend_to,
983 buffers, &extended_by);
990 if (first_block +
i != extend_to - 1)
1005 Assert(extended_by == 0);
1007 fork, extend_to - 1,
mode, strategy);
1033 need_to_zero =
false;
1035 else if (isLocalBuf)
1087 else if (!isLocalBuf)
1121 Assert((persistence == RELPERSISTENCE_TEMP ||
1122 persistence == RELPERSISTENCE_PERMANENT ||
1123 persistence == RELPERSISTENCE_UNLOGGED));
1125 if (persistence == RELPERSISTENCE_TEMP)
1136 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
1142 if (persistence == RELPERSISTENCE_TEMP)
1150 bufHdr =
BufferAlloc(smgr, persistence, forkNum, blockNum,
1151 strategy, foundPtr, io_context);
1172 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
1220 persistence = rel->
rd_rel->relpersistence;
1222 persistence = smgr_persistence;
1230 forkNum, blockNum, strategy, &found);
1239 operation.
smgr = smgr;
1240 operation.
rel = rel;
1260 int actual_nblocks = *nblocks;
1261 int io_buffers_len = 0;
1266 for (
int i = 0;
i < actual_nblocks; ++
i)
1286 actual_nblocks =
i + 1;
1295 *nblocks = actual_nblocks;
1297 if (
likely(io_buffers_len == 0))
1303 operation->
flags = flags;
1304 operation->
nblocks = actual_nblocks;
1415 buffers = &operation->
buffers[0];
1420 if (persistence == RELPERSISTENCE_TEMP)
1439 if (persistence == RELPERSISTENCE_TEMP)
1444 for (
int i = 0;
i < nblocks; ++
i)
1464 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum +
i,
1474 io_buffers[0] = buffers[
i];
1476 io_first_block = blocknum +
i;
1486 while ((
i + 1) < nblocks &&
1493 io_buffers[io_buffers_len] = buffers[++
i];
1498 smgrreadv(operation->
smgr, forknum, io_first_block, io_pages, io_buffers_len);
1503 for (
int j = 0;
j < io_buffers_len; ++
j)
1508 if (persistence == RELPERSISTENCE_TEMP)
1527 errmsg(
"invalid page in block %u of relation %s; zeroing out page",
1530 memset(bufBlock, 0, BLCKSZ);
1535 errmsg(
"invalid page in block %u of relation %s",
1541 if (persistence == RELPERSISTENCE_TEMP)
1555 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, io_first_block +
j,
1595 LWLock *newPartitionLock;
1596 int existing_buf_id;
1615 if (existing_buf_id >= 0)
1668 if (existing_buf_id >= 0)
1695 valid =
PinBuffer(existing_buf_hdr, strategy);
1712 return existing_buf_hdr;
1718 victim_buf_state =
LockBufHdr(victim_buf_hdr);
1724 victim_buf_hdr->
tag = newTag;
1733 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum ==
INIT_FORKNUM)
1745 return victim_buf_hdr;
1770 LWLock *oldPartitionLock;
1823 elog(
ERROR,
"buffer is pinned in InvalidateBuffer");
2012 if (strategy != NULL)
2073 #ifdef USE_ASSERT_CHECKING
2101 int max_proportional_pins;
2103 if (*additional_pins <= 1)
2107 max_proportional_pins =
NBuffers / max_backends;
2117 if (max_proportional_pins <= 0)
2118 max_proportional_pins = 1;
2120 if (*additional_pins > max_proportional_pins)
2121 *additional_pins = max_proportional_pins;
2140 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2149 extend_by, extend_upto,
2150 buffers, &extend_by);
2153 extend_by, extend_upto,
2154 buffers, &extend_by);
2155 *extended_by = extend_by;
2157 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2206 MemSet((
char *) buf_block, 0, BLCKSZ);
2239 uint32 orig_extend_by = extend_by;
2241 if (first_block > extend_upto)
2243 else if ((uint64) first_block + extend_by > extend_upto)
2244 extend_by = extend_upto - first_block;
2246 for (
uint32 i = extend_by;
i < orig_extend_by;
i++)
2262 *extended_by = extend_by;
2270 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2271 errmsg(
"cannot extend relation %s beyond %u blocks",
2283 Buffer victim_buf = buffers[
i];
2316 if (existing_id >= 0)
2326 valid =
PinBuffer(existing_hdr, strategy);
2342 (
errmsg(
"unexpected data beyond EOF in block %u of relation %s",
2344 errhint(
"This has been seen to occur with buggy kernels; consider updating your system.")));
2374 victim_buf_hdr->
tag = tag;
2414 io_start, extend_by);
2428 if (first_block +
i + 1 == extend_upto)
2440 *extended_by = extend_by;
2541 buf_state = old_buf_state;
2658 buf_state = old_buf_state;
2663 if (strategy == NULL)
2682 result = (buf_state &
BM_VALID) != 0;
2838 buf_state = old_buf_state;
2863 int wait_backend_pgprocno =
buf->wait_backend_pgprocno;
/*
 * Instantiate a type-specialized sort routine, sort_checkpoint_bufferids(),
 * for ordering CkptSortItem entries (checkpoint buffer writes) with
 * ckpt_buforder_comparator.  These ST_* parameter macros feed the
 * sort_template.h generator; its #include follows immediately in the full
 * file but is not visible in this excerpt.
 */
2876 #define ST_SORT sort_checkpoint_bufferids
2877 #define ST_ELEMENT_TYPE CkptSortItem
2878 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
2879 #define ST_SCOPE static
2935 for (buf_id = 0; buf_id <
NBuffers; buf_id++)
2945 if ((buf_state & mask) == mask)
2966 if (num_to_scan == 0)
2971 TRACE_POSTGRESQL_BUFFER_SYNC_START(
NBuffers, num_to_scan);
2989 for (
i = 0;
i < num_to_scan;
i++)
3000 if (last_tsid ==
InvalidOid || last_tsid != cur_tsid)
3012 if (per_ts_stat == NULL)
3017 s = &per_ts_stat[num_spaces - 1];
3018 memset(s, 0,
sizeof(*s));
3033 last_tsid = cur_tsid;
3037 s = &per_ts_stat[num_spaces - 1];
3058 for (
i = 0;
i < num_spaces;
i++)
3106 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3155 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(
NBuffers, num_written, num_to_scan);
3173 int strategy_buf_id;
3181 static bool saved_info_valid =
false;
3182 static int prev_strategy_buf_id;
3183 static uint32 prev_strategy_passes;
3184 static int next_to_clean;
3185 static uint32 next_passes;
3188 static float smoothed_alloc = 0;
3189 static float smoothed_density = 10.0;
3192 float smoothing_samples = 16;
3193 float scan_whole_pool_milliseconds = 120000.0;
3196 long strategy_delta;
3199 float scans_per_alloc;
3200 int reusable_buffers_est;
3201 int upcoming_alloc_est;
3202 int min_scan_buffers;
3207 int reusable_buffers;
3210 long new_strategy_delta;
3229 saved_info_valid =
false;
3241 if (saved_info_valid)
3243 int32 passes_delta = strategy_passes - prev_strategy_passes;
3245 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3246 strategy_delta += (long) passes_delta *
NBuffers;
3248 Assert(strategy_delta >= 0);
3250 if ((
int32) (next_passes - strategy_passes) > 0)
3253 bufs_to_lap = strategy_buf_id - next_to_clean;
3255 elog(
DEBUG2,
"bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3256 next_passes, next_to_clean,
3257 strategy_passes, strategy_buf_id,
3258 strategy_delta, bufs_to_lap);
3261 else if (next_passes == strategy_passes &&
3262 next_to_clean >= strategy_buf_id)
3265 bufs_to_lap =
NBuffers - (next_to_clean - strategy_buf_id);
3267 elog(
DEBUG2,
"bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3268 next_passes, next_to_clean,
3269 strategy_passes, strategy_buf_id,
3270 strategy_delta, bufs_to_lap);
3280 elog(
DEBUG2,
"bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3281 next_passes, next_to_clean,
3282 strategy_passes, strategy_buf_id,
3285 next_to_clean = strategy_buf_id;
3286 next_passes = strategy_passes;
3297 elog(
DEBUG2,
"bgwriter initializing: strategy %u-%u",
3298 strategy_passes, strategy_buf_id);
3301 next_to_clean = strategy_buf_id;
3302 next_passes = strategy_passes;
3307 prev_strategy_buf_id = strategy_buf_id;
3308 prev_strategy_passes = strategy_passes;
3309 saved_info_valid =
true;
3317 if (strategy_delta > 0 && recent_alloc > 0)
3319 scans_per_alloc = (float) strategy_delta / (
float) recent_alloc;
3320 smoothed_density += (scans_per_alloc - smoothed_density) /
3329 bufs_ahead =
NBuffers - bufs_to_lap;
3330 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3337 if (smoothed_alloc <= (
float) recent_alloc)
3338 smoothed_alloc = recent_alloc;
3340 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3354 if (upcoming_alloc_est == 0)
3369 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3372 elog(
DEBUG2,
"bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3373 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3375 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3385 num_to_scan = bufs_to_lap;
3387 reusable_buffers = reusable_buffers_est;
3390 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3418 elog(
DEBUG1,
"bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3419 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3420 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3421 bufs_to_lap - num_to_scan,
3423 reusable_buffers - reusable_buffers_est);
3434 new_strategy_delta = bufs_to_lap - num_to_scan;
3435 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3436 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3438 scans_per_alloc = (float) new_strategy_delta / (
float) new_recent_alloc;
3439 smoothed_density += (scans_per_alloc - smoothed_density) /
3443 elog(
DEBUG2,
"bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3444 new_recent_alloc, new_strategy_delta,
3445 scans_per_alloc, smoothed_density);
3450 return (bufs_to_lap == 0 && recent_alloc == 0);
3495 else if (skip_recently_used)
3603 #ifdef USE_ASSERT_CHECKING
3604 int RefCountErrors = 0;
3639 Assert(RefCountErrors == 0);
3675 result =
psprintf(
"[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
3786 errcallback.
arg = (
void *)
buf;
3907 if (RELKIND_HAS_TABLE_AM(relation->
rd_rel->relkind))
3919 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
3921 else if (RELKIND_HAS_STORAGE(relation->
rd_rel->relkind))
4020 uint64 nBlocksToInvalidate = 0;
4029 for (
j = 0;
j < nforks;
j++)
4058 for (
i = 0;
i < nforks;
i++)
4070 nBlocksToInvalidate += (nForkBlock[
i] - firstDelBlock[
i]);
4080 for (
j = 0;
j < nforks;
j++)
4082 nForkBlock[
j], firstDelBlock[
j]);
4112 for (
j = 0;
j < nforks;
j++)
4142 uint64 nBlocksToInvalidate = 0;
4153 for (
i = 0;
i < nlocators;
i++)
4161 rels[n++] = smgr_reln[
i];
4185 for (
i = 0;
i < n && cached;
i++)
4202 nBlocksToInvalidate += block[
i][
j];
4212 for (
i = 0;
i < n;
i++)
4233 for (
i = 0;
i < n;
i++)
4234 locators[
i] = rels[
i]->smgr_rlocator.locator;
4263 for (
j = 0;
j < n;
j++)
4267 rlocator = &locators[
j];
4277 rlocator = bsearch((
const void *) &(locator),
4283 if (rlocator == NULL)
4313 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4317 LWLock *bufPartitionLock;
4406 PrintBufferDescs(
void)
4417 "[%02d] (freeNext=%d, rel=%s, "
4418 "blockNum=%u, flags=0x%x, refcount=%u %d)",
4422 buf->tag.blockNum,
buf->flags,
4430 PrintPinnedBufs(
void)
4443 "[%02d] (freeNext=%d, rel=%s, "
4444 "blockNum=%u, flags=0x%x, refcount=%u %d)",
4448 buf->tag.blockNum,
buf->flags,
4499 errcallback.
arg = (
void *) bufHdr;
4584 for (
i = 0;
i < nrels;
i++)
4589 srels[
i].
srel = smgrs[
i];
4617 for (
j = 0;
j < nrels;
j++)
4621 srelent = &srels[
j];
4631 srelent = bsearch((
const void *) &(rlocator),
4637 if (srelent == NULL)
4709 memset(
buf.data, 0, BLCKSZ);
4724 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
4731 for (blkno = 0; blkno < nblocks; blkno++)
4749 memcpy(dstPage, srcPage, BLCKSZ);
4783 char relpersistence;
4788 relpersistence = permanent ?
4789 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5005 bool dirtied =
false;
5006 bool delayChkptFlags =
false;
5058 delayChkptFlags =
true;
5090 if (delayChkptFlags)
5192 elog(
ERROR,
"incorrect local pin count: %d",
5198 elog(
ERROR,
"incorrect local pin count: %d",
5225 bool logged_recovery_conflict =
false;
5257 if (logged_recovery_conflict)
5275 elog(
ERROR,
"multiple backends attempting to wait for pincount 1");
5301 if (waitStart != 0 && !logged_recovery_conflict)
5309 waitStart,
now, NULL,
true);
5310 logged_recovery_conflict =
true;
5610 buf_state |= set_flag_bits;
5662 errmsg(
"could not write block %u of %s",
5664 errdetail(
"Multiple failures --- write error might be permanent.")));
5686 errcontext(
"writing block %u of relation %s",
5706 errcontext(
"writing block %u of relation %s",