PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
slru.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * slru.c
4 * Simple LRU buffering for wrap-around-able permanent metadata
5 *
6 * This module is used to maintain various pieces of transaction status
7 * indexed by TransactionId (such as commit status, parent transaction ID,
8 * commit timestamp), as well as storage for multixacts, serializable
9 * isolation locks and NOTIFY traffic. Extensions can define their own
10 * SLRUs, too.
11 *
12 * Under ordinary circumstances we expect that write traffic will occur
13 * mostly to the latest page (and to the just-prior page, soon after a
14 * page transition). Read traffic will probably touch a larger span of
15 * pages, but a relatively small number of buffers should be sufficient.
16 *
17 * We use a simple least-recently-used scheme to manage a pool of shared
18 * page buffers, split in banks by the lowest bits of the page number, and
19 * the management algorithm only processes the bank to which the desired
20 * page belongs, so a linear search is sufficient; there's no need for a
21 * hashtable or anything fancy. The algorithm is straight LRU except that
22 * we will never swap out the latest page (since we know it's going to be
23 * hit again eventually).
24 *
25 * We use per-bank control LWLocks to protect the shared data structures,
26 * plus per-buffer LWLocks that synchronize I/O for each buffer. The
27 * bank's control lock must be held to examine or modify any of the bank's
28 * shared state. A process that is reading in or writing out a page
29 * buffer does not hold the control lock, only the per-buffer lock for the
30 * buffer it is working on. One exception is latest_page_number, which is
31 * read and written using atomic ops.
32 *
33 * "Holding the bank control lock" means exclusive lock in all cases
34 * except for SimpleLruReadPage_ReadOnly(); see comments for
35 * SlruRecentlyUsed() for the implications of that.
36 *
37 * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
38 * before releasing the control lock. The per-buffer lock is released after
39 * completing the I/O, re-acquiring the control lock, and updating the shared
40 * state. (Deadlock is not possible here, because we never try to initiate
41 * I/O when someone else is already doing I/O on the same buffer.)
42 * To wait for I/O to complete, release the control lock, acquire the
43 * per-buffer lock in shared mode, immediately release the per-buffer lock,
44 * reacquire the control lock, and then recheck state (since arbitrary things
45 * could have happened while we didn't have the lock).
46 *
47 * As with the regular buffer manager, it is possible for another process
48 * to re-dirty a page that is currently being written out. This is handled
49 * by re-setting the page's page_dirty flag.
50 *
51 *
52 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
53 * Portions Copyright (c) 1994, Regents of the University of California
54 *
55 * src/backend/access/transam/slru.c
56 *
57 *-------------------------------------------------------------------------
58 */
59#include "postgres.h"
60
61#include <fcntl.h>
62#include <sys/stat.h>
63#include <unistd.h>
64
65#include "access/slru.h"
66#include "access/transam.h"
67#include "access/xlog.h"
68#include "access/xlogutils.h"
69#include "miscadmin.h"
70#include "pgstat.h"
71#include "storage/fd.h"
72#include "storage/shmem.h"
73#include "utils/guc.h"
74
75/*
76 * Converts segment number to the filename of the segment.
77 *
78 * "path" should point to a buffer at least MAXPGPATH characters long.
79 *
80 * If ctl->long_segment_names is true, segno can be in the range [0, 2^60-1].
81 * The resulting file name is made of 15 characters, e.g. dir/123456789ABCDEF.
82 *
83 * If ctl->long_segment_names is false, segno can be in the range [0, 2^24-1].
84 * The resulting file name is made of 4 to 6 characters, as of:
85 *
86 * dir/1234 for [0, 2^16-1]
87 * dir/12345 for [2^16, 2^20-1]
88 * dir/123456 for [2^20, 2^24-1]
89 */
90static inline int
91SlruFileName(SlruCtl ctl, char *path, int64 segno)
92{
93 if (ctl->long_segment_names)
94 {
95 /*
96 * We could use 16 characters here but the disadvantage would be that
97 * the SLRU segments will be hard to distinguish from WAL segments.
98 *
99 * For this reason we use 15 characters. It is enough but also means
100 * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
101 */
102 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
103 return snprintf(path, MAXPGPATH, "%s/%015" PRIX64, ctl->Dir, segno);
104 }
105 else
106 {
107 /*
108 * Despite the fact that %04X format string is used up to 24 bit
109 * integers are allowed. See SlruCorrectSegmentFilenameLength()
110 */
111 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
112 return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
113 (unsigned int) segno);
114 }
115}
116
117/*
118 * During SimpleLruWriteAll(), we will usually not need to write more than one
119 * or two physical files, but we may need to write several pages per file. We
120 * can consolidate the I/O requests by leaving files open until control returns
121 * to SimpleLruWriteAll(). This data structure remembers which files are open.
122 */
123#define MAX_WRITEALL_BUFFERS 16
124
125typedef struct SlruWriteAllData
126{
127 int num_files; /* # files actually open */
128 int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */
129 int64 segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */
131
133
134
/*
 * Banking of the slot array.  Each page is assigned to a bank according to
 * its page number, and every bank contains SLRU_BANK_SIZE slots.  The size is
 * a power of 2 so that bank numbers can be computed with a bit shift alone,
 * and it is deliberately small so that searching one bank for an LRU victim
 * is fast.  16 buffers per bank seems a good number.
 */
#define SLRU_BANK_BITSHIFT		4
#define SLRU_BANK_SIZE			(1 << SLRU_BANK_BITSHIFT)

/*
 * Yields the number of the bank that contains the given slot.
 */
#define SlotGetBankNumber(slot_)	((slot_) >> SLRU_BANK_BITSHIFT)
149
150
/*
 * Fill in a FileTag that identifies one segment file.  The tag is zeroed
 * first, then only the handler and the segment number are stored: everything
 * else we need can be derived from having separate sync handler functions
 * for clog, multixact etc.
 */
#define INIT_SLRUFILETAG(tag_, handler_, segno_) \
( \
	memset(&(tag_), 0, sizeof(FileTag)), \
	(tag_).handler = (handler_), \
	(tag_).segno = (segno_) \
)
162
/*
 * Saved info for SlruReportIOError.
 *
 * The physical I/O routines cannot ereport() directly, so on failure they
 * record which operation failed (slru_errcause) and the errno observed
 * (slru_errno); SlruReportIOError turns that into the error message later.
 */
typedef enum
{
	SLRU_OPEN_FAILED,
	SLRU_SEEK_FAILED,
	SLRU_READ_FAILED,
	SLRU_WRITE_FAILED,
	SLRU_FSYNC_FAILED,
	SLRU_CLOSE_FAILED,
} SlruErrorCause;

static SlruErrorCause slru_errcause;
static int	slru_errno;
176
177
178static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
179static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
180static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
181static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno);
182static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno,
183 SlruWriteAll fdata);
184static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid);
185static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno);
186
188 int64 segpage, void *data);
190static inline void SlruRecentlyUsed(SlruShared shared, int slotno);
191
192
193/*
194 * Initialization of shared memory
195 */
196
197Size
198SimpleLruShmemSize(int nslots, int nlsns)
199{
200 int nbanks = nslots / SLRU_BANK_SIZE;
201 Size sz;
202
204 Assert(nslots % SLRU_BANK_SIZE == 0);
205
206 /* we assume nslots isn't so large as to risk overflow */
207 sz = MAXALIGN(sizeof(SlruSharedData));
208 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
209 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
210 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
211 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
212 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
213 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
214 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
215 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
216
217 if (nlsns > 0)
218 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
219
220 return BUFFERALIGN(sz) + BLCKSZ * nslots;
221}
222
223/*
224 * Determine a number of SLRU buffers to use.
225 *
226 * We simply divide shared_buffers by the divisor given and cap
227 * that at the maximum given; but always at least SLRU_BANK_SIZE.
228 * Round down to the nearest multiple of SLRU_BANK_SIZE.
229 */
230int
231SimpleLruAutotuneBuffers(int divisor, int max)
232{
233 return Min(max - (max % SLRU_BANK_SIZE),
235 NBuffers / divisor - (NBuffers / divisor) % SLRU_BANK_SIZE));
236}
237
238/*
239 * Initialize, or attach to, a simple LRU cache in shared memory.
240 *
241 * ctl: address of local (unshared) control structure.
242 * name: name of SLRU. (This is user-visible, pick with care!)
243 * nslots: number of page slots to use.
244 * nlsns: number of LSN groups per page (set to zero if not relevant).
245 * subdir: PGDATA-relative subdirectory that will contain the files.
246 * buffer_tranche_id: tranche ID to use for the SLRU's per-buffer LWLocks.
247 * bank_tranche_id: tranche ID to use for the bank LWLocks.
248 * sync_handler: which set of functions to use to handle sync requests
249 */
250void
251SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
252 const char *subdir, int buffer_tranche_id, int bank_tranche_id,
253 SyncRequestHandler sync_handler, bool long_segment_names)
254{
255 SlruShared shared;
256 bool found;
257 int nbanks = nslots / SLRU_BANK_SIZE;
258
260
262 SimpleLruShmemSize(nslots, nlsns),
263 &found);
264
266 {
267 /* Initialize locks and shared memory area */
268 char *ptr;
269 Size offset;
270
271 Assert(!found);
272
273 memset(shared, 0, sizeof(SlruSharedData));
274
275 shared->num_slots = nslots;
276 shared->lsn_groups_per_page = nlsns;
277
279
281
282 ptr = (char *) shared;
283 offset = MAXALIGN(sizeof(SlruSharedData));
284 shared->page_buffer = (char **) (ptr + offset);
285 offset += MAXALIGN(nslots * sizeof(char *));
286 shared->page_status = (SlruPageStatus *) (ptr + offset);
287 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
288 shared->page_dirty = (bool *) (ptr + offset);
289 offset += MAXALIGN(nslots * sizeof(bool));
290 shared->page_number = (int64 *) (ptr + offset);
291 offset += MAXALIGN(nslots * sizeof(int64));
292 shared->page_lru_count = (int *) (ptr + offset);
293 offset += MAXALIGN(nslots * sizeof(int));
294
295 /* Initialize LWLocks */
296 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
297 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
298 shared->bank_locks = (LWLockPadded *) (ptr + offset);
299 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
300 shared->bank_cur_lru_count = (int *) (ptr + offset);
301 offset += MAXALIGN(nbanks * sizeof(int));
302
303 if (nlsns > 0)
304 {
305 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
306 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
307 }
308
309 ptr += BUFFERALIGN(offset);
310 for (int slotno = 0; slotno < nslots; slotno++)
311 {
312 LWLockInitialize(&shared->buffer_locks[slotno].lock,
313 buffer_tranche_id);
314
315 shared->page_buffer[slotno] = ptr;
316 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
317 shared->page_dirty[slotno] = false;
318 shared->page_lru_count[slotno] = 0;
319 ptr += BLCKSZ;
320 }
321
322 /* Initialize the slot banks. */
323 for (int bankno = 0; bankno < nbanks; bankno++)
324 {
325 LWLockInitialize(&shared->bank_locks[bankno].lock, bank_tranche_id);
326 shared->bank_cur_lru_count[bankno] = 0;
327 }
328
329 /* Should fit to estimated shmem size */
330 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
331 }
332 else
333 {
334 Assert(found);
335 Assert(shared->num_slots == nslots);
336 }
337
338 /*
339 * Initialize the unshared control struct, including directory path. We
340 * assume caller set PagePrecedes.
341 */
342 ctl->shared = shared;
343 ctl->sync_handler = sync_handler;
344 ctl->long_segment_names = long_segment_names;
345 ctl->nbanks = nbanks;
346 strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
347}
348
349/*
350 * Helper function for GUC check_hook to check whether slru buffers are in
351 * multiples of SLRU_BANK_SIZE.
352 */
353bool
355{
356 /* Valid values are multiples of SLRU_BANK_SIZE */
357 if (*newval % SLRU_BANK_SIZE == 0)
358 return true;
359
360 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
362 return false;
363}
364
365/*
366 * Initialize (or reinitialize) a page to zeroes.
367 *
368 * The page is not actually written, just set up in shared memory.
369 * The slot number of the new page is returned.
370 *
371 * Bank lock must be held at entry, and will be held at exit.
372 */
373int
375{
376 SlruShared shared = ctl->shared;
377 int slotno;
378
380
381 /* Find a suitable buffer slot for the page */
382 slotno = SlruSelectLRUPage(ctl, pageno);
383 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
384 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
385 !shared->page_dirty[slotno]) ||
386 shared->page_number[slotno] == pageno);
387
388 /* Mark the slot as containing this page */
389 shared->page_number[slotno] = pageno;
390 shared->page_status[slotno] = SLRU_PAGE_VALID;
391 shared->page_dirty[slotno] = true;
392 SlruRecentlyUsed(shared, slotno);
393
394 /* Set the buffer to zeroes */
395 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
396
397 /* Set the LSNs for this new page to zero */
398 SimpleLruZeroLSNs(ctl, slotno);
399
400 /*
401 * Assume this page is now the latest active page.
402 *
403 * Note that because both this routine and SlruSelectLRUPage run with
404 * ControlLock held, it is not possible for this to be zeroing a page that
405 * SlruSelectLRUPage is going to evict simultaneously. Therefore, there's
406 * no memory barrier here.
407 */
408 pg_atomic_write_u64(&shared->latest_page_number, pageno);
409
410 /* update the stats counter of zeroed pages */
412
413 return slotno;
414}
415
416/*
417 * Zero all the LSNs we store for this slru page.
418 *
419 * This should be called each time we create a new page, and each time we read
420 * in a page from disk into an existing buffer. (Such an old page cannot
421 * have any interesting LSNs, since we'd have flushed them before writing
422 * the page in the first place.)
423 *
424 * This assumes that InvalidXLogRecPtr is bitwise-all-0.
425 */
426static void
428{
429 SlruShared shared = ctl->shared;
430
431 if (shared->lsn_groups_per_page > 0)
432 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
433 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
434}
435
436/*
437 * Wait for any active I/O on a page slot to finish. (This does not
438 * guarantee that new I/O hasn't been started before we return, though.
439 * In fact the slot might not even contain the same page anymore.)
440 *
441 * Bank lock must be held at entry, and will be held at exit.
442 */
443static void
445{
446 SlruShared shared = ctl->shared;
447 int bankno = SlotGetBankNumber(slotno);
448
449 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
450
451 /* See notes at top of file */
452 LWLockRelease(&shared->bank_locks[bankno].lock);
453 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
454 LWLockRelease(&shared->buffer_locks[slotno].lock);
455 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
456
457 /*
458 * If the slot is still in an io-in-progress state, then either someone
459 * already started a new I/O on the slot, or a previous I/O failed and
460 * neglected to reset the page state. That shouldn't happen, really, but
461 * it seems worth a few extra cycles to check and recover from it. We can
462 * cheaply test for failure by seeing if the buffer lock is still held (we
463 * assume that transaction abort would release the lock).
464 */
465 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
466 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
467 {
469 {
470 /* indeed, the I/O must have failed */
471 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
472 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
473 else /* write_in_progress */
474 {
475 shared->page_status[slotno] = SLRU_PAGE_VALID;
476 shared->page_dirty[slotno] = true;
477 }
478 LWLockRelease(&shared->buffer_locks[slotno].lock);
479 }
480 }
481}
482
483/*
484 * Find a page in a shared buffer, reading it in if necessary.
485 * The page number must correspond to an already-initialized page.
486 *
487 * If write_ok is true then it is OK to return a page that is in
488 * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
489 * that modification of the page is safe. If write_ok is false then we
490 * will not return the page until it is not undergoing active I/O.
491 *
492 * The passed-in xid is used only for error reporting, and may be
493 * InvalidTransactionId if no specific xid is associated with the action.
494 *
495 * Return value is the shared-buffer slot number now holding the page.
496 * The buffer's LRU access info is updated.
497 *
498 * The correct bank lock must be held at entry, and will be held at exit.
499 */
500int
501SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
502 TransactionId xid)
503{
504 SlruShared shared = ctl->shared;
505 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
506
508
509 /* Outer loop handles restart if we must wait for someone else's I/O */
510 for (;;)
511 {
512 int slotno;
513 bool ok;
514
515 /* See if page already is in memory; if not, pick victim slot */
516 slotno = SlruSelectLRUPage(ctl, pageno);
517
518 /* Did we find the page in memory? */
519 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
520 shared->page_number[slotno] == pageno)
521 {
522 /*
523 * If page is still being read in, we must wait for I/O. Likewise
524 * if the page is being written and the caller said that's not OK.
525 */
526 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
527 (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
528 !write_ok))
529 {
530 SimpleLruWaitIO(ctl, slotno);
531 /* Now we must recheck state from the top */
532 continue;
533 }
534 /* Otherwise, it's ready to use */
535 SlruRecentlyUsed(shared, slotno);
536
537 /* update the stats counter of pages found in the SLRU */
539
540 return slotno;
541 }
542
543 /* We found no match; assert we selected a freeable slot */
544 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
545 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
546 !shared->page_dirty[slotno]));
547
548 /* Mark the slot read-busy */
549 shared->page_number[slotno] = pageno;
550 shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
551 shared->page_dirty[slotno] = false;
552
553 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
554 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
555
556 /* Release bank lock while doing I/O */
557 LWLockRelease(banklock);
558
559 /* Do the read */
560 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
561
562 /* Set the LSNs for this newly read-in page to zero */
563 SimpleLruZeroLSNs(ctl, slotno);
564
565 /* Re-acquire bank control lock and update page state */
566 LWLockAcquire(banklock, LW_EXCLUSIVE);
567
568 Assert(shared->page_number[slotno] == pageno &&
569 shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
570 !shared->page_dirty[slotno]);
571
572 shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
573
574 LWLockRelease(&shared->buffer_locks[slotno].lock);
575
576 /* Now it's okay to ereport if we failed */
577 if (!ok)
578 SlruReportIOError(ctl, pageno, xid);
579
580 SlruRecentlyUsed(shared, slotno);
581
582 /* update the stats counter of pages not found in SLRU */
584
585 return slotno;
586 }
587}
588
589/*
590 * Find a page in a shared buffer, reading it in if necessary.
591 * The page number must correspond to an already-initialized page.
592 * The caller must intend only read-only access to the page.
593 *
594 * The passed-in xid is used only for error reporting, and may be
595 * InvalidTransactionId if no specific xid is associated with the action.
596 *
597 * Return value is the shared-buffer slot number now holding the page.
598 * The buffer's LRU access info is updated.
599 *
600 * Bank control lock must NOT be held at entry, but will be held at exit.
601 * It is unspecified whether the lock will be shared or exclusive.
602 */
603int
605{
606 SlruShared shared = ctl->shared;
607 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
608 int bankno = pageno % ctl->nbanks;
609 int bankstart = bankno * SLRU_BANK_SIZE;
610 int bankend = bankstart + SLRU_BANK_SIZE;
611
612 /* Try to find the page while holding only shared lock */
613 LWLockAcquire(banklock, LW_SHARED);
614
615 /* See if page is already in a buffer */
616 for (int slotno = bankstart; slotno < bankend; slotno++)
617 {
618 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
619 shared->page_number[slotno] == pageno &&
620 shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
621 {
622 /* See comments for SlruRecentlyUsed macro */
623 SlruRecentlyUsed(shared, slotno);
624
625 /* update the stats counter of pages found in the SLRU */
627
628 return slotno;
629 }
630 }
631
632 /* No luck, so switch to normal exclusive lock and do regular read */
633 LWLockRelease(banklock);
634 LWLockAcquire(banklock, LW_EXCLUSIVE);
635
636 return SimpleLruReadPage(ctl, pageno, true, xid);
637}
638
639/*
640 * Write a page from a shared buffer, if necessary.
641 * Does nothing if the specified slot is not dirty.
642 *
643 * NOTE: only one write attempt is made here. Hence, it is possible that
644 * the page is still dirty at exit (if someone else re-dirtied it during
645 * the write). However, we *do* attempt a fresh write even if the page
646 * is already being written; this is for checkpoints.
647 *
648 * Bank lock must be held at entry, and will be held at exit.
649 */
650static void
652{
653 SlruShared shared = ctl->shared;
654 int64 pageno = shared->page_number[slotno];
655 int bankno = SlotGetBankNumber(slotno);
656 bool ok;
657
658 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
660
661 /* If a write is in progress, wait for it to finish */
662 while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
663 shared->page_number[slotno] == pageno)
664 {
665 SimpleLruWaitIO(ctl, slotno);
666 }
667
668 /*
669 * Do nothing if page is not dirty, or if buffer no longer contains the
670 * same page we were called for.
671 */
672 if (!shared->page_dirty[slotno] ||
673 shared->page_status[slotno] != SLRU_PAGE_VALID ||
674 shared->page_number[slotno] != pageno)
675 return;
676
677 /*
678 * Mark the slot write-busy, and clear the dirtybit. After this point, a
679 * transaction status update on this page will mark it dirty again.
680 */
682 shared->page_dirty[slotno] = false;
683
684 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
685 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
686
687 /* Release bank lock while doing I/O */
688 LWLockRelease(&shared->bank_locks[bankno].lock);
689
690 /* Do the write */
691 ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
692
693 /* If we failed, and we're in a flush, better close the files */
694 if (!ok && fdata)
695 {
696 for (int i = 0; i < fdata->num_files; i++)
697 CloseTransientFile(fdata->fd[i]);
698 }
699
700 /* Re-acquire bank lock and update page state */
701 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
702
703 Assert(shared->page_number[slotno] == pageno &&
704 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
705
706 /* If we failed to write, mark the page dirty again */
707 if (!ok)
708 shared->page_dirty[slotno] = true;
709
710 shared->page_status[slotno] = SLRU_PAGE_VALID;
711
712 LWLockRelease(&shared->buffer_locks[slotno].lock);
713
714 /* Now it's okay to ereport if we failed */
715 if (!ok)
717
718 /* If part of a checkpoint, count this as a SLRU buffer written. */
719 if (fdata)
720 {
723 }
724}
725
726/*
727 * Wrapper of SlruInternalWritePage, for external callers.
728 * fdata is always passed a NULL here.
729 */
730void
732{
733 Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
734
735 SlruInternalWritePage(ctl, slotno, NULL);
736}
737
738/*
739 * Return whether the given page exists on disk.
740 *
741 * A false return means that either the file does not exist, or that it's not
742 * large enough to contain the given page.
743 */
744bool
746{
748 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
749 int offset = rpageno * BLCKSZ;
750 char path[MAXPGPATH];
751 int fd;
752 bool result;
753 off_t endpos;
754
755 /* update the stats counter of checked pages */
756 pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
757
758 SlruFileName(ctl, path, segno);
759
760 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
761 if (fd < 0)
762 {
763 /* expected: file doesn't exist */
764 if (errno == ENOENT)
765 return false;
766
767 /* report error normally */
769 slru_errno = errno;
770 SlruReportIOError(ctl, pageno, 0);
771 }
772
773 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
774 {
776 slru_errno = errno;
777 SlruReportIOError(ctl, pageno, 0);
778 }
779
780 result = endpos >= (off_t) (offset + BLCKSZ);
781
782 if (CloseTransientFile(fd) != 0)
783 {
785 slru_errno = errno;
786 return false;
787 }
788
789 return result;
790}
791
792/*
793 * Physical read of a (previously existing) page into a buffer slot
794 *
795 * On failure, we cannot just ereport(ERROR) since caller has put state in
796 * shared memory that must be undone. So, we return false and save enough
797 * info in static variables to let SlruReportIOError make the report.
798 *
799 * For now, assume it's not worth keeping a file pointer open across
800 * read/write operations. We could cache one virtual file pointer ...
801 */
802static bool
804{
805 SlruShared shared = ctl->shared;
807 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
808 off_t offset = rpageno * BLCKSZ;
809 char path[MAXPGPATH];
810 int fd;
811
812 SlruFileName(ctl, path, segno);
813
814 /*
815 * In a crash-and-restart situation, it's possible for us to receive
816 * commands to set the commit status of transactions whose bits are in
817 * already-truncated segments of the commit log (see notes in
818 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
819 * where the file doesn't exist, and return zeroes instead.
820 */
821 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
822 if (fd < 0)
823 {
824 if (errno != ENOENT || !InRecovery)
825 {
827 slru_errno = errno;
828 return false;
829 }
830
831 ereport(LOG,
832 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
833 path)));
834 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
835 return true;
836 }
837
838 errno = 0;
839 pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
840 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
841 {
844 slru_errno = errno;
846 return false;
847 }
849
850 if (CloseTransientFile(fd) != 0)
851 {
853 slru_errno = errno;
854 return false;
855 }
856
857 return true;
858}
859
860/*
861 * Physical write of a page from a buffer slot
862 *
863 * On failure, we cannot just ereport(ERROR) since caller has put state in
864 * shared memory that must be undone. So, we return false and save enough
865 * info in static variables to let SlruReportIOError make the report.
866 *
867 * For now, assume it's not worth keeping a file pointer open across
868 * independent read/write operations. We do batch operations during
869 * SimpleLruWriteAll, though.
870 *
871 * fdata is NULL for a standalone write, pointer to open-file info during
872 * SimpleLruWriteAll.
873 */
874static bool
876{
877 SlruShared shared = ctl->shared;
879 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
880 off_t offset = rpageno * BLCKSZ;
881 char path[MAXPGPATH];
882 int fd = -1;
883
884 /* update the stats counter of written pages */
886
887 /*
888 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
889 * write out data before associated WAL records. This is the same action
890 * performed during FlushBuffer() in the main buffer manager.
891 */
892 if (shared->group_lsn != NULL)
893 {
894 /*
895 * We must determine the largest async-commit LSN for the page. This
896 * is a bit tedious, but since this entire function is a slow path
897 * anyway, it seems better to do this here than to maintain a per-page
898 * LSN variable (which'd need an extra comparison in the
899 * transaction-commit path).
900 */
901 XLogRecPtr max_lsn;
902 int lsnindex;
903
904 lsnindex = slotno * shared->lsn_groups_per_page;
905 max_lsn = shared->group_lsn[lsnindex++];
906 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
907 {
908 XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
909
910 if (max_lsn < this_lsn)
911 max_lsn = this_lsn;
912 }
913
914 if (!XLogRecPtrIsInvalid(max_lsn))
915 {
916 /*
917 * As noted above, elog(ERROR) is not acceptable here, so if
918 * XLogFlush were to fail, we must PANIC. This isn't much of a
919 * restriction because XLogFlush is just about all critical
920 * section anyway, but let's make sure.
921 */
923 XLogFlush(max_lsn);
925 }
926 }
927
928 /*
929 * During a SimpleLruWriteAll, we may already have the desired file open.
930 */
931 if (fdata)
932 {
933 for (int i = 0; i < fdata->num_files; i++)
934 {
935 if (fdata->segno[i] == segno)
936 {
937 fd = fdata->fd[i];
938 break;
939 }
940 }
941 }
942
943 if (fd < 0)
944 {
945 /*
946 * If the file doesn't already exist, we should create it. It is
947 * possible for this to need to happen when writing a page that's not
948 * first in its segment; we assume the OS can cope with that. (Note:
949 * it might seem that it'd be okay to create files only when
950 * SimpleLruZeroPage is called for the first page of a segment.
951 * However, if after a crash and restart the REDO logic elects to
952 * replay the log from a checkpoint before the latest one, then it's
953 * possible that we will get commands to set transaction status of
954 * transactions that have already been truncated from the commit log.
955 * Easiest way to deal with that is to accept references to
956 * nonexistent files here and in SlruPhysicalReadPage.)
957 *
958 * Note: it is possible for more than one backend to be executing this
959 * code simultaneously for different pages of the same file. Hence,
960 * don't use O_EXCL or O_TRUNC or anything like that.
961 */
962 SlruFileName(ctl, path, segno);
963 fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
964 if (fd < 0)
965 {
967 slru_errno = errno;
968 return false;
969 }
970
971 if (fdata)
972 {
973 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
974 {
975 fdata->fd[fdata->num_files] = fd;
976 fdata->segno[fdata->num_files] = segno;
977 fdata->num_files++;
978 }
979 else
980 {
981 /*
982 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
983 * fall back to treating it as a standalone write.
984 */
985 fdata = NULL;
986 }
987 }
988 }
989
990 errno = 0;
991 pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
992 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
993 {
995 /* if write didn't set errno, assume problem is no disk space */
996 if (errno == 0)
997 errno = ENOSPC;
999 slru_errno = errno;
1000 if (!fdata)
1002 return false;
1003 }
1005
1006 /* Queue up a sync request for the checkpointer. */
1007 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1008 {
1009 FileTag tag;
1010
1011 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1012 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1013 {
1014 /* No space to enqueue sync request. Do it synchronously. */
1015 pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
1016 if (pg_fsync(fd) != 0)
1017 {
1020 slru_errno = errno;
1022 return false;
1023 }
1025 }
1026 }
1027
1028 /* Close file, unless part of flush request. */
1029 if (!fdata)
1030 {
1031 if (CloseTransientFile(fd) != 0)
1032 {
1034 slru_errno = errno;
1035 return false;
1036 }
1037 }
1038
1039 return true;
1040}
1041
1042/*
1043 * Issue the error message after failure of SlruPhysicalReadPage or
1044 * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
1045 */
1046static void
1048{
1050 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1051 int offset = rpageno * BLCKSZ;
1052 char path[MAXPGPATH];
1053
1054 SlruFileName(ctl, path, segno);
1055 errno = slru_errno;
1056 switch (slru_errcause)
1057 {
1058 case SLRU_OPEN_FAILED:
1059 ereport(ERROR,
1061 errmsg("could not access status of transaction %u", xid),
1062 errdetail("Could not open file \"%s\": %m.", path)));
1063 break;
1064 case SLRU_SEEK_FAILED:
1065 ereport(ERROR,
1067 errmsg("could not access status of transaction %u", xid),
1068 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1069 path, offset)));
1070 break;
1071 case SLRU_READ_FAILED:
1072 if (errno)
1073 ereport(ERROR,
1075 errmsg("could not access status of transaction %u", xid),
1076 errdetail("Could not read from file \"%s\" at offset %d: %m.",
1077 path, offset)));
1078 else
1079 ereport(ERROR,
1080 (errmsg("could not access status of transaction %u", xid),
1081 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1082 break;
1083 case SLRU_WRITE_FAILED:
1084 if (errno)
1085 ereport(ERROR,
1087 errmsg("could not access status of transaction %u", xid),
1088 errdetail("Could not write to file \"%s\" at offset %d: %m.",
1089 path, offset)));
1090 else
1091 ereport(ERROR,
1092 (errmsg("could not access status of transaction %u", xid),
1093 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1094 path, offset)));
1095 break;
1096 case SLRU_FSYNC_FAILED:
1099 errmsg("could not access status of transaction %u", xid),
1100 errdetail("Could not fsync file \"%s\": %m.",
1101 path)));
1102 break;
1103 case SLRU_CLOSE_FAILED:
1104 ereport(ERROR,
1106 errmsg("could not access status of transaction %u", xid),
1107 errdetail("Could not close file \"%s\": %m.",
1108 path)));
1109 break;
1110 default:
1111 /* can't get here, we trust */
1112 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1113 (int) slru_errcause);
1114 break;
1115 }
1116}
1117
1118/*
1119 * Mark a buffer slot "most recently used".
1120 */
1121static inline void
1122SlruRecentlyUsed(SlruShared shared, int slotno)
1123{
1124 int bankno = SlotGetBankNumber(slotno);
1125 int new_lru_count = shared->bank_cur_lru_count[bankno];
1126
1127 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
1128
1129 /*
1130 * The reason for the if-test is that there are often many consecutive
1131 * accesses to the same page (particularly the latest page). By
1132 * suppressing useless increments of bank_cur_lru_count, we reduce the
1133 * probability that old pages' counts will "wrap around" and make them
1134 * appear recently used.
1135 *
1136 * We allow this code to be executed concurrently by multiple processes
1137 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1138 * are atomic, this should not cause any completely-bogus values to enter
1139 * the computation. However, it is possible for either bank_cur_lru_count
1140 * or individual page_lru_count entries to be "reset" to lower values than
1141 * they should have, in case a process is delayed while it executes this
1142 * function. With care in SlruSelectLRUPage(), this does little harm, and
1143 * in any case the absolute worst possible consequence is a nonoptimal
1144 * choice of page to evict. The gain from allowing concurrent reads of
1145 * SLRU pages seems worth it.
1146 */
1147 if (new_lru_count != shared->page_lru_count[slotno])
1148 {
1149 shared->bank_cur_lru_count[bankno] = ++new_lru_count;
1150 shared->page_lru_count[slotno] = new_lru_count;
1151 }
1152}
1153
1154/*
1155 * Select the slot to re-use when we need a free slot for the given page.
1156 *
1157 * The target page number is passed not only because we need to know the
1158 * correct bank to use, but also because we need to consider the possibility
1159 * that some other process reads in the target page while we are doing I/O to
1160 * free a slot. Hence, check or recheck to see if any slot already holds the
1161 * target page, and return that slot if so. Thus, the returned slot is
1162 * *either* a slot already holding the pageno (could be any state except
1163 * EMPTY), *or* a freeable slot (state EMPTY or CLEAN).
1164 *
1165 * The correct bank lock must be held at entry, and will be held at exit.
1166 */
1167static int
1169{
1170 SlruShared shared = ctl->shared;
1171
1172 /* Outer loop handles restart after I/O */
1173 for (;;)
1174 {
1175 int cur_count;
1176 int bestvalidslot = 0; /* keep compiler quiet */
1177 int best_valid_delta = -1;
1178 int64 best_valid_page_number = 0; /* keep compiler quiet */
1179 int bestinvalidslot = 0; /* keep compiler quiet */
1180 int best_invalid_delta = -1;
1181 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1182 int bankno = pageno % ctl->nbanks;
1183 int bankstart = bankno * SLRU_BANK_SIZE;
1184 int bankend = bankstart + SLRU_BANK_SIZE;
1185
1187
1188 /* See if page already has a buffer assigned */
1189 for (int slotno = bankstart; slotno < bankend; slotno++)
1190 {
1191 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1192 shared->page_number[slotno] == pageno)
1193 return slotno;
1194 }
1195
1196 /*
1197 * If we find any EMPTY slot, just select that one. Else choose a
1198 * victim page to replace. We normally take the least recently used
1199 * valid page, but we will never take the slot containing
1200 * latest_page_number, even if it appears least recently used. We
1201 * will select a slot that is already I/O busy only if there is no
1202 * other choice: a read-busy slot will not be least recently used once
1203 * the read finishes, and waiting for an I/O on a write-busy slot is
1204 * inferior to just picking some other slot. Testing shows the slot
1205 * we pick instead will often be clean, allowing us to begin a read at
1206 * once.
1207 *
1208 * Normally the page_lru_count values will all be different and so
1209 * there will be a well-defined LRU page. But since we allow
1210 * concurrent execution of SlruRecentlyUsed() within
1211 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1212 * acquire the same lru_count values. In that case we break ties by
1213 * choosing the furthest-back page.
1214 *
1215 * Notice that this next line forcibly advances cur_lru_count to a
1216 * value that is certainly beyond any value that will be in the
1217 * page_lru_count array after the loop finishes. This ensures that
1218 * the next execution of SlruRecentlyUsed will mark the page newly
1219 * used, even if it's for a page that has the current counter value.
1220 * That gets us back on the path to having good data when there are
1221 * multiple pages with the same lru_count.
1222 */
1223 cur_count = (shared->bank_cur_lru_count[bankno])++;
1224 for (int slotno = bankstart; slotno < bankend; slotno++)
1225 {
1226 int this_delta;
1227 int64 this_page_number;
1228
1229 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1230 return slotno;
1231
1232 this_delta = cur_count - shared->page_lru_count[slotno];
1233 if (this_delta < 0)
1234 {
1235 /*
1236 * Clean up in case shared updates have caused cur_count
1237 * increments to get "lost". We back off the page counts,
1238 * rather than trying to increase cur_count, to avoid any
1239 * question of infinite loops or failure in the presence of
1240 * wrapped-around counts.
1241 */
1242 shared->page_lru_count[slotno] = cur_count;
1243 this_delta = 0;
1244 }
1245
1246 /*
1247 * If this page is the one most recently zeroed, don't consider it
1248 * an eviction candidate. See comments in SimpleLruZeroPage for an
1249 * explanation about the lack of a memory barrier here.
1250 */
1251 this_page_number = shared->page_number[slotno];
1252 if (this_page_number ==
1254 continue;
1255
1256 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1257 {
1258 if (this_delta > best_valid_delta ||
1259 (this_delta == best_valid_delta &&
1260 ctl->PagePrecedes(this_page_number,
1261 best_valid_page_number)))
1262 {
1263 bestvalidslot = slotno;
1264 best_valid_delta = this_delta;
1265 best_valid_page_number = this_page_number;
1266 }
1267 }
1268 else
1269 {
1270 if (this_delta > best_invalid_delta ||
1271 (this_delta == best_invalid_delta &&
1272 ctl->PagePrecedes(this_page_number,
1273 best_invalid_page_number)))
1274 {
1275 bestinvalidslot = slotno;
1276 best_invalid_delta = this_delta;
1277 best_invalid_page_number = this_page_number;
1278 }
1279 }
1280 }
1281
1282 /*
1283 * If all pages (except possibly the latest one) are I/O busy, we'll
1284 * have to wait for an I/O to complete and then retry. In that
1285 * unhappy case, we choose to wait for the I/O on the least recently
1286 * used slot, on the assumption that it was likely initiated first of
1287 * all the I/Os in progress and may therefore finish first.
1288 */
1289 if (best_valid_delta < 0)
1290 {
1291 SimpleLruWaitIO(ctl, bestinvalidslot);
1292 continue;
1293 }
1294
1295 /*
1296 * If the selected page is clean, we're set.
1297 */
1298 if (!shared->page_dirty[bestvalidslot])
1299 return bestvalidslot;
1300
1301 /*
1302 * Write the page.
1303 */
1304 SlruInternalWritePage(ctl, bestvalidslot, NULL);
1305
1306 /*
1307 * Now loop back and try again. This is the easiest way of dealing
1308 * with corner cases such as the victim page being re-dirtied while we
1309 * wrote it.
1310 */
1311 }
1312}
1313
1314/*
1315 * Write dirty pages to disk during checkpoint or database shutdown. Flushing
1316 * is deferred until the next call to ProcessSyncRequests(), though we do fsync
1317 * the containing directory here to make sure that newly created directory
1318 * entries are on disk.
1319 */
1320void
1321SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
1322{
1323 SlruShared shared = ctl->shared;
1324 SlruWriteAllData fdata;
1325 int64 pageno = 0;
1326 int prevbank = SlotGetBankNumber(0);
1327 bool ok;
1328
1329 /* update the stats counter of flushes */
1331
1332 /*
1333 * Find and write dirty pages
1334 */
1335 fdata.num_files = 0;
1336
1337 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1338
1339 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1340 {
1341 int curbank = SlotGetBankNumber(slotno);
1342
1343 /*
1344 * If the current bank lock is not same as the previous bank lock then
1345 * release the previous lock and acquire the new lock.
1346 */
1347 if (curbank != prevbank)
1348 {
1349 LWLockRelease(&shared->bank_locks[prevbank].lock);
1350 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1351 prevbank = curbank;
1352 }
1353
1354 /* Do nothing if slot is unused */
1355 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1356 continue;
1357
1358 SlruInternalWritePage(ctl, slotno, &fdata);
1359
1360 /*
1361 * In some places (e.g. checkpoints), we cannot assert that the slot
1362 * is clean now, since another process might have re-dirtied it
1363 * already. That's okay.
1364 */
1365 Assert(allow_redirtied ||
1366 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1367 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1368 !shared->page_dirty[slotno]));
1369 }
1370
1371 LWLockRelease(&shared->bank_locks[prevbank].lock);
1372
1373 /*
1374 * Now close any files that were open
1375 */
1376 ok = true;
1377 for (int i = 0; i < fdata.num_files; i++)
1378 {
1379 if (CloseTransientFile(fdata.fd[i]) != 0)
1380 {
1382 slru_errno = errno;
1383 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1384 ok = false;
1385 }
1386 }
1387 if (!ok)
1389
1390 /* Ensure that directory entries for new files are on disk. */
1391 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1392 fsync_fname(ctl->Dir, true);
1393}
1394
1395/*
1396 * Remove all segments before the one holding the passed page number
1397 *
1398 * All SLRUs prevent concurrent calls to this function, either with an LWLock
1399 * or by calling it only as part of a checkpoint. Mutual exclusion must begin
1400 * before computing cutoffPage. Mutual exclusion must end after any limit
1401 * update that would permit other backends to write fresh data into the
1402 * segment immediately preceding the one containing cutoffPage. Otherwise,
1403 * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
1404 * after it has accrued freshly-written data.
1405 */
1406void
1408{
1409 SlruShared shared = ctl->shared;
1410 int prevbank;
1411
1412 /* update the stats counter of truncates */
1414
1415 /*
1416 * Scan shared memory and remove any pages preceding the cutoff page, to
1417 * ensure we won't rewrite them later. (Since this is normally called in
1418 * or just after a checkpoint, any dirty pages should have been flushed
1419 * already ... we're just being extra careful here.)
1420 */
1421restart:
1422
1423 /*
1424 * An important safety check: the current endpoint page must not be
1425 * eligible for removal. This check is just a backstop against wraparound
1426 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1427 * outdated value; therefore we don't add a memory barrier.
1428 */
1429 if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1430 cutoffPage))
1431 {
1432 ereport(LOG,
1433 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1434 ctl->Dir)));
1435 return;
1436 }
1437
1438 prevbank = SlotGetBankNumber(0);
1439 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1440 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1441 {
1442 int curbank = SlotGetBankNumber(slotno);
1443
1444 /*
1445 * If the current bank lock is not same as the previous bank lock then
1446 * release the previous lock and acquire the new lock.
1447 */
1448 if (curbank != prevbank)
1449 {
1450 LWLockRelease(&shared->bank_locks[prevbank].lock);
1451 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1452 prevbank = curbank;
1453 }
1454
1455 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1456 continue;
1457 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1458 continue;
1459
1460 /*
1461 * If page is clean, just change state to EMPTY (expected case).
1462 */
1463 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1464 !shared->page_dirty[slotno])
1465 {
1466 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1467 continue;
1468 }
1469
1470 /*
1471 * Hmm, we have (or may have) I/O operations acting on the page, so
1472 * we've got to wait for them to finish and then start again. This is
1473 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1474 * wouldn't it be OK to just discard it without writing it?
1475 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1476 * not delete this page in the end; even if we don't delete it, we
1477 * won't have cause to read its data again. For now, keep the logic
1478 * the same as it was.)
1479 */
1480 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1481 SlruInternalWritePage(ctl, slotno, NULL);
1482 else
1483 SimpleLruWaitIO(ctl, slotno);
1484
1485 LWLockRelease(&shared->bank_locks[prevbank].lock);
1486 goto restart;
1487 }
1488
1489 LWLockRelease(&shared->bank_locks[prevbank].lock);
1490
1491 /* Now we can remove the old segment(s) */
1492 (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1493}
1494
1495/*
1496 * Delete an individual SLRU segment.
1497 *
1498 * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1499 * they either can't yet contain anything, or have already been cleaned out.
1500 */
1501static void
1503{
1504 char path[MAXPGPATH];
1505
1506 /* Forget any fsync requests queued for this segment. */
1507 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1508 {
1509 FileTag tag;
1510
1511 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1513 }
1514
1515 /* Unlink the file. */
1516 SlruFileName(ctl, path, segno);
1517 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1518 unlink(path);
1519}
1520
1521/*
1522 * Delete an individual SLRU segment, identified by the segment number.
1523 */
1524void
1526{
1527 SlruShared shared = ctl->shared;
1528 int prevbank = SlotGetBankNumber(0);
1529 bool did_write;
1530
1531 /* Clean out any possibly existing references to the segment. */
1532 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1533restart:
1534 did_write = false;
1535 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1536 {
1537 int64 pagesegno;
1538 int curbank = SlotGetBankNumber(slotno);
1539
1540 /*
1541 * If the current bank lock is not same as the previous bank lock then
1542 * release the previous lock and acquire the new lock.
1543 */
1544 if (curbank != prevbank)
1545 {
1546 LWLockRelease(&shared->bank_locks[prevbank].lock);
1547 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1548 prevbank = curbank;
1549 }
1550
1551 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1552 continue;
1553
1554 pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1555 /* not the segment we're looking for */
1556 if (pagesegno != segno)
1557 continue;
1558
1559 /* If page is clean, just change state to EMPTY (expected case). */
1560 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1561 !shared->page_dirty[slotno])
1562 {
1563 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1564 continue;
1565 }
1566
1567 /* Same logic as SimpleLruTruncate() */
1568 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1569 SlruInternalWritePage(ctl, slotno, NULL);
1570 else
1571 SimpleLruWaitIO(ctl, slotno);
1572
1573 did_write = true;
1574 }
1575
1576 /*
1577 * Be extra careful and re-check. The IO functions release the control
1578 * lock, so new pages could have been read in.
1579 */
1580 if (did_write)
1581 goto restart;
1582
1584
1585 LWLockRelease(&shared->bank_locks[prevbank].lock);
1586}
1587
1588/*
1589 * Determine whether a segment is okay to delete.
1590 *
1591 * segpage is the first page of the segment, and cutoffPage is the oldest (in
1592 * PagePrecedes order) page in the SLRU containing still-useful data. Since
1593 * every core PagePrecedes callback implements "wrap around", check the
1594 * segment's first and last pages:
1595 *
1596 * first<cutoff && last<cutoff: yes
1597 * first<cutoff && last>=cutoff: no; cutoff falls inside this segment
1598 * first>=cutoff && last<cutoff: no; wrap point falls inside this segment
1599 * first>=cutoff && last>=cutoff: no; every page of this segment is too young
1600 */
1601static bool
1603{
1604 int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
1605
1606 Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
1607
1608 return (ctl->PagePrecedes(segpage, cutoffPage) &&
1609 ctl->PagePrecedes(seg_last_page, cutoffPage));
1610}
1611
#ifdef USE_ASSERT_CHECKING
/*
 * Exercise ctl->PagePrecedes and SlruMayDeleteSegment() for keys located at
 * the given offset within a page.
 *
 * ctl:      the SLRU whose PagePrecedes callback is under test.
 * per_page: number of keys stored on one page.
 * offset:   position of the tested key within its page (0 .. per_page - 1).
 *
 * All checks are Asserts, so this function has no effect beyond them.
 */
static void
SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
{
	TransactionId lhs,
				rhs;
	int64		newestPage,
				oldestPage;
	TransactionId newestXact,
				oldestXact;

	/*
	 * Compare an XID pair having undefined order (see RFC 1982), a pair at
	 * "opposite ends" of the XID space.  TransactionIdPrecedes() treats each
	 * as preceding the other.  If RHS is oldestXact, LHS is the first XID we
	 * must not assign.
	 */
	lhs = per_page + offset;	/* skip first page to avoid non-normal XIDs */
	rhs = lhs + (1U << 31);
	Assert(TransactionIdPrecedes(lhs, rhs));
	Assert(TransactionIdPrecedes(rhs, lhs));
	Assert(!TransactionIdPrecedes(lhs - 1, rhs));
	Assert(TransactionIdPrecedes(rhs, lhs - 1));
	Assert(TransactionIdPrecedes(lhs + 1, rhs));
	Assert(!TransactionIdPrecedes(rhs, lhs + 1));
	Assert(!TransactionIdFollowsOrEquals(lhs, rhs));
	Assert(!TransactionIdFollowsOrEquals(rhs, lhs));
	Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
	Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
	Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
	Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
		   || (1U << 31) % per_page != 0);	/* See CommitTsPagePrecedes() */
	Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
		   || (1U << 31) % per_page != 0);
	Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
	Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
	Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));

	/*
	 * GetNewTransactionId() has assigned the last XID it can safely use, and
	 * that XID is in the *LAST* page of the second segment.  We must not
	 * delete that segment.
	 */
	newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1;
	newestXact = newestPage * per_page + offset;
	Assert(newestXact / per_page == newestPage);
	oldestXact = newestXact + 1;
	oldestXact -= 1U << 31;
	oldestPage = oldestXact / per_page;
	Assert(!SlruMayDeleteSegment(ctl,
								 (newestPage -
								  newestPage % SLRU_PAGES_PER_SEGMENT),
								 oldestPage));

	/*
	 * GetNewTransactionId() has assigned the last XID it can safely use, and
	 * that XID is in the *FIRST* page of the second segment.  We must not
	 * delete that segment.
	 */
	newestPage = SLRU_PAGES_PER_SEGMENT;
	newestXact = newestPage * per_page + offset;
	Assert(newestXact / per_page == newestPage);
	oldestXact = newestXact + 1;
	oldestXact -= 1U << 31;
	oldestPage = oldestXact / per_page;
	Assert(!SlruMayDeleteSegment(ctl,
								 (newestPage -
								  newestPage % SLRU_PAGES_PER_SEGMENT),
								 oldestPage));
}

/*
 * Unit-test a PagePrecedes function.
 *
 * ctl:      the SLRU whose PagePrecedes callback is tested.
 * per_page: number of keys stored on one page.
 *
 * This assumes every uint32 >= FirstNormalTransactionId is a valid key.  It
 * assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
 * (MultiXactMemberCtl separates flags from XIDs.  NotifyCtl has
 * variable-length entries, no keys, and no random access.  These unit tests
 * do not apply to them.)
 */
void
SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
{
	/* Test first, middle and last entries of a page. */
	SlruPagePrecedesTestOffset(ctl, per_page, 0);
	SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2);
	SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1);
}
#endif
1704
1705/*
1706 * SlruScanDirectory callback
1707 * This callback reports true if there's any segment wholly prior to the
1708 * one containing the page passed as "data".
1709 */
1710bool
1712 void *data)
1713{
1714 int64 cutoffPage = *(int64 *) data;
1715
1716 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1717 return true; /* found one; don't iterate any more */
1718
1719 return false; /* keep going */
1720}
1721
1722/*
1723 * SlruScanDirectory callback.
1724 * This callback deletes segments prior to the one passed in as "data".
1725 */
1726static bool
1728 void *data)
1729{
1730 int64 cutoffPage = *(int64 *) data;
1731
1732 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1734
1735 return false; /* keep going */
1736}
1737
1738/*
1739 * SlruScanDirectory callback.
1740 * This callback deletes all segments.
1741 */
1742bool
1744{
1746
1747 return false; /* keep going */
1748}
1749
1750/*
1751 * An internal function used by SlruScanDirectory().
1752 *
1753 * Returns true if a file with a name of a given length may be a correct
1754 * SLRU segment.
1755 */
1756static inline bool
1758{
1759 if (ctl->long_segment_names)
1760 return (len == 15); /* see SlruFileName() */
1761 else
1762
1763 /*
1764 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1765 * 73c986adde5 allowed 6-character length.
1766 *
1767 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1768 * numbers, and the corresponding 15-character file names, which may
1769 * eventually deprecate the support for 4, 5, and 6-character names.
1770 */
1771 return (len == 4 || len == 5 || len == 6);
1772}
1773
1774/*
1775 * Scan the SimpleLru directory and apply a callback to each file found in it.
1776 *
1777 * If the callback returns true, the scan is stopped. The last return value
1778 * from the callback is returned.
1779 *
1780 * The callback receives the following arguments: 1. the SlruCtl struct for the
1781 * slru being truncated; 2. the filename being considered; 3. the page number
1782 * for the first page of that file; 4. a pointer to the opaque data given to us
1783 * by the caller.
1784 *
1785 * Note that the ordering in which the directory is scanned is not guaranteed.
1786 *
1787 * Note that no locking is applied.
1788 */
1789bool
1791{
1792 bool retval = false;
1793 DIR *cldir;
1794 struct dirent *clde;
1795 int64 segno;
1796 int64 segpage;
1797
1798 cldir = AllocateDir(ctl->Dir);
1799 while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1800 {
1801 size_t len;
1802
1803 len = strlen(clde->d_name);
1804
1806 strspn(clde->d_name, "0123456789ABCDEF") == len)
1807 {
1808 segno = strtoi64(clde->d_name, NULL, 16);
1809 segpage = segno * SLRU_PAGES_PER_SEGMENT;
1810
1811 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1812 ctl->Dir, clde->d_name);
1813 retval = callback(ctl, clde->d_name, segpage, data);
1814 if (retval)
1815 break;
1816 }
1817 }
1818 FreeDir(cldir);
1819
1820 return retval;
1821}
1822
1823/*
1824 * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
1825 * that they can provide the correct "SlruCtl" (otherwise we don't know how to
1826 * build the path), but they just forward to this common implementation that
1827 * performs the fsync.
1828 */
1829int
1830SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
1831{
1832 int fd;
1833 int save_errno;
1834 int result;
1835
1836 SlruFileName(ctl, path, ftag->segno);
1837
1838 fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1839 if (fd < 0)
1840 return -1;
1841
1842 pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1843 result = pg_fsync(fd);
1845 save_errno = errno;
1846
1848
1849 errno = save_errno;
1850 return result;
1851}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
#define INT64CONST(x)
Definition: c.h:516
#define Min(x, y)
Definition: c.h:975
#define MAXALIGN(LEN)
Definition: c.h:782
#define Max(x, y)
Definition: c.h:969
#define BUFFERALIGN(LEN)
Definition: c.h:784
int64_t int64
Definition: c.h:499
#define PG_BINARY
Definition: c.h:1244
uint32_t uint32
Definition: c.h:502
#define MemSet(start, val, len)
Definition: c.h:991
uint32 TransactionId
Definition: c.h:623
size_t Size
Definition: c.h:576
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:149
int FreeDir(DIR *dir)
Definition: fd.c:3025
int CloseTransientFile(int fd)
Definition: fd.c:2871
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int data_sync_elevel(int elevel)
Definition: fd.c:4001
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2907
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2973
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
int NBuffers
Definition: globals.c:143
bool IsUnderPostmaster
Definition: globals.c:121
#define newval
#define GUC_check_errdetail
Definition: guc.h:481
Assert(PointerIsAligned(start, uint64))
int i
Definition: isn.c:77
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1985
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2029
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1353
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
#define MAXPGPATH
const void size_t len
const void * data
static char * filename
Definition: pg_dumpall.c:123
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
PgStat_CheckpointerStats PendingCheckpointerStats
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
#define pg_pwrite
Definition: port.h:227
#define pg_pread
Definition: port.h:226
#define snprintf
Definition: port.h:239
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
static int fd(const char *x, int i)
Definition: preproc-init.c:105
tree ctl
Definition: radixtree.h:1838
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition: slru.c:251
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition: slru.c:91
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition: slru.c:803
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:604
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:156
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:731
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1321
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition: slru.c:1602
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:1047
struct SlruWriteAllData SlruWriteAllData
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:427
#define SLRU_BANK_SIZE
Definition: slru.c:143
int SimpleLruAutotuneBuffers(int divisor, int max)
Definition: slru.c:231
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:875
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition: slru.c:1757
static SlruErrorCause slru_errcause
Definition: slru.c:174
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:123
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:444
static int slru_errno
Definition: slru.c:175
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition: slru.c:745
void SlruDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1525
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:651
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1790
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1743
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:501
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition: slru.c:1830
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:1168
#define SlotGetBankNumber(slotno)
Definition: slru.c:148
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:374
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition: slru.c:1407
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1502
struct SlruWriteAllData * SlruWriteAll
Definition: slru.c:132
SlruErrorCause
Definition: slru.c:165
@ SLRU_WRITE_FAILED
Definition: slru.c:169
@ SLRU_FSYNC_FAILED
Definition: slru.c:170
@ SLRU_SEEK_FAILED
Definition: slru.c:167
@ SLRU_OPEN_FAILED
Definition: slru.c:166
@ SLRU_CLOSE_FAILED
Definition: slru.c:171
@ SLRU_READ_FAILED
Definition: slru.c:168
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:198
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1711
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1727
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition: slru.c:1122
bool check_slru_buffers(const char *name, int *newval)
Definition: slru.c:354
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:175
SlruSharedData * SlruShared
Definition: slru.h:121
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition: slru.h:199
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.h:204
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:39
#define SLRU_MAX_ALLOWED_BUFFERS
Definition: slru.h:24
SlruPageStatus
Definition: slru.h:48
@ SLRU_PAGE_VALID
Definition: slru.h:51
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:52
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:50
@ SLRU_PAGE_EMPTY
Definition: slru.h:49
int ckpt_slru_written
Definition: xlog.h:168
Definition: dirent.c:26
Definition: sync.h:51
uint64 segno
Definition: sync.h:55
Definition: lwlock.h:42
PgStat_Counter slru_written
Definition: pgstat.h:264
int slru_stats_idx
Definition: slru.h:118
int64 * page_number
Definition: slru.h:73
int num_slots
Definition: slru.h:64
LWLockPadded * bank_locks
Definition: slru.h:80
int * page_lru_count
Definition: slru.h:74
pg_atomic_uint64 latest_page_number
Definition: slru.h:115
XLogRecPtr * group_lsn
Definition: slru.h:107
int * bank_cur_lru_count
Definition: slru.h:97
int lsn_groups_per_page
Definition: slru.h:108
SlruPageStatus * page_status
Definition: slru.h:71
bool * page_dirty
Definition: slru.h:72
LWLockPadded * buffer_locks
Definition: slru.h:77
char ** page_buffer
Definition: slru.h:70
int num_files
Definition: slru.c:127
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:128
int64 segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:129
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
SyncRequestHandler
Definition: sync.h:36
@ SYNC_HANDLER_NONE
Definition: sync.h:42
@ SYNC_FORGET_REQUEST
Definition: sync.h:27
@ SYNC_REQUEST
Definition: sync.h:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
#define InvalidTransactionId
Definition: transam.h:31
LWLock lock
Definition: lwlock.h:70
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
const char * name
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
bool InRecovery
Definition: xlogutils.c:50