PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "storage/shmem_internal.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/wait_event.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define MAX_WRITEALL_BUFFERS   16
 
#define SLRU_BANK_BITSHIFT   4
 
#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)
 
#define SlotGetBankNumber(slotno)   ((slotno) >> SLRU_BANK_BITSHIFT)
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static int SlruFileName (SlruDesc *ctl, char *path, int64 segno)
 
static void SimpleLruZeroLSNs (SlruDesc *ctl, int slotno)
 
static void SimpleLruWaitIO (SlruDesc *ctl, int slotno)
 
static void SlruInternalWritePage (SlruDesc *ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruDesc *ctl, int64 pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruDesc *ctl, int64 pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruDesc *ctl, int64 pageno, const void *opaque_data)
 
static int SlruSelectLRUPage (SlruDesc *ctl, int64 pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruDesc *ctl, char *filename, int64 segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruDesc *ctl, int64 segno)
 
static void SlruRecentlyUsed (SlruShared shared, int slotno)
 
static Size SimpleLruShmemSize (int nslots, int nlsns)
 
int SimpleLruAutotuneBuffers (int divisor, int max)
 
void SimpleLruRequestWithOpts (const SlruOpts *options)
 
void shmem_slru_init (void *location, ShmemStructOpts *base_options)
 
void shmem_slru_attach (void *location, ShmemStructOpts *base_options)
 
bool check_slru_buffers (const char *name, int *newval)
 
int SimpleLruZeroPage (SlruDesc *ctl, int64 pageno)
 
void SimpleLruZeroAndWritePage (SlruDesc *ctl, int64 pageno)
 
int SimpleLruReadPage (SlruDesc *ctl, int64 pageno, bool write_ok, const void *opaque_data)
 
int SimpleLruReadPage_ReadOnly (SlruDesc *ctl, int64 pageno, const void *opaque_data)
 
void SimpleLruWritePage (SlruDesc *ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruDesc *ctl, int64 pageno)
 
void SimpleLruWriteAll (SlruDesc *ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruDesc *ctl, int64 cutoffPage)
 
void SlruDeleteSegment (SlruDesc *ctl, int64 segno)
 
static bool SlruMayDeleteSegment (SlruDesc *ctl, int64 segpage, int64 cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruDesc *ctl, char *filename, int64 segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruDesc *ctl, char *filename, int64 segpage, void *data)
 
static bool SlruCorrectSegmentFilenameLength (SlruDesc *ctl, size_t len)
 
bool SlruScanDirectory (SlruDesc *ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruDesc *ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition isn.c:73
static int fb(int x)
Definition sync.h:51

Definition at line 159 of file slru.c.

167{
175
177static int slru_errno;
178
179
180static void SimpleLruZeroLSNs(SlruDesc *ctl, int slotno);
181static void SimpleLruWaitIO(SlruDesc *ctl, int slotno);
183static bool SlruPhysicalReadPage(SlruDesc *ctl, int64 pageno, int slotno);
184static bool SlruPhysicalWritePage(SlruDesc *ctl, int64 pageno, int slotno,
186static void SlruReportIOError(SlruDesc *ctl, int64 pageno,
187 const void *opaque_data);
188static int SlruSelectLRUPage(SlruDesc *ctl, int64 pageno);
189
191 int64 segpage, void *data);
192static void SlruInternalDeleteSegment(SlruDesc *ctl, int64 segno);
193static inline void SlruRecentlyUsed(SlruShared shared, int slotno);
194
195
196/*
197 * Initialization of shared memory
198 */
199
/*
 * Compute the amount of shared memory needed for an SLRU with 'nslots' page
 * slots and 'nlsns' LSN groups per page.
 *
 * The total is the MAXALIGN'd SlruSharedData header plus one MAXALIGN'd
 * array per bookkeeping field (sized by nslots, or by nbanks for the
 * per-bank arrays), plus the group_lsn[] array when nlsns > 0.  That sum is
 * rounded up with BUFFERALIGN and followed by nslots page buffers of BLCKSZ
 * bytes each.
 *
 * nslots must be a multiple of SLRU_BANK_SIZE (asserted below).
 */
200static Size
201SimpleLruShmemSize(int nslots, int nlsns)
202{
203 int nbanks = nslots / SLRU_BANK_SIZE;
204 Size sz;
205
207 Assert(nslots % SLRU_BANK_SIZE == 0);
208
209 /* we assume nslots isn't so large as to risk overflow */
210 sz = MAXALIGN(sizeof(SlruSharedData));
211 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
212 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
213 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
214 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
215 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
216 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
217 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
218 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
219
220 if (nlsns > 0)
221 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
222
223 return BUFFERALIGN(sz) + BLCKSZ * nslots;
224}
225
226/*
227 * Determine a number of SLRU buffers to use.
228 *
229 * We simply divide shared_buffers by the divisor given and cap
230 * that at the maximum given; but always at least SLRU_BANK_SIZE.
231 * Round down to the nearest multiple of SLRU_BANK_SIZE.
232 */
233int
235{
236 return Min(max - (max % SLRU_BANK_SIZE),
239}
240
241/*
242 * Register a simple LRU cache in shared memory.
243 */
244void
246{
248
249 Assert(options->name != NULL);
250 Assert(options->nslots > 0);
251 Assert(options->PagePrecedes != NULL);
252 Assert(options->errdetail_for_io_error != NULL);
253
255 sizeof(SlruOpts));
257
258 options_copy->base.name = options->name;
259 options_copy->base.size = SimpleLruShmemSize(options_copy->nslots, options_copy->nlsns);
260
262}
263
264/* Initialize locks and shared memory area */
265void
267{
269 SlruDesc *desc = (SlruDesc *) options->desc;
270 char namebuf[NAMEDATALEN];
271 SlruShared shared;
272 int nslots = options->nslots;
273 int nbanks = nslots / SLRU_BANK_SIZE;
274 int nlsns = options->nlsns;
275 char *ptr;
276 Size offset;
277
278 shared = (SlruShared) location;
279 desc->shared = shared;
280 desc->nbanks = nbanks;
281 memcpy(&desc->options, options, sizeof(SlruOpts));
282
283 /* assign new tranche IDs, if not given */
284 if (desc->options.buffer_tranche_id == 0)
285 {
286 snprintf(namebuf, sizeof(namebuf), "%s buffer", desc->options.name);
287 desc->options.buffer_tranche_id = LWLockNewTrancheId(namebuf);
288 }
289 if (desc->options.bank_tranche_id == 0)
290 {
291 snprintf(namebuf, sizeof(namebuf), "%s bank", desc->options.name);
292 desc->options.bank_tranche_id = LWLockNewTrancheId(namebuf);
293 }
294
296
297 memset(shared, 0, sizeof(SlruSharedData));
298
299 shared->num_slots = nslots;
300 shared->lsn_groups_per_page = nlsns;
301
303
304 shared->slru_stats_idx = pgstat_get_slru_index(desc->options.name);
305
306 ptr = (char *) shared;
307 offset = MAXALIGN(sizeof(SlruSharedData));
308 shared->page_buffer = (char **) (ptr + offset);
309 offset += MAXALIGN(nslots * sizeof(char *));
310 shared->page_status = (SlruPageStatus *) (ptr + offset);
311 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
312 shared->page_dirty = (bool *) (ptr + offset);
313 offset += MAXALIGN(nslots * sizeof(bool));
314 shared->page_number = (int64 *) (ptr + offset);
315 offset += MAXALIGN(nslots * sizeof(int64));
316 shared->page_lru_count = (int *) (ptr + offset);
317 offset += MAXALIGN(nslots * sizeof(int));
318
319 /* Initialize LWLocks */
320 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
321 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
322 shared->bank_locks = (LWLockPadded *) (ptr + offset);
323 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
324 shared->bank_cur_lru_count = (int *) (ptr + offset);
325 offset += MAXALIGN(nbanks * sizeof(int));
326
327 if (nlsns > 0)
328 {
329 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
330 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
331 }
332
333 ptr += BUFFERALIGN(offset);
334 for (int slotno = 0; slotno < nslots; slotno++)
335 {
337 desc->options.buffer_tranche_id);
338
339 shared->page_buffer[slotno] = ptr;
341 shared->page_dirty[slotno] = false;
342 shared->page_lru_count[slotno] = 0;
343 ptr += BLCKSZ;
344 }
345
346 /* Initialize the slot banks. */
347 for (int bankno = 0; bankno < nbanks; bankno++)
348 {
349 LWLockInitialize(&shared->bank_locks[bankno].lock, desc->options.bank_tranche_id);
350 shared->bank_cur_lru_count[bankno] = 0;
351 }
352
353 /* Should fit to estimated shmem size */
354 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
355}
356
357void
359{
361 SlruDesc *desc = (SlruDesc *) options->desc;
362 int nslots = options->nslots;
363 int nbanks = nslots / SLRU_BANK_SIZE;
364
365 desc->shared = (SlruShared) location;
366 desc->nbanks = nbanks;
367 memcpy(&desc->options, options, sizeof(SlruOpts));
368}
369
370
371/*
372 * Helper function for GUC check_hook to check whether slru buffers are in
373 * multiples of SLRU_BANK_SIZE.
374 */
375bool
376check_slru_buffers(const char *name, int *newval)
377{
378 /* Valid values are multiples of SLRU_BANK_SIZE */
379 if (*newval % SLRU_BANK_SIZE == 0)
380 return true;
381
382 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
384 return false;
385}
386
387/*
388 * Initialize (or reinitialize) a page to zeroes.
389 *
390 * The page is not actually written, just set up in shared memory.
391 * The slot number of the new page is returned.
392 *
393 * Bank lock must be held at entry, and will be held at exit.
394 */
395int
397{
398 SlruShared shared = ctl->shared;
399 int slotno;
400
402
403 /* Find a suitable buffer slot for the page */
404 slotno = SlruSelectLRUPage(ctl, pageno);
406 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
407 !shared->page_dirty[slotno]) ||
408 shared->page_number[slotno] == pageno);
409
410 /* Mark the slot as containing this page */
411 shared->page_number[slotno] = pageno;
413 shared->page_dirty[slotno] = true;
414 SlruRecentlyUsed(shared, slotno);
415
416 /* Set the buffer to zeroes */
417 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
418
419 /* Set the LSNs for this new page to zero */
421
422 /*
423 * Assume this page is now the latest active page.
424 *
425 * Note that because both this routine and SlruSelectLRUPage run with a
426 * SLRU bank lock held, it is not possible for this to be zeroing a page
427 * that SlruSelectLRUPage is going to evict simultaneously. Therefore,
428 * there's no memory barrier here.
429 */
430 pg_atomic_write_u64(&shared->latest_page_number, pageno);
431
432 /* update the stats counter of zeroed pages */
434
435 return slotno;
436}
437
438/*
439 * Zero all the LSNs we store for this slru page.
440 *
441 * This should be called each time we create a new page, and each time we read
442 * in a page from disk into an existing buffer. (Such an old page cannot
443 * have any interesting LSNs, since we'd have flushed them before writing
444 * the page in the first place.)
445 *
446 * This assumes that InvalidXLogRecPtr is bitwise-all-0.
447 */
448static void
450{
451 SlruShared shared = ctl->shared;
452
453 if (shared->lsn_groups_per_page > 0)
454 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
455 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
456}
457
458/*
459 * This is a convenience wrapper for the common case of zeroing a page and
460 * immediately flushing it to disk.
461 *
462 * SLRU bank lock is acquired and released here.
463 */
464void
466{
467 int slotno;
468 LWLock *lock;
469
470 lock = SimpleLruGetBankLock(ctl, pageno);
472
473 /* Create and zero the page */
474 slotno = SimpleLruZeroPage(ctl, pageno);
475
476 /* Make sure it's written out */
478 Assert(!ctl->shared->page_dirty[slotno]);
479
480 LWLockRelease(lock);
481}
482
483/*
484 * Wait for any active I/O on a page slot to finish. (This does not
485 * guarantee that new I/O hasn't been started before we return, though.
486 * In fact the slot might not even contain the same page anymore.)
487 *
488 * Bank lock must be held at entry, and will be held at exit.
489 */
490static void
492{
493 SlruShared shared = ctl->shared;
495
497
498 /* See notes at top of file */
503
504 /*
505 * If the slot is still in an io-in-progress state, then either someone
506 * already started a new I/O on the slot, or a previous I/O failed and
507 * neglected to reset the page state. That shouldn't happen, really, but
508 * it seems worth a few extra cycles to check and recover from it. We can
509 * cheaply test for failure by seeing if the buffer lock is still held (we
510 * assume that transaction abort would release the lock).
511 */
514 {
516 {
517 /* indeed, the I/O must have failed */
520 else /* write_in_progress */
521 {
523 shared->page_dirty[slotno] = true;
524 }
526 }
527 }
528}
529
530/*
531 * Find a page in a shared buffer, reading it in if necessary.
532 * The page number must correspond to an already-initialized page.
533 *
534 * If write_ok is true then it is OK to return a page that is in
535 * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
536 * that modification of the page is safe. If write_ok is false then we
537 * will not return the page until it is not undergoing active I/O.
538 *
539 * On error, the passed-in 'opaque_data' is passed to the
540 * 'errdetail_for_io_error' callback, to provide details on the operation that
541 * failed. It is only used for error reporting.
542 *
543 * Return value is the shared-buffer slot number now holding the page.
544 * The buffer's LRU access info is updated.
545 *
546 * The correct bank lock must be held at entry, and will be held at exit.
547 */
548int
550 const void *opaque_data)
551{
552 SlruShared shared = ctl->shared;
554
556
557 /* Outer loop handles restart if we must wait for someone else's I/O */
558 for (;;)
559 {
560 int slotno;
561 bool ok;
562
563 /* See if page already is in memory; if not, pick victim slot */
564 slotno = SlruSelectLRUPage(ctl, pageno);
565
566 /* Did we find the page in memory? */
567 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
568 shared->page_number[slotno] == pageno)
569 {
570 /*
571 * If page is still being read in, we must wait for I/O. Likewise
572 * if the page is being written and the caller said that's not OK.
573 */
576 !write_ok))
577 {
579 /* Now we must recheck state from the top */
580 continue;
581 }
582 /* Otherwise, it's ready to use */
583 SlruRecentlyUsed(shared, slotno);
584
585 /* update the stats counter of pages found in the SLRU */
587
588 return slotno;
589 }
590
591 /* We found no match; assert we selected a freeable slot */
593 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
594 !shared->page_dirty[slotno]));
595
596 /* Mark the slot read-busy */
597 shared->page_number[slotno] = pageno;
599 shared->page_dirty[slotno] = false;
600
601 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
603
604 /* Release bank lock while doing I/O */
606
607 /* Do the read */
608 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
609
610 /* Set the LSNs for this newly read-in page to zero */
612
613 /* Re-acquire bank control lock and update page state */
615
616 Assert(shared->page_number[slotno] == pageno &&
618 !shared->page_dirty[slotno]);
619
621
623
624 /* Now it's okay to ereport if we failed */
625 if (!ok)
627
628 SlruRecentlyUsed(shared, slotno);
629
630 /* update the stats counter of pages not found in SLRU */
632
633 return slotno;
634 }
635}
636
637/*
638 * Find a page in a shared buffer, reading it in if necessary.
639 * The page number must correspond to an already-initialized page.
640 * The caller must intend only read-only access to the page.
641 *
642 * On error, the passed-in 'opaque_data' is passed to the
643 * 'errdetail_for_io_error' callback, to provide details on the operation that
644 * failed. It is only used for error reporting.
645 *
646 * Return value is the shared-buffer slot number now holding the page.
647 * The buffer's LRU access info is updated.
648 *
649 * Bank control lock must NOT be held at entry, but will be held at exit.
650 * It is unspecified whether the lock will be shared or exclusive.
651 */
652int
654{
655 SlruShared shared = ctl->shared;
657 int bankno = pageno % ctl->nbanks;
660
661 /* Try to find the page while holding only shared lock */
663
664 /* See if page is already in a buffer */
665 for (int slotno = bankstart; slotno < bankend; slotno++)
666 {
667 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
668 shared->page_number[slotno] == pageno &&
670 {
671 /* See comments for SlruRecentlyUsed() */
672 SlruRecentlyUsed(shared, slotno);
673
674 /* update the stats counter of pages found in the SLRU */
676
677 return slotno;
678 }
679 }
680
681 /* No luck, so switch to normal exclusive lock and do regular read */
684
685 return SimpleLruReadPage(ctl, pageno, true, opaque_data);
686}
687
688/*
689 * Write a page from a shared buffer, if necessary.
690 * Does nothing if the specified slot is not dirty.
691 *
692 * NOTE: only one write attempt is made here. Hence, it is possible that
693 * the page is still dirty at exit (if someone else re-dirtied it during
694 * the write). However, we *do* attempt a fresh write even if the page
695 * is already being written; this is for checkpoints.
696 *
697 * Bank lock must be held at entry, and will be held at exit.
698 */
699static void
701{
702 SlruShared shared = ctl->shared;
703 int64 pageno = shared->page_number[slotno];
705 bool ok;
706
709
710 /* If a write is in progress, wait for it to finish */
712 shared->page_number[slotno] == pageno)
713 {
715 }
716
717 /*
718 * Do nothing if page is not dirty, or if buffer no longer contains the
719 * same page we were called for.
720 */
721 if (!shared->page_dirty[slotno] ||
722 shared->page_status[slotno] != SLRU_PAGE_VALID ||
723 shared->page_number[slotno] != pageno)
724 return;
725
726 /*
727 * Mark the slot write-busy, and clear the dirtybit. After this point, a
728 * transaction status update on this page will mark it dirty again.
729 */
731 shared->page_dirty[slotno] = false;
732
733 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
735
736 /* Release bank lock while doing I/O */
738
739 /* Do the write */
741
742 /* If we failed, and we're in a flush, better close the files */
743 if (!ok && fdata)
744 {
745 for (int i = 0; i < fdata->num_files; i++)
747 }
748
749 /* Re-acquire bank lock and update page state */
751
752 Assert(shared->page_number[slotno] == pageno &&
754
755 /* If we failed to write, mark the page dirty again */
756 if (!ok)
757 shared->page_dirty[slotno] = true;
758
760
762
763 /* Now it's okay to ereport if we failed */
764 if (!ok)
765 SlruReportIOError(ctl, pageno, NULL);
766
767 /* If part of a checkpoint, count this as a SLRU buffer written. */
768 if (fdata)
769 {
772 }
773}
774
775/*
776 * Wrapper of SlruInternalWritePage, for external callers.
777 * fdata is always passed a NULL here.
778 */
779void
781{
782 Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
783
785}
786
787/*
788 * Return whether the given page exists on disk.
789 *
790 * A false return means that either the file does not exist, or that it's not
791 * large enough to contain the given page.
792 */
793bool
795{
796 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
797 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
798 int offset = rpageno * BLCKSZ;
799 char path[MAXPGPATH];
800 int fd;
801 bool result;
803
804 /* update the stats counter of checked pages */
805 pgstat_count_slru_blocks_exists(ctl->shared->slru_stats_idx);
806
807 SlruFileName(ctl, path, segno);
808
810 if (fd < 0)
811 {
812 /* expected: file doesn't exist */
813 if (errno == ENOENT)
814 return false;
815
816 /* report error normally */
819 SlruReportIOError(ctl, pageno, NULL);
820 }
821
822 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
823 {
826 SlruReportIOError(ctl, pageno, NULL);
827 }
828
829 result = endpos >= (off_t) (offset + BLCKSZ);
830
831 if (CloseTransientFile(fd) != 0)
832 {
835 return false;
836 }
837
838 return result;
839}
840
841/*
842 * Physical read of a (previously existing) page into a buffer slot
843 *
844 * On failure, we cannot just ereport(ERROR) since caller has put state in
845 * shared memory that must be undone. So, we return false and save enough
846 * info in static variables to let SlruReportIOError make the report.
847 *
848 * For now, assume it's not worth keeping a file pointer open across
849 * read/write operations. We could cache one virtual file pointer ...
850 */
851static bool
853{
854 SlruShared shared = ctl->shared;
855 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
856 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
857 off_t offset = rpageno * BLCKSZ;
858 char path[MAXPGPATH];
859 int fd;
860
861 SlruFileName(ctl, path, segno);
862
863 /*
864 * In a crash-and-restart situation, it's possible for us to receive
865 * commands to set the commit status of transactions whose bits are in
866 * already-truncated segments of the commit log (see notes in
867 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
868 * where the file doesn't exist, and return zeroes instead.
869 */
871 if (fd < 0)
872 {
873 if (errno != ENOENT || !InRecovery)
874 {
877 return false;
878 }
879
880 ereport(LOG,
881 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
882 path)));
883 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
884 return true;
885 }
886
887 errno = 0;
889 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
890 {
895 return false;
896 }
898
899 if (CloseTransientFile(fd) != 0)
900 {
903 return false;
904 }
905
906 return true;
907}
908
909/*
910 * Physical write of a page from a buffer slot
911 *
912 * On failure, we cannot just ereport(ERROR) since caller has put state in
913 * shared memory that must be undone. So, we return false and save enough
914 * info in static variables to let SlruReportIOError make the report.
915 *
916 * For now, assume it's not worth keeping a file pointer open across
917 * independent read/write operations. We do batch operations during
918 * SimpleLruWriteAll, though.
919 *
920 * fdata is NULL for a standalone write, pointer to open-file info during
921 * SimpleLruWriteAll.
922 */
923static bool
925{
926 SlruShared shared = ctl->shared;
927 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
928 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
929 off_t offset = rpageno * BLCKSZ;
930 char path[MAXPGPATH];
931 int fd = -1;
932
933 /* update the stats counter of written pages */
935
936 /*
937 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
938 * write out data before associated WAL records. This is the same action
939 * performed during FlushBuffer() in the main buffer manager.
940 */
941 if (shared->group_lsn != NULL)
942 {
943 /*
944 * We must determine the largest async-commit LSN for the page. This
945 * is a bit tedious, but since this entire function is a slow path
946 * anyway, it seems better to do this here than to maintain a per-page
947 * LSN variable (which'd need an extra comparison in the
948 * transaction-commit path).
949 */
951 int lsnindex;
952
954 max_lsn = shared->group_lsn[lsnindex++];
955 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
956 {
958
959 if (max_lsn < this_lsn)
961 }
962
964 {
965 /*
966 * As noted above, elog(ERROR) is not acceptable here, so if
967 * XLogFlush were to fail, we must PANIC. This isn't much of a
968 * restriction because XLogFlush is just about all critical
969 * section anyway, but let's make sure.
970 */
974 }
975 }
976
977 /*
978 * During a SimpleLruWriteAll, we may already have the desired file open.
979 */
980 if (fdata)
981 {
982 for (int i = 0; i < fdata->num_files; i++)
983 {
984 if (fdata->segno[i] == segno)
985 {
986 fd = fdata->fd[i];
987 break;
988 }
989 }
990 }
991
992 if (fd < 0)
993 {
994 /*
995 * If the file doesn't already exist, we should create it. It is
996 * possible for this to need to happen when writing a page that's not
997 * first in its segment; we assume the OS can cope with that. (Note:
998 * it might seem that it'd be okay to create files only when
999 * SimpleLruZeroPage is called for the first page of a segment.
1000 * However, if after a crash and restart the REDO logic elects to
1001 * replay the log from a checkpoint before the latest one, then it's
1002 * possible that we will get commands to set transaction status of
1003 * transactions that have already been truncated from the commit log.
1004 * Easiest way to deal with that is to accept references to
1005 * nonexistent files here and in SlruPhysicalReadPage.)
1006 *
1007 * Note: it is possible for more than one backend to be executing this
1008 * code simultaneously for different pages of the same file. Hence,
1009 * don't use O_EXCL or O_TRUNC or anything like that.
1010 */
1011 SlruFileName(ctl, path, segno);
1013 if (fd < 0)
1014 {
1016 slru_errno = errno;
1017 return false;
1018 }
1019
1020 if (fdata)
1021 {
1022 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
1023 {
1024 fdata->fd[fdata->num_files] = fd;
1025 fdata->segno[fdata->num_files] = segno;
1026 fdata->num_files++;
1027 }
1028 else
1029 {
1030 /*
1031 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
1032 * fall back to treating it as a standalone write.
1033 */
1034 fdata = NULL;
1035 }
1036 }
1037 }
1038
1039 errno = 0;
1041 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
1042 {
1044 /* if write didn't set errno, assume problem is no disk space */
1045 if (errno == 0)
1046 errno = ENOSPC;
1048 slru_errno = errno;
1049 if (!fdata)
1051 return false;
1052 }
1054
1055 /* Queue up a sync request for the checkpointer. */
1056 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1057 {
1058 FileTag tag;
1059
1060 INIT_SLRUFILETAG(tag, ctl->options.sync_handler, segno);
1061 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1062 {
1063 /* No space to enqueue sync request. Do it synchronously. */
1065 if (pg_fsync(fd) != 0)
1066 {
1069 slru_errno = errno;
1071 return false;
1072 }
1074 }
1075 }
1076
1077 /* Close file, unless part of flush request. */
1078 if (!fdata)
1079 {
1080 if (CloseTransientFile(fd) != 0)
1081 {
1083 slru_errno = errno;
1084 return false;
1085 }
1086 }
1087
1088 return true;
1089}
1090
1091/*
1092 * Issue the error message after failure of SlruPhysicalReadPage or
1093 * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
1094 */
1095static void
1096SlruReportIOError(SlruDesc *ctl, int64 pageno, const void *opaque_data)
1097{
1098 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1099 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1100 int offset = rpageno * BLCKSZ;
1101 char path[MAXPGPATH];
1102
1103 SlruFileName(ctl, path, segno);
1104 errno = slru_errno;
1105 switch (slru_errcause)
1106 {
1107 case SLRU_OPEN_FAILED:
1108 ereport(ERROR,
1110 errmsg("could not open file \"%s\": %m", path),
1111 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1112 break;
1113 case SLRU_SEEK_FAILED:
1114 ereport(ERROR,
1116 errmsg("could not seek in file \"%s\" to offset %d: %m",
1117 path, offset),
1118 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1119 break;
1120 case SLRU_READ_FAILED:
1121 if (errno)
1122 ereport(ERROR,
1124 errmsg("could not read from file \"%s\" at offset %d: %m",
1125 path, offset),
1126 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1127 else
1128 ereport(ERROR,
1129 (errmsg("could not read from file \"%s\" at offset %d: read too few bytes",
1130 path, offset),
1131 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1132 break;
1133 case SLRU_WRITE_FAILED:
1134 if (errno)
1135 ereport(ERROR,
1137 errmsg("Could not write to file \"%s\" at offset %d: %m",
1138 path, offset),
1139 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1140 else
1141 ereport(ERROR,
1142 (errmsg("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1143 path, offset),
1144 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1145 break;
1146 case SLRU_FSYNC_FAILED:
1149 errmsg("could not fsync file \"%s\": %m",
1150 path),
1151 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1152 break;
1153 case SLRU_CLOSE_FAILED:
1154 ereport(ERROR,
1156 errmsg("could not close file \"%s\": %m",
1157 path),
1158 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1159 break;
1160 default:
1161 /* can't get here, we trust */
1162 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1163 (int) slru_errcause);
1164 break;
1165 }
1166}
1167
1168/*
1169 * Mark a buffer slot "most recently used".
1170 */
1171static inline void
1173{
1176
1178
1179 /*
1180 * The reason for the if-test is that there are often many consecutive
1181 * accesses to the same page (particularly the latest page). By
1182 * suppressing useless increments of bank_cur_lru_count, we reduce the
1183 * probability that old pages' counts will "wrap around" and make them
1184 * appear recently used.
1185 *
1186 * We allow this code to be executed concurrently by multiple processes
1187 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1188 * are atomic, this should not cause any completely-bogus values to enter
1189 * the computation. However, it is possible for either bank_cur_lru_count
1190 * or individual page_lru_count entries to be "reset" to lower values than
1191 * they should have, in case a process is delayed while it executes this
1192 * function. With care in SlruSelectLRUPage(), this does little harm, and
1193 * in any case the absolute worst possible consequence is a nonoptimal
1194 * choice of page to evict. The gain from allowing concurrent reads of
1195 * SLRU pages seems worth it.
1196 */
1197 if (new_lru_count != shared->page_lru_count[slotno])
1198 {
1201 }
1202}
1203
1204/*
1205 * Select the slot to re-use when we need a free slot for the given page.
1206 *
1207 * The target page number is passed not only because we need to know the
1208 * correct bank to use, but also because we need to consider the possibility
1209 * that some other process reads in the target page while we are doing I/O to
1210 * free a slot. Hence, check or recheck to see if any slot already holds the
1211 * target page, and return that slot if so. Thus, the returned slot is
1212 * *either* a slot already holding the pageno (could be any state except
1213 * EMPTY), *or* a freeable slot (state EMPTY or CLEAN).
1214 *
1215 * The correct bank lock must be held at entry, and will be held at exit.
1216 */
1217static int
1219{
1220 SlruShared shared = ctl->shared;
1221
1222 /* Outer loop handles restart after I/O */
1223 for (;;)
1224 {
1225 int cur_count;
1226 int bestvalidslot = 0; /* keep compiler quiet */
1227 int best_valid_delta = -1;
1228 int64 best_valid_page_number = 0; /* keep compiler quiet */
1229 int bestinvalidslot = 0; /* keep compiler quiet */
1230 int best_invalid_delta = -1;
1231 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1232 int bankno = pageno % ctl->nbanks;
1235
1237
1238 /* See if page already has a buffer assigned */
1239 for (int slotno = bankstart; slotno < bankend; slotno++)
1240 {
1241 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1242 shared->page_number[slotno] == pageno)
1243 return slotno;
1244 }
1245
1246 /*
1247 * If we find any EMPTY slot, just select that one. Else choose a
1248 * victim page to replace. We normally take the least recently used
1249 * valid page, but we will never take the slot containing
1250 * latest_page_number, even if it appears least recently used. We
1251 * will select a slot that is already I/O busy only if there is no
1252 * other choice: a read-busy slot will not be least recently used once
1253 * the read finishes, and waiting for an I/O on a write-busy slot is
1254 * inferior to just picking some other slot. Testing shows the slot
1255 * we pick instead will often be clean, allowing us to begin a read at
1256 * once.
1257 *
1258 * Normally the page_lru_count values will all be different and so
1259 * there will be a well-defined LRU page. But since we allow
1260 * concurrent execution of SlruRecentlyUsed() within
1261 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1262 * acquire the same lru_count values. In that case we break ties by
1263 * choosing the furthest-back page.
1264 *
1265 * Notice that this next line forcibly advances cur_lru_count to a
1266 * value that is certainly beyond any value that will be in the
1267 * page_lru_count array after the loop finishes. This ensures that
1268 * the next execution of SlruRecentlyUsed will mark the page newly
1269 * used, even if it's for a page that has the current counter value.
1270 * That gets us back on the path to having good data when there are
1271 * multiple pages with the same lru_count.
1272 */
1273 cur_count = (shared->bank_cur_lru_count[bankno])++;
1274 for (int slotno = bankstart; slotno < bankend; slotno++)
1275 {
1276 int this_delta;
1278
1279 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1280 return slotno;
1281
1283 if (this_delta < 0)
1284 {
1285 /*
1286 * Clean up in case shared updates have caused cur_count
1287 * increments to get "lost". We back off the page counts,
1288 * rather than trying to increase cur_count, to avoid any
1289 * question of infinite loops or failure in the presence of
1290 * wrapped-around counts.
1291 */
1292 shared->page_lru_count[slotno] = cur_count;
1293 this_delta = 0;
1294 }
1295
1296 /*
1297 * If this page is the one most recently zeroed, don't consider it
1298 * an eviction candidate. See comments in SimpleLruZeroPage for an
1299 * explanation about the lack of a memory barrier here.
1300 */
1302 if (this_page_number ==
1304 continue;
1305
1306 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1307 {
1310 ctl->options.PagePrecedes(this_page_number,
1312 {
1316 }
1317 }
1318 else
1319 {
1322 ctl->options.PagePrecedes(this_page_number,
1324 {
1328 }
1329 }
1330 }
1331
1332 /*
1333 * If all pages (except possibly the latest one) are I/O busy, we'll
1334 * have to wait for an I/O to complete and then retry. In that
1335 * unhappy case, we choose to wait for the I/O on the least recently
1336 * used slot, on the assumption that it was likely initiated first of
1337 * all the I/Os in progress and may therefore finish first.
1338 */
1339 if (best_valid_delta < 0)
1340 {
1342 continue;
1343 }
1344
1345 /*
1346 * If the selected page is clean, we're set.
1347 */
1348 if (!shared->page_dirty[bestvalidslot])
1349 return bestvalidslot;
1350
1351 /*
1352 * Write the page.
1353 */
1355
1356 /*
1357 * Now loop back and try again. This is the easiest way of dealing
1358 * with corner cases such as the victim page being re-dirtied while we
1359 * wrote it.
1360 */
1361 }
1362}
1363
1364/*
1365 * Write dirty pages to disk during checkpoint or database shutdown. Flushing
1366 * is deferred until the next call to ProcessSyncRequests(), though we do fsync
1367 * the containing directory here to make sure that newly created directory
1368 * entries are on disk.
1369 */
1370void
1372{
1373 SlruShared shared = ctl->shared;
1375 int64 pageno = 0;
1376 int prevbank = SlotGetBankNumber(0);
1377 bool ok;
1378
1379 /* update the stats counter of flushes */
1381
1382 /*
1383 * Find and write dirty pages
1384 */
1385 fdata.num_files = 0;
1386
1388
1389 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1390 {
1392
1393 /*
1394 * If this slot belongs to a different bank than the one whose lock we
1395 * hold, release that lock and acquire the lock of the new bank.
1396 */
1397 if (curbank != prevbank)
1398 {
1401 prevbank = curbank;
1402 }
1403
1404 /* Do nothing if slot is unused */
1405 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1406 continue;
1407
1409
1410 /*
1411 * In some places (e.g. checkpoints), we cannot assert that the slot
1412 * is clean now, since another process might have re-dirtied it
1413 * already. That's okay.
1414 */
1416 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1417 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1418 !shared->page_dirty[slotno]));
1419 }
1420
1422
1423 /*
1424 * Now close any files that were open
1425 */
1426 ok = true;
1427 for (int i = 0; i < fdata.num_files; i++)
1428 {
1429 if (CloseTransientFile(fdata.fd[i]) != 0)
1430 {
1432 slru_errno = errno;
1433 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1434 ok = false;
1435 }
1436 }
1437 if (!ok)
1438 SlruReportIOError(ctl, pageno, NULL);
1439
1440 /* Ensure that directory entries for new files are on disk. */
1441 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1442 fsync_fname(ctl->options.Dir, true);
1443}
1444
1445/*
1446 * Remove all segments before the one holding the passed page number
1447 *
1448 * All SLRUs prevent concurrent calls to this function, either with an LWLock
1449 * or by calling it only as part of a checkpoint. Mutual exclusion must begin
1450 * before computing cutoffPage. Mutual exclusion must end after any limit
1451 * update that would permit other backends to write fresh data into the
1452 * segment immediately preceding the one containing cutoffPage. Otherwise,
1453 * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
1454 * after it has accrued freshly-written data.
1455 */
1456void
1458{
1459 SlruShared shared = ctl->shared;
1460 int prevbank;
1461
1462 /* update the stats counter of truncates */
1464
1465 /*
1466 * Scan shared memory and remove any pages preceding the cutoff page, to
1467 * ensure we won't rewrite them later. (Since this is normally called in
1468 * or just after a checkpoint, any dirty pages should have been flushed
1469 * already ... we're just being extra careful here.)
1470 */
1471restart:
1472
1473 /*
1474 * An important safety check: the current endpoint page must not be
1475 * eligible for removal. This check is just a backstop against wraparound
1476 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1477 * outdated value; therefore we don't add a memory barrier.
1478 */
1479 if (ctl->options.PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1480 cutoffPage))
1481 {
1482 ereport(LOG,
1483 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1484 ctl->options.Dir)));
1485 return;
1486 }
1487
1490 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1491 {
1493
1494 /*
1495 * If this slot belongs to a different bank than the one whose lock we
1496 * hold, release that lock and acquire the lock of the new bank.
1497 */
1498 if (curbank != prevbank)
1499 {
1502 prevbank = curbank;
1503 }
1504
1505 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1506 continue;
1507 if (!ctl->options.PagePrecedes(shared->page_number[slotno], cutoffPage))
1508 continue;
1509
1510 /*
1511 * If page is clean, just change state to EMPTY (expected case).
1512 */
1513 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1514 !shared->page_dirty[slotno])
1515 {
1517 continue;
1518 }
1519
1520 /*
1521 * Hmm, we have (or may have) I/O operations acting on the page, so
1522 * we've got to wait for them to finish and then start again. This is
1523 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1524 * wouldn't it be OK to just discard it without writing it?
1525 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1526 * not delete this page in the end; even if we don't delete it, we
1527 * won't have cause to read its data again. For now, keep the logic
1528 * the same as it was.)
1529 */
1530 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1532 else
1534
1536 goto restart;
1537 }
1538
1540
1541 /* Now we can remove the old segment(s) */
1543}
1544
1545/*
1546 * Delete an individual SLRU segment.
1547 *
1548 * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1549 * they either can't yet contain anything, or have already been cleaned out.
1550 */
1551static void
1553{
1554 char path[MAXPGPATH];
1555
1556 /* Forget any fsync requests queued for this segment. */
1557 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1558 {
1559 FileTag tag;
1560
1561 INIT_SLRUFILETAG(tag, ctl->options.sync_handler, segno);
1563 }
1564
1565 /* Unlink the file. */
1566 SlruFileName(ctl, path, segno);
1567 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1568 unlink(path);
1569}
1570
1571/*
1572 * Delete an individual SLRU segment, identified by the segment number.
1573 */
1574void
1576{
1577 SlruShared shared = ctl->shared;
1578 int prevbank = SlotGetBankNumber(0);
1579 bool did_write;
1580
1581 /* Clean out any possibly existing references to the segment. */
1583restart:
1584 did_write = false;
1585 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1586 {
1589
1590 /*
1591 * If this slot belongs to a different bank than the one whose lock we
1592 * hold, release that lock and acquire the lock of the new bank.
1593 */
1594 if (curbank != prevbank)
1595 {
1598 prevbank = curbank;
1599 }
1600
1601 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1602 continue;
1603
1605 /* not the segment we're looking for */
1606 if (pagesegno != segno)
1607 continue;
1608
1609 /* If page is clean, just change state to EMPTY (expected case). */
1610 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1611 !shared->page_dirty[slotno])
1612 {
1614 continue;
1615 }
1616
1617 /* Same logic as SimpleLruTruncate() */
1618 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1620 else
1622
1623 did_write = true;
1624 }
1625
1626 /*
1627 * Be extra careful and re-check. The IO functions release the bank
1628 * lock, so new pages could have been read in.
1629 */
1630 if (did_write)
1631 goto restart;
1632
1634
1636}
1637
1638/*
1639 * Determine whether a segment is okay to delete.
1640 *
1641 * segpage is the first page of the segment, and cutoffPage is the oldest (in
1642 * PagePrecedes order) page in the SLRU containing still-useful data. Since
1643 * every core PagePrecedes callback implements "wrap around", check the
1644 * segment's first and last pages:
1645 *
1646 * first<cutoff && last<cutoff: yes
1647 * first<cutoff && last>=cutoff: no; cutoff falls inside this segment
1648 * first>=cutoff && last<cutoff: no; wrap point falls inside this segment
1649 * first>=cutoff && last>=cutoff: no; every page of this segment is too young
1650 */
1651static bool
1653{
1655
1657
1658 return (ctl->options.PagePrecedes(segpage, cutoffPage) &&
1659 ctl->options.PagePrecedes(seg_last_page, cutoffPage));
1660}
1661
1662#ifdef USE_ASSERT_CHECKING
1663static void
1665{
1667 rhs;
1669 oldestPage;
1671 oldestXact;
1672
1673 /* This must be called after the Slru has been initialized */
1674 Assert(ctl->options.PagePrecedes);
1675
1676 /*
1677 * Compare an XID pair having undefined order (see RFC 1982), a pair at
1678 * "opposite ends" of the XID space. TransactionIdPrecedes() treats each
1679 * as preceding the other. If RHS is oldestXact, LHS is the first XID we
1680 * must not assign.
1681 */
1682 lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */
1683 rhs = lhs + (1U << 31);
1692 Assert(!ctl->options.PagePrecedes(lhs / per_page, lhs / per_page));
1693 Assert(!ctl->options.PagePrecedes(lhs / per_page, rhs / per_page));
1694 Assert(!ctl->options.PagePrecedes(rhs / per_page, lhs / per_page));
1695 Assert(!ctl->options.PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
1696 Assert(ctl->options.PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
1697 Assert(ctl->options.PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
1698 Assert(ctl->options.PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
1699 || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */
1700 Assert(ctl->options.PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
1701 || (1U << 31) % per_page != 0);
1702 Assert(ctl->options.PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
1703 Assert(ctl->options.PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
1704 Assert(!ctl->options.PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
1705
1706 /*
1707 * GetNewTransactionId() has assigned the last XID it can safely use, and
1708 * that XID is in the *LAST* page of the second segment. We must not
1709 * delete that segment.
1710 */
1712 newestXact = newestPage * per_page + offset;
1714 oldestXact = newestXact + 1;
1715 oldestXact -= 1U << 31;
1716 oldestPage = oldestXact / per_page;
1718 (newestPage -
1720 oldestPage));
1721
1722 /*
1723 * GetNewTransactionId() has assigned the last XID it can safely use, and
1724 * that XID is in the *FIRST* page of the second segment. We must not
1725 * delete that segment.
1726 */
1728 newestXact = newestPage * per_page + offset;
1730 oldestXact = newestXact + 1;
1731 oldestXact -= 1U << 31;
1732 oldestPage = oldestXact / per_page;
1734 (newestPage -
1736 oldestPage));
1737}
1738
1739/*
1740 * Unit-test a PagePrecedes function.
1741 *
1742 * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It
1743 * assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
1744 * (MultiXactMemberCtl separates flags from XIDs. NotifyCtl has
1745 * variable-length entries, no keys, and no random access. These unit tests
1746 * do not apply to them.)
1747 */
1748void
1750{
1751 /* Test first, middle and last entries of a page. */
1755}
1756#endif
1757
1758/*
1759 * SlruScanDirectory callback
1760 * This callback reports true if there's any segment wholly prior to the
1761 * one containing the page passed as "data".
1762 */
1763bool
1765 void *data)
1766{
1767 int64 cutoffPage = *(int64 *) data;
1768
1770 return true; /* found one; don't iterate any more */
1771
1772 return false; /* keep going */
1773}
1774
1775/*
1776 * SlruScanDirectory callback.
1777 * This callback deletes segments prior to the one passed in as "data".
1778 */
1779static bool
1781 void *data)
1782{
1783 int64 cutoffPage = *(int64 *) data;
1784
1787
1788 return false; /* keep going */
1789}
1790
1791/*
1792 * SlruScanDirectory callback.
1793 * This callback deletes all segments.
1794 */
1795bool
1797{
1799
1800 return false; /* keep going */
1801}
1802
1803/*
1804 * An internal function used by SlruScanDirectory().
1805 *
1806 * Returns true if a file with a name of a given length may be a correct
1807 * SLRU segment.
1808 */
1809static inline bool
1811{
1812 if (ctl->options.long_segment_names)
1813 return (len == 15); /* see SlruFileName() */
1814 else
1815
1816 /*
1817 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1818 * 73c986adde5 allowed 6-character length.
1819 *
1820 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1821 * numbers, and the corresponding 15-character file names, which may
1822 * eventually deprecate the support for 4, 5, and 6-character names.
1823 */
1824 return (len == 4 || len == 5 || len == 6);
1825}
1826
1827/*
1828 * Scan the SimpleLru directory and apply a callback to each file found in it.
1829 *
1830 * If the callback returns true, the scan is stopped. The last return value
1831 * from the callback is returned.
1832 *
1833 * The callback receives the following arguments: 1. the SlruDesc struct for the
1834 * SLRU being scanned; 2. the filename being considered; 3. the page number
1835 * for the first page of that file; 4. a pointer to the opaque data given to us
1836 * by the caller.
1837 *
1838 * Note that the ordering in which the directory is scanned is not guaranteed.
1839 *
1840 * Note that no locking is applied.
1841 */
1842bool
1844{
1845 bool retval = false;
1846 DIR *cldir;
1847 struct dirent *clde;
1848 int64 segno;
1849 int64 segpage;
1850
1851 cldir = AllocateDir(ctl->options.Dir);
1852 while ((clde = ReadDir(cldir, ctl->options.Dir)) != NULL)
1853 {
1854 size_t len;
1855
1856 len = strlen(clde->d_name);
1857
1859 strspn(clde->d_name, "0123456789ABCDEF") == len)
1860 {
1861 segno = strtoi64(clde->d_name, NULL, 16);
1863
1864 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1865 ctl->options.Dir, clde->d_name);
1866 retval = callback(ctl, clde->d_name, segpage, data);
1867 if (retval)
1868 break;
1869 }
1870 }
1871 FreeDir(cldir);
1872
1873 return retval;
1874}
1875
1876/*
1877 * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
1878 * that they can provide the correct SlruDesc (otherwise we don't know how to
1879 * build the path), but they just forward to this common implementation that
1880 * performs the fsync.
 *
 * "path" is an output buffer: SlruFileName() writes into it the file name of
 * the segment identified by ftag->segno.
 *
 * Returns -1 if no valid file descriptor was obtained for the segment file.
 * Otherwise returns the result of pg_fsync(), with errno set to the value
 * that was saved after the fsync.
1881 */
1882int
1883SlruSyncFileTag(SlruDesc *ctl, const FileTag *ftag, char *path)
1884{
1885 int fd;
1886 int save_errno;
1887 int result;
1888
1889 SlruFileName(ctl, path, ftag->segno);
1890
1892 if (fd < 0)
1893 return -1;
1894
1896 result = pg_fsync(fd);
1898 save_errno = errno;
1899
1901
 /* Hand back the errno saved after pg_fsync(), not whatever was set since. */
1902 errno = save_errno;
1903 return result;
1904}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
#define Min(x, y)
Definition c.h:1091
#define MAXALIGN(LEN)
Definition c.h:896
#define Max(x, y)
Definition c.h:1085
#define BUFFERALIGN(LEN)
Definition c.h:898
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
#define PG_BINARY
Definition c.h:1374
uint32_t uint32
Definition c.h:624
#define MemSet(start, val, len)
Definition c.h:1107
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int errcode_for_file_access(void)
Definition elog.c:897
#define LOG
Definition elog.h:32
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:30
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
int FreeDir(DIR *dir)
Definition fd.c:3009
int CloseTransientFile(int fd)
Definition fd.c:2855
void fsync_fname(const char *fname, bool isdir)
Definition fd.c:757
int data_sync_elevel(int elevel)
Definition fd.c:3986
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
int pg_fsync(int fd)
Definition fd.c:390
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
int NBuffers
Definition globals.c:144
#define newval
#define GUC_check_errdetail
Definition guc.h:507
int i
Definition isn.c:77
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1885
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
int LWLockNewTrancheId(const char *name)
Definition lwlock.c:562
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1929
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1321
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopMemoryContext
Definition mcxt.c:166
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
static char * errmsg
#define NAMEDATALEN
#define MAXPGPATH
#define SLRU_PAGES_PER_SEGMENT
const void size_t len
const void * data
static char * filename
Definition pg_dumpall.c:133
static XLogRecPtr endpos
void pgstat_count_slru_blocks_zeroed(int slru_idx)
void pgstat_count_slru_blocks_hit(int slru_idx)
void pgstat_count_slru_truncate(int slru_idx)
void pgstat_count_slru_blocks_read(int slru_idx)
void pgstat_count_slru_blocks_written(int slru_idx)
void pgstat_count_slru_flush(int slru_idx)
void pgstat_count_slru_blocks_exists(int slru_idx)
PgStat_CheckpointerStats PendingCheckpointerStats
int pgstat_get_slru_index(const char *name)
#define pg_pwrite
Definition port.h:248
#define pg_pread
Definition port.h:247
#define snprintf
Definition port.h:260
static int fd(const char *x, int i)
tree ctl
Definition radixtree.h:1838
void ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
Definition shmem.c:337
@ SHMEM_KIND_SLRU
static void SimpleLruZeroLSNs(SlruDesc *ctl, int slotno)
Definition slru.c:450
bool SlruScanDirectory(SlruDesc *ctl, SlruScanCallback callback, void *data)
Definition slru.c:1844
static bool SlruMayDeleteSegment(SlruDesc *ctl, int64 segpage, int64 cutoffPage)
Definition slru.c:1653
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition slru.c:159
int SimpleLruReadPage_ReadOnly(SlruDesc *ctl, int64 pageno, const void *opaque_data)
Definition slru.c:654
static int SlruSelectLRUPage(SlruDesc *ctl, int64 pageno)
Definition slru.c:1219
#define SLRU_BANK_SIZE
Definition slru.c:146
static void SlruReportIOError(SlruDesc *ctl, int64 pageno, const void *opaque_data)
Definition slru.c:1097
int SimpleLruAutotuneBuffers(int divisor, int max)
Definition slru.c:235
bool SlruScanDirCbReportPresence(SlruDesc *ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1765
static SlruErrorCause slru_errcause
Definition slru.c:177
static bool SlruScanDirCbDeleteCutoff(SlruDesc *ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1781
static int SlruFileName(SlruDesc *ctl, char *path, int64 segno)
Definition slru.c:94
#define MAX_WRITEALL_BUFFERS
Definition slru.c:126
static bool SlruCorrectSegmentFilenameLength(SlruDesc *ctl, size_t len)
Definition slru.c:1811
static void SlruInternalDeleteSegment(SlruDesc *ctl, int64 segno)
Definition slru.c:1553
static bool SlruPhysicalWritePage(SlruDesc *ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition slru.c:925
static int slru_errno
Definition slru.c:178
void shmem_slru_init(void *location, ShmemStructOpts *base_options)
Definition slru.c:267
void SimpleLruTruncate(SlruDesc *ctl, int64 cutoffPage)
Definition slru.c:1458
static bool SlruPhysicalReadPage(SlruDesc *ctl, int64 pageno, int slotno)
Definition slru.c:853
void SimpleLruZeroAndWritePage(SlruDesc *ctl, int64 pageno)
Definition slru.c:466
static void SimpleLruWaitIO(SlruDesc *ctl, int slotno)
Definition slru.c:492
static void SlruInternalWritePage(SlruDesc *ctl, int slotno, SlruWriteAll fdata)
Definition slru.c:701
void SlruDeleteSegment(SlruDesc *ctl, int64 segno)
Definition slru.c:1576
int SimpleLruZeroPage(SlruDesc *ctl, int64 pageno)
Definition slru.c:397
#define SlotGetBankNumber(slotno)
Definition slru.c:151
void shmem_slru_attach(void *location, ShmemStructOpts *base_options)
Definition slru.c:359
bool SimpleLruDoesPhysicalPageExist(SlruDesc *ctl, int64 pageno)
Definition slru.c:795
static Size SimpleLruShmemSize(int nslots, int nlsns)
Definition slru.c:202
SlruErrorCause
Definition slru.c:168
@ SLRU_WRITE_FAILED
Definition slru.c:172
@ SLRU_FSYNC_FAILED
Definition slru.c:173
@ SLRU_SEEK_FAILED
Definition slru.c:170
@ SLRU_OPEN_FAILED
Definition slru.c:169
@ SLRU_CLOSE_FAILED
Definition slru.c:174
@ SLRU_READ_FAILED
Definition slru.c:171
bool SlruScanDirCbDeleteAll(SlruDesc *ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1797
void SimpleLruWritePage(SlruDesc *ctl, int slotno)
Definition slru.c:781
void SimpleLruWriteAll(SlruDesc *ctl, bool allow_redirtied)
Definition slru.c:1372
int SimpleLruReadPage(SlruDesc *ctl, int64 pageno, bool write_ok, const void *opaque_data)
Definition slru.c:550
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition slru.c:1173
void SimpleLruRequestWithOpts(const SlruOpts *options)
Definition slru.c:246
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:377
int SlruSyncFileTag(SlruDesc *ctl, const FileTag *ftag, char *path)
Definition slru.c:1884
SlruSharedData * SlruShared
Definition slru.h:108
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:233
bool(* SlruScanCallback)(SlruDesc *ctl, char *filename, int64 segpage, void *data)
Definition slru.h:238
#define SLRU_MAX_ALLOWED_BUFFERS
Definition slru.h:26
static LWLock * SimpleLruGetBankLock(SlruDesc *ctl, int64 pageno)
Definition slru.h:207
SlruPageStatus
Definition slru.h:35
@ SLRU_PAGE_VALID
Definition slru.h:38
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition slru.h:39
@ SLRU_PAGE_READ_IN_PROGRESS
Definition slru.h:37
@ SLRU_PAGE_EMPTY
Definition slru.h:36
int ckpt_slru_written
Definition xlog.h:180
Definition dirent.c:26
uint64 segno
Definition sync.h:55
PgStat_Counter slru_written
Definition pgstat.h:271
int slru_stats_idx
Definition slru.h:105
int64 * page_number
Definition slru.h:60
int num_slots
Definition slru.h:51
LWLockPadded * bank_locks
Definition slru.h:67
int * page_lru_count
Definition slru.h:61
pg_atomic_uint64 latest_page_number
Definition slru.h:102
XLogRecPtr * group_lsn
Definition slru.h:94
int * bank_cur_lru_count
Definition slru.h:84
int lsn_groups_per_page
Definition slru.h:95
SlruPageStatus * page_status
Definition slru.h:58
bool * page_dirty
Definition slru.h:59
LWLockPadded * buffer_locks
Definition slru.h:64
char ** page_buffer
Definition slru.h:57
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition sync.c:581
@ SYNC_HANDLER_NONE
Definition sync.h:42
@ SYNC_FORGET_REQUEST
Definition sync.h:27
@ SYNC_REQUEST
Definition sync.h:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
LWLock lock
Definition lwlock.h:70
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:67
static void pgstat_report_wait_end(void)
Definition wait_event.h:83
const char * name
CheckpointStatsData CheckpointStats
Definition xlog.c:216
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
uint64 XLogRecPtr
Definition xlogdefs.h:21
bool InRecovery
Definition xlogutils.c:50

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 126 of file slru.c.

◆ SlotGetBankNumber

#define SlotGetBankNumber (   slotno)    ((slotno) >> SLRU_BANK_BITSHIFT)

Definition at line 151 of file slru.c.

◆ SLRU_BANK_BITSHIFT

#define SLRU_BANK_BITSHIFT   4

Definition at line 145 of file slru.c.

◆ SLRU_BANK_SIZE

#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)

Definition at line 146 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

Definition at line 135 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 167 of file slru.c.

Function Documentation

◆ check_slru_buffers()

bool check_slru_buffers ( const char *name,
int *newval
)

Definition at line 377 of file slru.c.

378{
379 /* Valid values are multiples of SLRU_BANK_SIZE */
380 if (*newval % SLRU_BANK_SIZE == 0)
381 return true;
382
383 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
385 return false;
386}

References GUC_check_errdetail, name, newval, and SLRU_BANK_SIZE.

Referenced by check_commit_ts_buffers(), check_multixact_member_buffers(), check_multixact_offset_buffers(), check_notify_buffers(), check_serial_buffers(), check_subtrans_buffers(), and check_transaction_buffers().

◆ shmem_slru_attach()

void shmem_slru_attach ( void *location,
ShmemStructOpts *base_options
)

Definition at line 359 of file slru.c.

360{
362 SlruDesc *desc = (SlruDesc *) options->desc;
363 int nslots = options->nslots;
364 int nbanks = nslots / SLRU_BANK_SIZE;
365
366 desc->shared = (SlruShared) location;
367 desc->nbanks = nbanks;
368 memcpy(&desc->options, options, sizeof(SlruOpts));
369}

References fb(), memcpy(), SlruDesc::nbanks, SlruDesc::options, SlruDesc::shared, and SLRU_BANK_SIZE.

Referenced by AttachShmemIndexEntry().

◆ shmem_slru_init()

void shmem_slru_init ( void *location,
ShmemStructOpts *base_options
)

Definition at line 267 of file slru.c.

268{
270 SlruDesc *desc = (SlruDesc *) options->desc;
271 char namebuf[NAMEDATALEN];
272 SlruShared shared;
273 int nslots = options->nslots;
274 int nbanks = nslots / SLRU_BANK_SIZE;
275 int nlsns = options->nlsns;
276 char *ptr;
277 Size offset;
278
279 shared = (SlruShared) location;
280 desc->shared = shared;
281 desc->nbanks = nbanks;
282 memcpy(&desc->options, options, sizeof(SlruOpts));
283
284 /* assign new tranche IDs, if not given */
285 if (desc->options.buffer_tranche_id == 0)
286 {
287 snprintf(namebuf, sizeof(namebuf), "%s buffer", desc->options.name);
288 desc->options.buffer_tranche_id = LWLockNewTrancheId(namebuf);
289 }
290 if (desc->options.bank_tranche_id == 0)
291 {
292 snprintf(namebuf, sizeof(namebuf), "%s bank", desc->options.name);
293 desc->options.bank_tranche_id = LWLockNewTrancheId(namebuf);
294 }
295
297
298 memset(shared, 0, sizeof(SlruSharedData));
299
300 shared->num_slots = nslots;
301 shared->lsn_groups_per_page = nlsns;
302
304
305 shared->slru_stats_idx = pgstat_get_slru_index(desc->options.name);
306
307 ptr = (char *) shared;
308 offset = MAXALIGN(sizeof(SlruSharedData));
309 shared->page_buffer = (char **) (ptr + offset);
310 offset += MAXALIGN(nslots * sizeof(char *));
311 shared->page_status = (SlruPageStatus *) (ptr + offset);
312 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
313 shared->page_dirty = (bool *) (ptr + offset);
314 offset += MAXALIGN(nslots * sizeof(bool));
315 shared->page_number = (int64 *) (ptr + offset);
316 offset += MAXALIGN(nslots * sizeof(int64));
317 shared->page_lru_count = (int *) (ptr + offset);
318 offset += MAXALIGN(nslots * sizeof(int));
319
320 /* Initialize LWLocks */
321 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
322 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
323 shared->bank_locks = (LWLockPadded *) (ptr + offset);
324 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
325 shared->bank_cur_lru_count = (int *) (ptr + offset);
326 offset += MAXALIGN(nbanks * sizeof(int));
327
328 if (nlsns > 0)
329 {
330 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
331 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
332 }
333
334 ptr += BUFFERALIGN(offset);
335 for (int slotno = 0; slotno < nslots; slotno++)
336 {
338 desc->options.buffer_tranche_id);
339
340 shared->page_buffer[slotno] = ptr;
342 shared->page_dirty[slotno] = false;
343 shared->page_lru_count[slotno] = 0;
344 ptr += BLCKSZ;
345 }
346
347 /* Initialize the slot banks. */
348 for (int bankno = 0; bankno < nbanks; bankno++)
349 {
350 LWLockInitialize(&shared->bank_locks[bankno].lock, desc->options.bank_tranche_id);
351 shared->bank_cur_lru_count[bankno] = 0;
352 }
353
354 /* Should fit to estimated shmem size */
355 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
356}

References Assert, SlruSharedData::bank_cur_lru_count, SlruSharedData::bank_locks, SlruOpts::bank_tranche_id, SlruSharedData::buffer_locks, SlruOpts::buffer_tranche_id, BUFFERALIGN, fb(), SlruSharedData::group_lsn, SlruSharedData::latest_page_number, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), LWLockNewTrancheId(), MAXALIGN, memcpy(), SlruOpts::name, NAMEDATALEN, SlruDesc::nbanks, SlruSharedData::num_slots, SlruDesc::options, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_init_u64(), pgstat_get_slru_index(), SlruDesc::shared, SimpleLruShmemSize(), SLRU_BANK_SIZE, SLRU_MAX_ALLOWED_BUFFERS, SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, and snprintf.

Referenced by InitShmemIndexEntry().

◆ SimpleLruAutotuneBuffers()

int SimpleLruAutotuneBuffers ( int  divisor,
int  max 
)

Definition at line 235 of file slru.c.

236{
237 return Min(max - (max % SLRU_BANK_SIZE),
240}

References fb(), Max, Min, NBuffers, and SLRU_BANK_SIZE.

Referenced by CLOGShmemBuffers(), CommitTsShmemBuffers(), and SUBTRANSShmemBuffers().

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruDesc *ctl,
int64  pageno 
)

Definition at line 795 of file slru.c.

796{
797 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
798 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
799 int offset = rpageno * BLCKSZ;
800 char path[MAXPGPATH];
801 int fd;
802 bool result;
804
805 /* update the stats counter of checked pages */
806 pgstat_count_slru_blocks_exists(ctl->shared->slru_stats_idx);
807
808 SlruFileName(ctl, path, segno);
809
811 if (fd < 0)
812 {
813 /* expected: file doesn't exist */
814 if (errno == ENOENT)
815 return false;
816
817 /* report error normally */
820 SlruReportIOError(ctl, pageno, NULL);
821 }
822
823 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
824 {
827 SlruReportIOError(ctl, pageno, NULL);
828 }
829
830 result = endpos >= (off_t) (offset + BLCKSZ);
831
832 if (CloseTransientFile(fd) != 0)
833 {
836 return false;
837 }
838
839 return result;
840}

References CloseTransientFile(), ctl, endpos, fb(), fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_blocks_exists(), result, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruFileName(), and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), and test_slru_page_exists().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruDesc *ctl,
int64  pageno,
bool  write_ok,
const void *opaque_data
)

Definition at line 550 of file slru.c.

552{
553 SlruShared shared = ctl->shared;
555
557
558 /* Outer loop handles restart if we must wait for someone else's I/O */
559 for (;;)
560 {
561 int slotno;
562 bool ok;
563
564 /* See if page already is in memory; if not, pick victim slot */
565 slotno = SlruSelectLRUPage(ctl, pageno);
566
567 /* Did we find the page in memory? */
568 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
569 shared->page_number[slotno] == pageno)
570 {
571 /*
572 * If page is still being read in, we must wait for I/O. Likewise
573 * if the page is being written and the caller said that's not OK.
574 */
577 !write_ok))
578 {
580 /* Now we must recheck state from the top */
581 continue;
582 }
583 /* Otherwise, it's ready to use */
584 SlruRecentlyUsed(shared, slotno);
585
586 /* update the stats counter of pages found in the SLRU */
588
589 return slotno;
590 }
591
592 /* We found no match; assert we selected a freeable slot */
594 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
595 !shared->page_dirty[slotno]));
596
597 /* Mark the slot read-busy */
598 shared->page_number[slotno] = pageno;
600 shared->page_dirty[slotno] = false;
601
602 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
604
605 /* Release bank lock while doing I/O */
607
608 /* Do the read */
609 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
610
611 /* Set the LSNs for this newly read-in page to zero */
613
614 /* Re-acquire bank control lock and update page state */
616
617 Assert(shared->page_number[slotno] == pageno &&
619 !shared->page_dirty[slotno]);
620
622
624
625 /* Now it's okay to ereport if we failed */
626 if (!ok)
628
629 SlruRecentlyUsed(shared, slotno);
630
631 /* update the stats counter of pages not found in SLRU */
633
634 return slotno;
635 }
636}

References Assert, SlruSharedData::buffer_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_blocks_hit(), pgstat_count_slru_blocks_read(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed(), SlruReportIOError(), and SlruSelectLRUPage().

Referenced by AsyncNotifyFreezeXids(), asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruDesc *ctl,
int64  pageno,
const void *opaque_data
)

Definition at line 654 of file slru.c.

655{
656 SlruShared shared = ctl->shared;
658 int bankno = pageno % ctl->nbanks;
661
662 /* Try to find the page while holding only shared lock */
664
665 /* See if page is already in a buffer */
666 for (int slotno = bankstart; slotno < bankend; slotno++)
667 {
668 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
669 shared->page_number[slotno] == pageno &&
671 {
672 /* See comments for SlruRecentlyUsed() */
673 SlruRecentlyUsed(shared, slotno);
674
675 /* update the stats counter of pages found in the SLRU */
677
678 return slotno;
679 }
680 }
681
682 /* No luck, so switch to normal exclusive lock and do regular read */
685
686 return SimpleLruReadPage(ctl, pageno, true, opaque_data);
687}

References ctl, fb(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_blocks_hit(), SimpleLruGetBankLock(), SimpleLruReadPage(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed().

Referenced by asyncQueueProcessPageEntries(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruRequestWithOpts()

void SimpleLruRequestWithOpts ( const SlruOpts *options)

Definition at line 246 of file slru.c.

247{
249
250 Assert(options->name != NULL);
251 Assert(options->nslots > 0);
252 Assert(options->PagePrecedes != NULL);
253 Assert(options->errdetail_for_io_error != NULL);
254
256 sizeof(SlruOpts));
258
259 options_copy->base.name = options->name;
260 options_copy->base.size = SimpleLruShmemSize(options_copy->nslots, options_copy->nlsns);
261
263}

References Assert, fb(), memcpy(), MemoryContextAlloc(), SHMEM_KIND_SLRU, ShmemRequestInternal(), SimpleLruShmemSize(), and TopMemoryContext.

◆ SimpleLruShmemSize()

static Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)
static

Definition at line 202 of file slru.c.

203{
204 int nbanks = nslots / SLRU_BANK_SIZE;
205 Size sz;
206
208 Assert(nslots % SLRU_BANK_SIZE == 0);
209
210 /* we assume nslots isn't so large as to risk overflow */
211 sz = MAXALIGN(sizeof(SlruSharedData));
212 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
213 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
214 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
215 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
216 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
217 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
218 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
219 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
220
221 if (nlsns > 0)
222 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
223
224 return BUFFERALIGN(sz) + BLCKSZ * nslots;
225}

References Assert, BUFFERALIGN, fb(), MAXALIGN, SLRU_BANK_SIZE, and SLRU_MAX_ALLOWED_BUFFERS.

Referenced by shmem_slru_init(), and SimpleLruRequestWithOpts().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruDesc *ctl,
int64  cutoffPage 
)

Definition at line 1458 of file slru.c.

1459{
1460 SlruShared shared = ctl->shared;
1461 int prevbank;
1462
1463 /* update the stats counter of truncates */
1465
1466 /*
1467 * Scan shared memory and remove any pages preceding the cutoff page, to
1468 * ensure we won't rewrite them later. (Since this is normally called in
1469 * or just after a checkpoint, any dirty pages should have been flushed
1470 * already ... we're just being extra careful here.)
1471 */
1472restart:
1473
1474 /*
1475 * An important safety check: the current endpoint page must not be
1476 * eligible for removal. This check is just a backstop against wraparound
1477 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1478 * outdated value; therefore we don't add a memory barrier.
1479 */
1480 if (ctl->options.PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1481 cutoffPage))
1482 {
1483 ereport(LOG,
1484 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1485 ctl->options.Dir)));
1486 return;
1487 }
1488
1491 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1492 {
1494
1495 /*
1496 * If the current bank lock is not same as the previous bank lock then
1497 * release the previous lock and acquire the new lock.
1498 */
1499 if (curbank != prevbank)
1500 {
1503 prevbank = curbank;
1504 }
1505
1506 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1507 continue;
1508 if (!ctl->options.PagePrecedes(shared->page_number[slotno], cutoffPage))
1509 continue;
1510
1511 /*
1512 * If page is clean, just change state to EMPTY (expected case).
1513 */
1514 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1515 !shared->page_dirty[slotno])
1516 {
1518 continue;
1519 }
1520
1521 /*
1522 * Hmm, we have (or may have) I/O operations acting on the page, so
1523 * we've got to wait for them to finish and then start again. This is
1524 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1525 * wouldn't it be OK to just discard it without writing it?
1526 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1527 * not delete this page in the end; even if we don't delete it, we
1528 * won't have cause to read its data again. For now, keep the logic
1529 * the same as it was.)
1530 */
1531 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1533 else
1535
1537 goto restart;
1538 }
1539
1541
1542 /* Now we can remove the old segment(s) */
1544}

References SlruSharedData::bank_locks, ctl, ereport, errmsg, fb(), SlruSharedData::latest_page_number, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), pgstat_count_slru_truncate(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformMembersTruncation(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruDesc *ctl,
int  slotno 
)
static

Definition at line 492 of file slru.c.

493{
494 SlruShared shared = ctl->shared;
496
498
499 /* See notes at top of file */
504
505 /*
506 * If the slot is still in an io-in-progress state, then either someone
507 * already started a new I/O on the slot, or a previous I/O failed and
508 * neglected to reset the page state. That shouldn't happen, really, but
509 * it seems worth a few extra cycles to check and recover from it. We can
510 * cheaply test for failure by seeing if the buffer lock is still held (we
511 * assume that transaction abort would release the lock).
512 */
515 {
517 {
518 /* indeed, the I/O must have failed */
521 else /* write_in_progress */
522 {
524 shared->page_dirty[slotno] = true;
525 }
527 }
528 }
529}

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruDesc *ctl,
bool  allow_redirtied 
)

Definition at line 1372 of file slru.c.

1373{
1374 SlruShared shared = ctl->shared;
1376 int64 pageno = 0;
1377 int prevbank = SlotGetBankNumber(0);
1378 bool ok;
1379
1380 /* update the stats counter of flushes */
1382
1383 /*
1384 * Find and write dirty pages
1385 */
1386 fdata.num_files = 0;
1387
1389
1390 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1391 {
1393
1394 /*
1395 * If the current bank lock is not same as the previous bank lock then
1396 * release the previous lock and acquire the new lock.
1397 */
1398 if (curbank != prevbank)
1399 {
1402 prevbank = curbank;
1403 }
1404
1405 /* Do nothing if slot is unused */
1406 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1407 continue;
1408
1410
1411 /*
1412 * In some places (e.g. checkpoints), we cannot assert that the slot
1413 * is clean now, since another process might have re-dirtied it
1414 * already. That's okay.
1415 */
1417 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1418 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1419 !shared->page_dirty[slotno]));
1420 }
1421
1423
1424 /*
1425 * Now close any files that were open
1426 */
1427 ok = true;
1428 for (int i = 0; i < fdata.num_files; i++)
1429 {
1430 if (CloseTransientFile(fdata.fd[i]) != 0)
1431 {
1433 slru_errno = errno;
1434 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1435 ok = false;
1436 }
1437 }
1438 if (!ok)
1439 SlruReportIOError(ctl, pageno, NULL);
1440
1441 /* Ensure that directory entries for new files are on disk. */
1442 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1443 fsync_fname(ctl->options.Dir, true);
1444}

References Assert, SlruSharedData::bank_locks, CloseTransientFile(), ctl, fb(), fsync_fname(), i, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlotGetBankNumber, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruDesc *ctl,
int  slotno 
)

Definition at line 781 of file slru.c.

782{
783 Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
784
786}

References Assert, ctl, fb(), SLRU_PAGE_EMPTY, and SlruInternalWritePage().

Referenced by SimpleLruZeroAndWritePage(), and test_slru_page_write().

◆ SimpleLruZeroAndWritePage()

void SimpleLruZeroAndWritePage ( SlruDesc *ctl,
int64  pageno 
)

Definition at line 466 of file slru.c.

467{
468 int slotno;
469 LWLock *lock;
470
471 lock = SimpleLruGetBankLock(ctl, pageno);
473
474 /* Create and zero the page */
475 slotno = SimpleLruZeroPage(ctl, pageno);
476
477 /* Make sure it's written out */
479 Assert(!ctl->shared->page_dirty[slotno]);
480
481 LWLockRelease(lock);
482}

References Assert, ctl, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SimpleLruGetBankLock(), SimpleLruWritePage(), and SimpleLruZeroPage().

Referenced by ActivateCommitTs(), BootStrapCLOG(), BootStrapMultiXact(), BootStrapSUBTRANS(), clog_redo(), commit_ts_redo(), and multixact_redo().

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruDesc *ctl,
int  slotno 
)
static

Definition at line 450 of file slru.c.

451{
452 SlruShared shared = ctl->shared;
453
454 if (shared->lsn_groups_per_page > 0)
455 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
456 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
457}

References ctl, fb(), SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, and MemSet.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruDesc *ctl,
int64  pageno 
)

Definition at line 397 of file slru.c.

398{
399 SlruShared shared = ctl->shared;
400 int slotno;
401
403
404 /* Find a suitable buffer slot for the page */
405 slotno = SlruSelectLRUPage(ctl, pageno);
407 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
408 !shared->page_dirty[slotno]) ||
409 shared->page_number[slotno] == pageno);
410
411 /* Mark the slot as containing this page */
412 shared->page_number[slotno] = pageno;
414 shared->page_dirty[slotno] = true;
415 SlruRecentlyUsed(shared, slotno);
416
417 /* Set the buffer to zeroes */
418 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
419
420 /* Set the LSNs for this new page to zero */
422
423 /*
424 * Assume this page is now the latest active page.
425 *
426 * Note that because both this routine and SlruSelectLRUPage run with a
427 * SLRU bank lock held, it is not possible for this to be zeroing a page
428 * that SlruSelectLRUPage is going to evict simultaneously. Therefore,
429 * there's no memory barrier here.
430 */
431 pg_atomic_write_u64(&shared->latest_page_number, pageno);
432
433 /* update the stats counter of zeroed pages */
435
436 return slotno;
437}

References Assert, ctl, fb(), SlruSharedData::latest_page_number, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_write_u64(), pgstat_count_slru_blocks_zeroed(), SimpleLruGetBankLock(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), ExtendCLOG(), ExtendCommitTs(), ExtendMultiXactMember(), ExtendMultiXactOffset(), ExtendSUBTRANS(), SerialAdd(), SimpleLruZeroAndWritePage(), StartupSUBTRANS(), test_slru_page_write(), and TrimMultiXact().

◆ SlruCorrectSegmentFilenameLength()

static bool SlruCorrectSegmentFilenameLength ( SlruDesc *ctl,
size_t  len 
)
inline static

Definition at line 1811 of file slru.c.

1812{
1813 if (ctl->options.long_segment_names)
1814 return (len == 15); /* see SlruFileName() */
1815 else
1816
1817 /*
1818 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1819 * 73c986adde5 allowed 6-character length.
1820 *
1821 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1822 * numbers, and the corresponding 15-character file names, which may
1823 * eventually deprecate the support for 4, 5, and 6-character names.
1824 */
1825 return (len == 4 || len == 5 || len == 6);
1826}

References ctl, and len.

Referenced by SlruScanDirectory().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruDesc *ctl,
int64  segno 
)

Definition at line 1576 of file slru.c.

1577{
1578 SlruShared shared = ctl->shared;
1579 int prevbank = SlotGetBankNumber(0);
1580 bool did_write;
1581
1582 /* Clean out any possibly existing references to the segment. */
1584restart:
1585 did_write = false;
1586 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1587 {
1590
1591 /*
1592 * If the current bank lock is not same as the previous bank lock then
1593 * release the previous lock and acquire the new lock.
1594 */
1595 if (curbank != prevbank)
1596 {
1599 prevbank = curbank;
1600 }
1601
1602 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1603 continue;
1604
1606 /* not the segment we're looking for */
1607 if (pagesegno != segno)
1608 continue;
1609
1610 /* If page is clean, just change state to EMPTY (expected case). */
1611 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1612 !shared->page_dirty[slotno])
1613 {
1615 continue;
1616 }
1617
1618 /* Same logic as SimpleLruTruncate() */
1619 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1621 else
1623
1624 did_write = true;
1625 }
1626
1627 /*
1628 * Be extra careful and re-check. The IO functions release the control
1629 * lock, so new pages could have been read in.
1630 */
1631 if (did_write)
1632 goto restart;
1633
1635
1637}

References SlruSharedData::bank_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by test_slru_page_delete().

◆ SlruFileName()

static int SlruFileName ( SlruDesc *ctl,
char *path,
int64  segno 
)
inline static

Definition at line 94 of file slru.c.

95{
96 if (ctl->options.long_segment_names)
97 {
98 /*
99 * We could use 16 characters here but the disadvantage would be that
100 * the SLRU segments will be hard to distinguish from WAL segments.
101 *
102 * For this reason we use 15 characters. It is enough but also means
103 * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
104 */
105 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
106 return snprintf(path, MAXPGPATH, "%s/%015" PRIX64, ctl->options.Dir, segno);
107 }
108 else
109 {
110 /*
111 * Despite the fact that %04X format string is used up to 24 bit
112 * integers are allowed. See SlruCorrectSegmentFilenameLength()
113 */
114 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
115 return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->options.Dir,
116 (unsigned int) segno);
117 }
118}
#define INT64CONST(x)
Definition c.h:630

References Assert, ctl, fb(), INT64CONST, MAXPGPATH, and snprintf.

Referenced by SimpleLruDoesPhysicalPageExist(), SlruInternalDeleteSegment(), SlruPhysicalReadPage(), SlruPhysicalWritePage(), SlruReportIOError(), and SlruSyncFileTag().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruDesc *ctl,
int64  segno 
)
static

Definition at line 1553 of file slru.c.

1554{
1555 char path[MAXPGPATH];
1556
1557 /* Forget any fsync requests queued for this segment. */
1558 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1559 {
1560 FileTag tag;
1561
1562 INIT_SLRUFILETAG(tag, ctl->options.sync_handler, segno);
1564 }
1565
1566 /* Unlink the file. */
1567 SlruFileName(ctl, path, segno);
1568 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1569 unlink(path);
1570}

References ctl, DEBUG2, ereport, errmsg_internal(), fb(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName(), SYNC_FORGET_REQUEST, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruDesc *ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 701 of file slru.c.

702{
703 SlruShared shared = ctl->shared;
704 int64 pageno = shared->page_number[slotno];
706 bool ok;
707
710
711 /* If a write is in progress, wait for it to finish */
713 shared->page_number[slotno] == pageno)
714 {
716 }
717
718 /*
719 * Do nothing if page is not dirty, or if buffer no longer contains the
720 * same page we were called for.
721 */
722 if (!shared->page_dirty[slotno] ||
723 shared->page_status[slotno] != SLRU_PAGE_VALID ||
724 shared->page_number[slotno] != pageno)
725 return;
726
727 /*
728 * Mark the slot write-busy, and clear the dirtybit. After this point, a
729 * transaction status update on this page will mark it dirty again.
730 */
732 shared->page_dirty[slotno] = false;
733
734 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
736
737 /* Release bank lock while doing I/O */
739
740 /* Do the write */
742
743 /* If we failed, and we're in a flush, better close the files */
744 if (!ok && fdata)
745 {
746 for (int i = 0; i < fdata->num_files; i++)
748 }
749
750 /* Re-acquire bank lock and update page state */
752
753 Assert(shared->page_number[slotno] == pageno &&
755
756 /* If we failed to write, mark the page dirty again */
757 if (!ok)
758 shared->page_dirty[slotno] = true;
759
761
763
764 /* Now it's okay to ereport if we failed */
765 if (!ok)
766 SlruReportIOError(ctl, pageno, NULL);
767
768 /* If part of a checkpoint, count this as a SLRU buffer written. */
769 if (fdata)
770 {
773 }
774}

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_slru_written, CloseTransientFile(), ctl, fb(), i, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, PendingCheckpointerStats, SimpleLruGetBankLock(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, PgStat_CheckpointerStats::slru_written, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruDesc *ctl,
int64  segpage,
int64  cutoffPage 
)
static

Definition at line 1653 of file slru.c.

1654{
1656
1658
1659 return (ctl->options.PagePrecedes(segpage, cutoffPage) &&
1660 ctl->options.PagePrecedes(seg_last_page, cutoffPage));
1661}

References Assert, ctl, fb(), and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruDesc *ctl,
int64  pageno,
int  slotno 
)
static

Definition at line 853 of file slru.c.

854{
855 SlruShared shared = ctl->shared;
856 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
857 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
858 off_t offset = rpageno * BLCKSZ;
859 char path[MAXPGPATH];
860 int fd;
861
862 SlruFileName(ctl, path, segno);
863
864 /*
865 * In a crash-and-restart situation, it's possible for us to receive
866 * commands to set the commit status of transactions whose bits are in
867 * already-truncated segments of the commit log (see notes in
868 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
869 * where the file doesn't exist, and return zeroes instead.
870 */
872 if (fd < 0)
873 {
874 if (errno != ENOENT || !InRecovery)
875 {
878 return false;
879 }
880
881 ereport(LOG,
882 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
883 path)));
884 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
885 return true;
886 }
887
888 errno = 0;
890 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
891 {
896 return false;
897 }
899
900 if (CloseTransientFile(fd) != 0)
901 {
904 return false;
905 }
906
907 return true;
908}

References CloseTransientFile(), ctl, ereport, errmsg, fb(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName().

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruDesc *ctl,
int64  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 925 of file slru.c.

926{
927 SlruShared shared = ctl->shared;
928 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
929 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
930 off_t offset = rpageno * BLCKSZ;
931 char path[MAXPGPATH];
932 int fd = -1;
933
934 /* update the stats counter of written pages */
936
937 /*
938 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
939 * write out data before associated WAL records. This is the same action
940 * performed during FlushBuffer() in the main buffer manager.
941 */
942 if (shared->group_lsn != NULL)
943 {
944 /*
945 * We must determine the largest async-commit LSN for the page. This
946 * is a bit tedious, but since this entire function is a slow path
947 * anyway, it seems better to do this here than to maintain a per-page
948 * LSN variable (which'd need an extra comparison in the
949 * transaction-commit path).
950 */
952 int lsnindex;
953
955 max_lsn = shared->group_lsn[lsnindex++];
956 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
957 {
959
960 if (max_lsn < this_lsn)
962 }
963
965 {
966 /*
967 * As noted above, elog(ERROR) is not acceptable here, so if
968 * XLogFlush were to fail, we must PANIC. This isn't much of a
969 * restriction because XLogFlush is just about all critical
970 * section anyway, but let's make sure.
971 */
975 }
976 }
977
978 /*
979 * During a SimpleLruWriteAll, we may already have the desired file open.
980 */
981 if (fdata)
982 {
983 for (int i = 0; i < fdata->num_files; i++)
984 {
985 if (fdata->segno[i] == segno)
986 {
987 fd = fdata->fd[i];
988 break;
989 }
990 }
991 }
992
993 if (fd < 0)
994 {
995 /*
996 * If the file doesn't already exist, we should create it. It is
997 * possible for this to need to happen when writing a page that's not
998 * first in its segment; we assume the OS can cope with that. (Note:
999 * it might seem that it'd be okay to create files only when
1000 * SimpleLruZeroPage is called for the first page of a segment.
1001 * However, if after a crash and restart the REDO logic elects to
1002 * replay the log from a checkpoint before the latest one, then it's
1003 * possible that we will get commands to set transaction status of
1004 * transactions that have already been truncated from the commit log.
1005 * Easiest way to deal with that is to accept references to
1006 * nonexistent files here and in SlruPhysicalReadPage.)
1007 *
1008 * Note: it is possible for more than one backend to be executing this
1009 * code simultaneously for different pages of the same file. Hence,
1010 * don't use O_EXCL or O_TRUNC or anything like that.
1011 */
1012 SlruFileName(ctl, path, segno);
1014 if (fd < 0)
1015 {
1017 slru_errno = errno;
1018 return false;
1019 }
1020
1021 if (fdata)
1022 {
1023 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
1024 {
1025 fdata->fd[fdata->num_files] = fd;
1026 fdata->segno[fdata->num_files] = segno;
1027 fdata->num_files++;
1028 }
1029 else
1030 {
1031 /*
1032 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
1033 * fall back to treating it as a standalone write.
1034 */
1035 fdata = NULL;
1036 }
1037 }
1038 }
1039
1040 errno = 0;
1042 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
1043 {
1045 /* if write didn't set errno, assume problem is no disk space */
1046 if (errno == 0)
1047 errno = ENOSPC;
1049 slru_errno = errno;
1050 if (!fdata)
1052 return false;
1053 }
1055
1056 /* Queue up a sync request for the checkpointer. */
1057 if (ctl->options.sync_handler != SYNC_HANDLER_NONE)
1058 {
1059 FileTag tag;
1060
1061 INIT_SLRUFILETAG(tag, ctl->options.sync_handler, segno);
1062 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1063 {
1064 /* No space to enqueue sync request. Do it synchronously. */
1066 if (pg_fsync(fd) != 0)
1067 {
1070 slru_errno = errno;
1072 return false;
1073 }
1075 }
1076 }
1077
1078 /* Close file, unless part of flush request. */
1079 if (!fdata)
1080 {
1081 if (CloseTransientFile(fd) != 0)
1082 {
1084 slru_errno = errno;
1085 return false;
1086 }
1087 }
1088
1089 return true;
1090}

References CloseTransientFile(), ctl, END_CRIT_SECTION, fb(), fd(), SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_blocks_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName(), START_CRIT_SECTION, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsValid.

Referenced by SlruInternalWritePage().

◆ SlruRecentlyUsed()

static void SlruRecentlyUsed ( SlruShared  shared,
int  slotno 
)
inline static

Definition at line 1173 of file slru.c.

/*
 * Update the LRU bookkeeping for slot `slotno` after an access.
 *
 * NOTE(review): this listing omits the local declarations (source lines
 * 1175-1178) and the body of the if-block (1200-1201).  new_lru_count is
 * presumably read from the slot's bank_cur_lru_count entry -- confirm
 * against slru.c.  What is visible: nothing is updated when the slot's
 * page_lru_count already equals new_lru_count.
 */
1174{
1177
1179
1180 /*
1181 * The reason for the if-test is that there are often many consecutive
1182 * accesses to the same page (particularly the latest page). By
1183 * suppressing useless increments of bank_cur_lru_count, we reduce the
1184 * probability that old pages' counts will "wrap around" and make them
1185 * appear recently used.
1186 *
1187 * We allow this code to be executed concurrently by multiple processes
1188 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1189 * are atomic, this should not cause any completely-bogus values to enter
1190 * the computation. However, it is possible for either bank_cur_lru_count
1191 * or individual page_lru_count entries to be "reset" to lower values than
1192 * they should have, in case a process is delayed while it executes this
1193 * function. With care in SlruSelectLRUPage(), this does little harm, and
1194 * in any case the absolute worst possible consequence is a nonoptimal
1195 * choice of page to evict. The gain from allowing concurrent reads of
1196 * SLRU pages seems worth it.
1197 */
1198 if (new_lru_count != shared->page_lru_count[slotno])
1199 {
1202 }
1203}

References Assert, SlruSharedData::bank_cur_lru_count, fb(), SlruSharedData::page_lru_count, SlruSharedData::page_status, SlotGetBankNumber, and SLRU_PAGE_EMPTY.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruDesc * ctl,
int64  pageno,
const void * opaque_data 
)
static

Definition at line 1097 of file slru.c.

/*
 * Raise the error for a failed SLRU file operation.
 *
 * Derives the segment number and the byte offset within the segment from
 * pageno, builds the file path with SlruFileName(), and restores errno from
 * slru_errno so that %m expands to the saved error.  The message reported is
 * selected by slru_errcause.  When opaque_data is non-NULL it is handed to
 * ctl->options.errdetail_for_io_error() to attach detail to the report.
 *
 * All primary messages follow the same style: lowercase first letter and no
 * trailing period.
 */
1098{
1099 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1100 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1101 int offset = rpageno * BLCKSZ;
1102 char path[MAXPGPATH];
1103
1104 SlruFileName(ctl, path, segno);
1105 errno = slru_errno;
1106 switch (slru_errcause)
1107 {
1108 case SLRU_OPEN_FAILED:
1109 ereport(ERROR,
1111 errmsg("could not open file \"%s\": %m", path),
1112 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1113 break;
1114 case SLRU_SEEK_FAILED:
1115 ereport(ERROR,
1117 errmsg("could not seek in file \"%s\" to offset %d: %m",
1118 path, offset),
1119 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1120 break;
1121 case SLRU_READ_FAILED:
1122 if (errno)
1123 ereport(ERROR,
1125 errmsg("could not read from file \"%s\" at offset %d: %m",
1126 path, offset),
1127 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1128 else
1129 ereport(ERROR,
1130 (errmsg("could not read from file \"%s\" at offset %d: read too few bytes",
1131 path, offset),
1132 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1133 break;
1134 case SLRU_WRITE_FAILED:
1135 if (errno)
1136 ereport(ERROR,
1138 errmsg("could not write to file \"%s\" at offset %d: %m",
1139 path, offset),
1140 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1141 else
1142 ereport(ERROR,
1143 (errmsg("could not write to file \"%s\" at offset %d: wrote too few bytes",
1144 path, offset),
1145 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1146 break;
1147 case SLRU_FSYNC_FAILED:
1150 errmsg("could not fsync file \"%s\": %m",
1151 path),
1152 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1153 break;
1154 case SLRU_CLOSE_FAILED:
1155 ereport(ERROR,
1157 errmsg("could not close file \"%s\": %m",
1158 path),
1159 opaque_data ? ctl->options.errdetail_for_io_error(opaque_data) : 0));
1160 break;
1161 default:
1162 /* can't get here, we trust */
1163 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1164 (int) slru_errcause);
1165 break;
1166 }
1167}

References ctl, data_sync_elevel(), elog, ereport, errcode_for_file_access(), errmsg, ERROR, fb(), MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName().

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruDesc ctl,
char filename,
int64  segpage,
void data 
)

Definition at line 1797 of file slru.c.

/*
 * Directory-scan callback that always returns false, so the scan continues
 * over every entry.
 *
 * NOTE(review): the statement at source line 1799 is missing from this
 * listing; the References list names SlruInternalDeleteSegment(), so it
 * presumably deletes the segment -- confirm against slru.c.
 */
1798{
1800
1801 return false; /* keep going */
1802}

References ctl, fb(), SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruDesc * ctl,
char * filename,
int64  segpage,
void * data 
)
static

Definition at line 1781 of file slru.c.

/*
 * Directory-scan callback; `data` must point to an int64 holding the cutoff
 * page.  Always returns false, so the scan continues over every entry.
 *
 * NOTE(review): source lines 1786-1787 are missing from this listing; the
 * References list names SlruMayDeleteSegment() and
 * SlruInternalDeleteSegment(), so they presumably delete the segment when it
 * lies before the cutoff -- confirm against slru.c.
 */
1783{
1784 int64 cutoffPage = *(int64 *) data;
1785
1788
1789 return false; /* keep going */
1790}

References ctl, data, fb(), SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruDesc ctl,
char filename,
int64  segpage,
void data 
)

Definition at line 1765 of file slru.c.

/*
 * Directory-scan callback; `data` must point to an int64 holding the cutoff
 * page.  Returns true to stop the scan once a match is found, false to keep
 * scanning.
 *
 * NOTE(review): the condition guarding `return true` (source line 1770) is
 * missing from this listing; the References list names
 * SlruMayDeleteSegment() -- confirm against slru.c.
 */
1767{
1768 int64 cutoffPage = *(int64 *) data;
1769
1771 return true; /* found one; don't iterate any more */
1772
1773 return false; /* keep going */
1774}

References ctl, data, fb(), and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruDesc ctl,
SlruScanCallback  callback,
void data 
)

Definition at line 1844 of file slru.c.

/*
 * Scan the directory ctl->options.Dir and invoke `callback` for each entry
 * whose name consists only of the characters 0-9 and A-F.  The name is parsed
 * as a base-16 segment number and the callback receives ctl, the file name,
 * segpage and the caller's `data`.
 *
 * The scan stops as soon as the callback returns true.  The return value is
 * the last callback result, or false if the callback was never invoked.
 *
 * NOTE(review): source lines 1859 and 1863 are missing from this listing.
 * They presumably hold the first half of the filename test (the References
 * list names SlruCorrectSegmentFilenameLength()) and the computation of
 * segpage from segno -- confirm against slru.c.
 */
1845{
1846 bool retval = false;
1847 DIR *cldir;
1848 struct dirent *clde;
1849 int64 segno;
1850 int64 segpage;
1851
1852 cldir = AllocateDir(ctl->options.Dir);
1853 while ((clde = ReadDir(cldir, ctl->options.Dir)) != NULL)
1854 {
1855 size_t len;
1856
1857 len = strlen(clde->d_name);
1858
1860 strspn(clde->d_name, "0123456789ABCDEF") == len)
1861 {
1862 segno = strtoi64(clde->d_name, NULL, 16);
1864
1865 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1866 ctl->options.Dir, clde->d_name);
1867 retval = callback(ctl, clde->d_name, segpage, data);
1868 if (retval)
1869 break;
1870 }
1871 }
1872 FreeDir(cldir);
1873
1874 return retval;
1875}

References AllocateDir(), callback(), ctl, data, DEBUG2, elog, fb(), FreeDir(), len, ReadDir(), SLRU_PAGES_PER_SEGMENT, and SlruCorrectSegmentFilenameLength().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), and TruncateCommitTs().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruDesc ctl,
int64  pageno 
)
static

Definition at line 1219 of file slru.c.

/*
 * Select the slot that should hold page `pageno`, searching only the bank
 * chosen by pageno % ctl->nbanks.
 *
 * Returns, in order of preference: the slot already holding pageno; any
 * EMPTY slot; otherwise the chosen victim slot once it is clean.  When no
 * usable valid victim exists, or the victim is dirty, the outer loop
 * restarts the whole search after the I/O.
 *
 * NOTE(review): this listing omits several source lines, among them the
 * bankstart/bankend computation, the this_delta computation, the
 * best-candidate bookkeeping, and the wait/write calls.  The References list
 * names SimpleLruWaitIO() and SlruInternalWritePage() -- confirm the missing
 * statements against slru.c.
 */
1220{
1221 SlruShared shared = ctl->shared;
1222
1223 /* Outer loop handles restart after I/O */
1224 for (;;)
1225 {
1226 int cur_count;
1227 int bestvalidslot = 0; /* keep compiler quiet */
1228 int best_valid_delta = -1;
1229 int64 best_valid_page_number = 0; /* keep compiler quiet */
1230 int bestinvalidslot = 0; /* keep compiler quiet */
1231 int best_invalid_delta = -1;
1232 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1233 int bankno = pageno % ctl->nbanks;
1236
1238
1239 /* See if page already has a buffer assigned */
1240 for (int slotno = bankstart; slotno < bankend; slotno++)
1241 {
1242 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1243 shared->page_number[slotno] == pageno)
1244 return slotno;
1245 }
1246
1247 /*
1248 * If we find any EMPTY slot, just select that one. Else choose a
1249 * victim page to replace. We normally take the least recently used
1250 * valid page, but we will never take the slot containing
1251 * latest_page_number, even if it appears least recently used. We
1252 * will select a slot that is already I/O busy only if there is no
1253 * other choice: a read-busy slot will not be least recently used once
1254 * the read finishes, and waiting for an I/O on a write-busy slot is
1255 * inferior to just picking some other slot. Testing shows the slot
1256 * we pick instead will often be clean, allowing us to begin a read at
1257 * once.
1258 *
1259 * Normally the page_lru_count values will all be different and so
1260 * there will be a well-defined LRU page. But since we allow
1261 * concurrent execution of SlruRecentlyUsed() within
1262 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1263 * acquire the same lru_count values. In that case we break ties by
1264 * choosing the furthest-back page.
1265 *
1266 * Notice that this next line forcibly advances cur_lru_count to a
1267 * value that is certainly beyond any value that will be in the
1268 * page_lru_count array after the loop finishes. This ensures that
1269 * the next execution of SlruRecentlyUsed will mark the page newly
1270 * used, even if it's for a page that has the current counter value.
1271 * That gets us back on the path to having good data when there are
1272 * multiple pages with the same lru_count.
1273 */
1274 cur_count = (shared->bank_cur_lru_count[bankno])++;
1275 for (int slotno = bankstart; slotno < bankend; slotno++)
1276 {
1277 int this_delta;
1279
1280 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1281 return slotno;
1282
1284 if (this_delta < 0)
1285 {
1286 /*
1287 * Clean up in case shared updates have caused cur_count
1288 * increments to get "lost". We back off the page counts,
1289 * rather than trying to increase cur_count, to avoid any
1290 * question of infinite loops or failure in the presence of
1291 * wrapped-around counts.
1292 */
1293 shared->page_lru_count[slotno] = cur_count;
1294 this_delta = 0;
1295 }
1296
1297 /*
1298 * If this page is the one most recently zeroed, don't consider it
1299 * an eviction candidate. See comments in SimpleLruZeroPage for an
1300 * explanation about the lack of a memory barrier here.
1301 */
1303 if (this_page_number ==
1305 continue;
1306
1307 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1308 {
1311 ctl->options.PagePrecedes(this_page_number,
1313 {
1317 }
1318 }
1319 else
1320 {
1323 ctl->options.PagePrecedes(this_page_number,
1325 {
1329 }
1330 }
1331 }
1332
1333 /*
1334 * If all pages (except possibly the latest one) are I/O busy, we'll
1335 * have to wait for an I/O to complete and then retry. In that
1336 * unhappy case, we choose to wait for the I/O on the least recently
1337 * used slot, on the assumption that it was likely initiated first of
1338 * all the I/Os in progress and may therefore finish first.
1339 */
1340 if (best_valid_delta < 0)
1341 {
1343 continue;
1344 }
1345
1346 /*
1347 * If the selected page is clean, we're set.
1348 */
1349 if (!shared->page_dirty[bestvalidslot])
1350 return bestvalidslot;
1351
1352 /*
1353 * Write the page.
1354 */
1356
1357 /*
1358 * Now loop back and try again. This is the easiest way of dealing
1359 * with corner cases such as the victim page being re-dirtied while we
1360 * wrote it.
1361 */
1362 }
1363}

References Assert, SlruSharedData::bank_cur_lru_count, ctl, fb(), SlruSharedData::latest_page_number, LWLockHeldByMe(), SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruDesc ctl,
const FileTag ftag,
char path 
)

Variable Documentation

◆ slru_errcause

◆ slru_errno