PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "utils/guc.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define MAX_WRITEALL_BUFFERS   16
 
#define SLRU_BANK_BITSHIFT   4
 
#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)
 
#define SlotGetBankNumber(slotno)   ((slotno) >> SLRU_BANK_BITSHIFT)
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static int SlruFileName (SlruCtl ctl, char *path, int64 segno)
 
static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int64 pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruCtl ctl, int64 pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int64 pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, int64 segno)
 
static void SlruRecentlyUsed (SlruShared shared, int slotno)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
int SimpleLruAutotuneBuffers (int divisor, int max)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
 
bool check_slru_buffers (const char *name, int *newval)
 
int SimpleLruZeroPage (SlruCtl ctl, int64 pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int64 pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int64 pageno)
 
void SimpleLruWriteAll (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int64 cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int64 segno)
 
static bool SlruMayDeleteSegment (SlruCtl ctl, int64 segpage, int64 cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static bool SlruCorrectSegmentFilenameLength (SlruCtl ctl, size_t len)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruCtl ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:73
Definition: sync.h:51

Definition at line 156 of file slru.c.

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 123 of file slru.c.

◆ SlotGetBankNumber

#define SlotGetBankNumber (   slotno)    ((slotno) >> SLRU_BANK_BITSHIFT)

Definition at line 148 of file slru.c.

◆ SLRU_BANK_BITSHIFT

#define SLRU_BANK_BITSHIFT   4

Definition at line 142 of file slru.c.

◆ SLRU_BANK_SIZE

#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)

Definition at line 143 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

typedef struct SlruWriteAllData* SlruWriteAll

Definition at line 132 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 164 of file slru.c.

165{
SlruErrorCause
Definition: slru.c:165
@ SLRU_WRITE_FAILED
Definition: slru.c:169
@ SLRU_FSYNC_FAILED
Definition: slru.c:170
@ SLRU_SEEK_FAILED
Definition: slru.c:167
@ SLRU_OPEN_FAILED
Definition: slru.c:166
@ SLRU_CLOSE_FAILED
Definition: slru.c:171
@ SLRU_READ_FAILED
Definition: slru.c:168

Function Documentation

◆ check_slru_buffers()

bool check_slru_buffers ( const char *  name,
int *  newval 
)

Definition at line 354 of file slru.c.

355{
356 /* Valid values are multiples of SLRU_BANK_SIZE */
357 if (*newval % SLRU_BANK_SIZE == 0)
358 return true;
359
360 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
361 SLRU_BANK_SIZE);
362 return false;
363}
#define newval
#define GUC_check_errdetail
Definition: guc.h:481
#define SLRU_BANK_SIZE
Definition: slru.c:143
const char * name

References GUC_check_errdetail, name, newval, and SLRU_BANK_SIZE.

Referenced by check_commit_ts_buffers(), check_multixact_member_buffers(), check_multixact_offset_buffers(), check_notify_buffers(), check_serial_buffers(), check_subtrans_buffers(), and check_transaction_buffers().

◆ SimpleLruAutotuneBuffers()

int SimpleLruAutotuneBuffers ( int  divisor,
int  max 
)

Definition at line 231 of file slru.c.

232{
233 return Min(max - (max % SLRU_BANK_SIZE),
234 Max(SLRU_BANK_SIZE,
235 NBuffers / divisor - (NBuffers / divisor) % SLRU_BANK_SIZE));
236}
#define Min(x, y)
Definition: c.h:975
#define Max(x, y)
Definition: c.h:969
int NBuffers
Definition: globals.c:143

References Max, Min, NBuffers, and SLRU_BANK_SIZE.

Referenced by CLOGShmemBuffers(), CommitTsShmemBuffers(), and SUBTRANSShmemBuffers().

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 745 of file slru.c.

746{
747 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
748 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
749 int offset = rpageno * BLCKSZ;
750 char path[MAXPGPATH];
751 int fd;
752 bool result;
753 off_t endpos;
754
755 /* update the stats counter of checked pages */
756 pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
757
758 SlruFileName(ctl, path, segno);
759
760 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
761 if (fd < 0)
762 {
763 /* expected: file doesn't exist */
764 if (errno == ENOENT)
765 return false;
766
767 /* report error normally */
768 slru_errcause = SLRU_OPEN_FAILED;
769 slru_errno = errno;
770 SlruReportIOError(ctl, pageno, 0);
771 }
772
773 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
774 {
775 slru_errcause = SLRU_SEEK_FAILED;
776 slru_errno = errno;
777 SlruReportIOError(ctl, pageno, 0);
778 }
779
780 result = endpos >= (off_t) (offset + BLCKSZ);
781
782 if (CloseTransientFile(fd) != 0)
783 {
784 slru_errcause = SLRU_CLOSE_FAILED;
785 slru_errno = errno;
786 return false;
787 }
788
789 return result;
790}
int64_t int64
Definition: c.h:499
#define PG_BINARY
Definition: c.h:1244
int CloseTransientFile(int fd)
Definition: fd.c:2871
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2694
#define MAXPGPATH
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
static int fd(const char *x, int i)
Definition: preproc-init.c:105
tree ctl
Definition: radixtree.h:1838
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition: slru.c:91
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:1047
static SlruErrorCause slru_errcause
Definition: slru.c:174
static int slru_errno
Definition: slru.c:175
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:39

References CloseTransientFile(), ctl, endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_page_exists(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruFileName(), and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), MaybeExtendOffsetSlru(), and test_slru_page_exists().

◆ SimpleLruInit()

void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
const char *  subdir,
int  buffer_tranche_id,
int  bank_tranche_id,
SyncRequestHandler  sync_handler,
bool  long_segment_names 
)

Definition at line 251 of file slru.c.

254{
255 SlruShared shared;
256 bool found;
257 int nbanks = nslots / SLRU_BANK_SIZE;
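258
259 Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
260
261 shared = (SlruShared) ShmemInitStruct(name,
262 SimpleLruShmemSize(nslots, nlsns),
263 &found);
264
265 if (!IsUnderPostmaster)
266 {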
267 /* Initialize locks and shared memory area */
268 char *ptr;
269 Size offset;
270
271 Assert(!found);
272
273 memset(shared, 0, sizeof(SlruSharedData));
274
275 shared->num_slots = nslots;
276 shared->lsn_groups_per_page = nlsns;
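277
278 pg_atomic_init_u64(&shared->latest_page_number, 0);
279
280 shared->slru_stats_idx = pgstat_get_slru_index(name);
281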
282 ptr = (char *) shared;
283 offset = MAXALIGN(sizeof(SlruSharedData));
284 shared->page_buffer = (char **) (ptr + offset);
285 offset += MAXALIGN(nslots * sizeof(char *));
286 shared->page_status = (SlruPageStatus *) (ptr + offset);
287 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
288 shared->page_dirty = (bool *) (ptr + offset);
289 offset += MAXALIGN(nslots * sizeof(bool));
290 shared->page_number = (int64 *) (ptr + offset);
291 offset += MAXALIGN(nslots * sizeof(int64));
292 shared->page_lru_count = (int *) (ptr + offset);
293 offset += MAXALIGN(nslots * sizeof(int));
294
295 /* Initialize LWLocks */
296 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
297 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
298 shared->bank_locks = (LWLockPadded *) (ptr + offset);
299 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
300 shared->bank_cur_lru_count = (int *) (ptr + offset);
301 offset += MAXALIGN(nbanks * sizeof(int));
302
303 if (nlsns > 0)
304 {
305 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
306 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
307 }
308
309 ptr += BUFFERALIGN(offset);
310 for (int slotno = 0; slotno < nslots; slotno++)
311 {
312 LWLockInitialize(&shared->buffer_locks[slotno].lock,
313 buffer_tranche_id);
314
315 shared->page_buffer[slotno] = ptr;
316 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
317 shared->page_dirty[slotno] = false;
318 shared->page_lru_count[slotno] = 0;
319 ptr += BLCKSZ;
320 }
321
322 /* Initialize the slot banks. */
323 for (int bankno = 0; bankno < nbanks; bankno++)
324 {
325 LWLockInitialize(&shared->bank_locks[bankno].lock, bank_tranche_id);
326 shared->bank_cur_lru_count[bankno] = 0;
327 }
328
329 /* Should fit to estimated shmem size */
330 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
331 }
332 else
333 {
334 Assert(found);
335 Assert(shared->num_slots == nslots);
336 }
337
338 /*
339 * Initialize the unshared control struct, including directory path. We
340 * assume caller set PagePrecedes.
341 */
342 ctl->shared = shared;
343 ctl->sync_handler = sync_handler;
344 ctl->long_segment_names = long_segment_names;
345 ctl->nbanks = nbanks;
346 strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
347}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
#define MAXALIGN(LEN)
Definition: c.h:782
#define BUFFERALIGN(LEN)
Definition: c.h:784
size_t Size
Definition: c.h:576
bool IsUnderPostmaster
Definition: globals.c:121
Assert(PointerIsAligned(start, uint64))
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:198
SlruSharedData * SlruShared
Definition: slru.h:121
#define SLRU_MAX_ALLOWED_BUFFERS
Definition: slru.h:24
SlruPageStatus
Definition: slru.h:48
@ SLRU_PAGE_EMPTY
Definition: slru.h:49
int slru_stats_idx
Definition: slru.h:118
int64 * page_number
Definition: slru.h:73
int num_slots
Definition: slru.h:64
LWLockPadded * bank_locks
Definition: slru.h:80
int * page_lru_count
Definition: slru.h:74
pg_atomic_uint64 latest_page_number
Definition: slru.h:115
XLogRecPtr * group_lsn
Definition: slru.h:107
int * bank_cur_lru_count
Definition: slru.h:97
int lsn_groups_per_page
Definition: slru.h:108
SlruPageStatus * page_status
Definition: slru.h:71
bool * page_dirty
Definition: slru.h:72
LWLockPadded * buffer_locks
Definition: slru.h:77
char ** page_buffer
Definition: slru.h:70
LWLock lock
Definition: lwlock.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), SlruSharedData::bank_cur_lru_count, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, BUFFERALIGN, ctl, SlruSharedData::group_lsn, IsUnderPostmaster, SlruSharedData::latest_page_number, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), MAXALIGN, name, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_init_u64(), pgstat_get_slru_index(), ShmemInitStruct(), SimpleLruShmemSize(), SLRU_BANK_SIZE, SLRU_MAX_ALLOWED_BUFFERS, SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, and strlcpy().

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), SerialInit(), SUBTRANSShmemInit(), and test_slru_shmem_startup().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruCtl  ctl,
int64  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 501 of file slru.c.

503{
504 SlruShared shared = ctl->shared;
505 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
506
507 Assert(LWLockHeldByMeInMode(banklock, LW_EXCLUSIVE));
508
509 /* Outer loop handles restart if we must wait for someone else's I/O */
510 for (;;)
511 {
512 int slotno;
513 bool ok;
514
515 /* See if page already is in memory; if not, pick victim slot */
516 slotno = SlruSelectLRUPage(ctl, pageno);
517
518 /* Did we find the page in memory? */
519 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
520 shared->page_number[slotno] == pageno)
521 {
522 /*
523 * If page is still being read in, we must wait for I/O. Likewise
524 * if the page is being written and the caller said that's not OK.
525 */
526 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
527 (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
528 !write_ok))
529 {
530 SimpleLruWaitIO(ctl, slotno);
531 /* Now we must recheck state from the top */
532 continue;
533 }
534 /* Otherwise, it's ready to use */
535 SlruRecentlyUsed(shared, slotno);
536
537 /* update the stats counter of pages found in the SLRU */
538 pgstat_count_slru_page_hit(shared->slru_stats_idx);
539
540 return slotno;
541 }
542
543 /* We found no match; assert we selected a freeable slot */
544 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
545 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
546 !shared->page_dirty[slotno]));
547
548 /* Mark the slot read-busy */
549 shared->page_number[slotno] = pageno;
550 shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
551 shared->page_dirty[slotno] = false;
552
553 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
554 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
555
556 /* Release bank lock while doing I/O */
557 LWLockRelease(banklock);
558
559 /* Do the read */
560 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
561
562 /* Set the LSNs for this newly read-in page to zero */
563 SimpleLruZeroLSNs(ctl, slotno);
564
565 /* Re-acquire bank control lock and update page state */
566 LWLockAcquire(banklock, LW_EXCLUSIVE);
567
568 Assert(shared->page_number[slotno] == pageno &&
569 shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
570 !shared->page_dirty[slotno]);
571
572 shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
573
574 LWLockRelease(&shared->buffer_locks[slotno].lock);
575
576 /* Now it's okay to ereport if we failed */
577 if (!ok)
578 SlruReportIOError(ctl, pageno, xid);
579
580 SlruRecentlyUsed(shared, slotno);
581
582 /* update the stats counter of pages not found in SLRU */
583 pgstat_count_slru_page_read(shared->slru_stats_idx);
584
585 return slotno;
586 }
587}
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2014
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition: slru.c:803
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:427
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:444
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:1168
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition: slru.c:1122
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:175
@ SLRU_PAGE_VALID
Definition: slru.h:51
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:52
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:50
Definition: lwlock.h:42

References Assert(), SlruSharedData::buffer_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), pgstat_count_slru_page_read(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed(), SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)

Definition at line 604 of file slru.c.

605{
606 SlruShared shared = ctl->shared;
607 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
608 int bankno = pageno % ctl->nbanks;
609 int bankstart = bankno * SLRU_BANK_SIZE;
610 int bankend = bankstart + SLRU_BANK_SIZE;
611
612 /* Try to find the page while holding only shared lock */
613 LWLockAcquire(banklock, LW_SHARED);
614
615 /* See if page is already in a buffer */
616 for (int slotno = bankstart; slotno < bankend; slotno++)
617 {
618 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
619 shared->page_number[slotno] == pageno &&
620 shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
621 {
622 /* See comments for SlruRecentlyUsed macro */
623 SlruRecentlyUsed(shared, slotno);
624
625 /* update the stats counter of pages found in the SLRU */
626 pgstat_count_slru_page_hit(shared->slru_stats_idx);
627
628 return slotno;
629 }
630 }
631
632 /* No luck, so switch to normal exclusive lock and do regular read */
633 LWLockRelease(banklock);
634 LWLockAcquire(banklock, LW_EXCLUSIVE);
635
636 return SimpleLruReadPage(ctl, pageno, true, xid);
637}
@ LW_SHARED
Definition: lwlock.h:115
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:501

References ctl, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), SimpleLruGetBankLock(), SimpleLruReadPage(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed().

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruShmemSize()

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 198 of file slru.c.

199{
200 int nbanks = nslots / SLRU_BANK_SIZE;
201 Size sz;
202
203 Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
204 Assert(nslots % SLRU_BANK_SIZE == 0);
205
206 /* we assume nslots isn't so large as to risk overflow */
207 sz = MAXALIGN(sizeof(SlruSharedData));
208 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
209 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
210 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
211 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
212 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
213 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
214 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
215 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
216
217 if (nlsns > 0)
218 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
219
220 return BUFFERALIGN(sz) + BLCKSZ * nslots;
221}

References Assert(), BUFFERALIGN, MAXALIGN, SLRU_BANK_SIZE, and SLRU_MAX_ALLOWED_BUFFERS.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), SUBTRANSShmemSize(), and test_slru_shmem_request().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruCtl  ctl,
int64  cutoffPage 
)

Definition at line 1407 of file slru.c.

1408{
1409 SlruShared shared = ctl->shared;
1410 int prevbank;
1411
1412 /* update the stats counter of truncates */
1413 pgstat_count_slru_truncate(shared->slru_stats_idx);
1414
1415 /*
1416 * Scan shared memory and remove any pages preceding the cutoff page, to
1417 * ensure we won't rewrite them later. (Since this is normally called in
1418 * or just after a checkpoint, any dirty pages should have been flushed
1419 * already ... we're just being extra careful here.)
1420 */
1421restart:
1422
1423 /*
1424 * An important safety check: the current endpoint page must not be
1425 * eligible for removal. This check is just a backstop against wraparound
1426 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1427 * outdated value; therefore we don't add a memory barrier.
1428 */
1429 if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1430 cutoffPage))
1431 {
1432 ereport(LOG,
1433 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1434 ctl->Dir)));
1435 return;
1436 }
1437
1438 prevbank = SlotGetBankNumber(0);
1439 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1440 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1441 {
1442 int curbank = SlotGetBankNumber(slotno);
1443
1444 /*
1445 * If the current bank lock is not same as the previous bank lock then
1446 * release the previous lock and acquire the new lock.
1447 */
1448 if (curbank != prevbank)
1449 {
1450 LWLockRelease(&shared->bank_locks[prevbank].lock);
1451 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1452 prevbank = curbank;
1453 }
1454
1455 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1456 continue;
1457 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1458 continue;
1459
1460 /*
1461 * If page is clean, just change state to EMPTY (expected case).
1462 */
1463 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1464 !shared->page_dirty[slotno])
1465 {
1466 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1467 continue;
1468 }
1469
1470 /*
1471 * Hmm, we have (or may have) I/O operations acting on the page, so
1472 * we've got to wait for them to finish and then start again. This is
1473 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1474 * wouldn't it be OK to just discard it without writing it?
1475 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1476 * not delete this page in the end; even if we don't delete it, we
1477 * won't have cause to read its data again. For now, keep the logic
1478 * the same as it was.)
1479 */
1480 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1481 SlruInternalWritePage(ctl, slotno, NULL);
1482 else
1483 SimpleLruWaitIO(ctl, slotno);
1484
1485 LWLockRelease(&shared->bank_locks[prevbank].lock);
1486 goto restart;
1487 }
1488
1489 LWLockRelease(&shared->bank_locks[prevbank].lock);
1490
1491 /* Now we can remove the old segment(s) */
1492 (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1493}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:651
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1790
#define SlotGetBankNumber(slotno)
Definition: slru.c:148
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1727

References SlruSharedData::bank_locks, ctl, ereport, errmsg(), SlruSharedData::latest_page_number, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), pgstat_count_slru_truncate(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 444 of file slru.c.

445{
446 SlruShared shared = ctl->shared;
447 int bankno = SlotGetBankNumber(slotno);
448
449 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
450
451 /* See notes at top of file */
452 LWLockRelease(&shared->bank_locks[bankno].lock);
453 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
454 LWLockRelease(&shared->buffer_locks[slotno].lock);
455 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
456
457 /*
458 * If the slot is still in an io-in-progress state, then either someone
459 * already started a new I/O on the slot, or a previous I/O failed and
460 * neglected to reset the page state. That shouldn't happen, really, but
461 * it seems worth a few extra cycles to check and recover from it. We can
462 * cheaply test for failure by seeing if the buffer lock is still held (we
463 * assume that transaction abort would release the lock).
464 */
465 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
466 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
467 {
468 if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
469 {
470 /* indeed, the I/O must have failed */
471 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
472 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
473 else /* write_in_progress */
474 {
475 shared->page_status[slotno] = SLRU_PAGE_VALID;
476 shared->page_dirty[slotno] = true;
477 }
478 LWLockRelease(&shared->buffer_locks[slotno].lock);
479 }
480 }
481}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1353

References Assert(), SlruSharedData::bank_locks, SlruSharedData::buffer_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1321 of file slru.c.

1322{
1323 SlruShared shared = ctl->shared;
1324 SlruWriteAllData fdata;
1325 int64 pageno = 0;
1326 int prevbank = SlotGetBankNumber(0);
1327 bool ok;
1328
1329 /* update the stats counter of flushes */
1330 pgstat_count_slru_flush(shared->slru_stats_idx);
1331
1332 /*
1333 * Find and write dirty pages
1334 */
1335 fdata.num_files = 0;
1336
1337 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1338
1339 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1340 {
1341 int curbank = SlotGetBankNumber(slotno);
1342
1343 /*
1344 * If the current bank lock is not same as the previous bank lock then
1345 * release the previous lock and acquire the new lock.
1346 */
1347 if (curbank != prevbank)
1348 {
1349 LWLockRelease(&shared->bank_locks[prevbank].lock);
1350 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1351 prevbank = curbank;
1352 }
1353
1354 /* Do nothing if slot is unused */
1355 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1356 continue;
1357
1358 SlruInternalWritePage(ctl, slotno, &fdata);
1359
1360 /*
1361 * In some places (e.g. checkpoints), we cannot assert that the slot
1362 * is clean now, since another process might have re-dirtied it
1363 * already. That's okay.
1364 */
1365 Assert(allow_redirtied ||
1366 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1367 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1368 !shared->page_dirty[slotno]));
1369 }
1370
1371 LWLockRelease(&shared->bank_locks[prevbank].lock);
1372
1373 /*
1374 * Now close any files that were open
1375 */
1376 ok = true;
1377 for (int i = 0; i < fdata.num_files; i++)
1378 {
1379 if (CloseTransientFile(fdata.fd[i]) != 0)
1380 {
1381 slru_errcause = SLRU_CLOSE_FAILED;
1382 slru_errno = errno;
1383 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1384 ok = false;
1385 }
1386 }
1387 if (!ok)
1388 SlruReportIOError(ctl, pageno, InvalidTransactionId);
1389
1390 /* Ensure that directory entries for new files are on disk. */
1391 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1392 fsync_fname(ctl->Dir, true);
1393}
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int i
Definition: isn.c:77
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
int num_files
Definition: slru.c:127
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:128
int64 segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:129
@ SYNC_HANDLER_NONE
Definition: sync.h:42
#define InvalidTransactionId
Definition: transam.h:31

References Assert(), SlruSharedData::bank_locks, CloseTransientFile(), ctl, SlruWriteAllData::fd, fsync_fname(), i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlruWriteAllData::segno, SlotGetBankNumber, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 427 of file slru.c.

428{
429 SlruShared shared = ctl->shared;
430
431 if (shared->lsn_groups_per_page > 0)
432 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
433 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
434}
#define MemSet(start, val, len)
Definition: c.h:991

References ctl, SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, and MemSet.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 374 of file slru.c.

375{
376 SlruShared shared = ctl->shared;
377 int slotno;
378
379 Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(ctl, pageno), LW_EXCLUSIVE));
380
381 /* Find a suitable buffer slot for the page */
382 slotno = SlruSelectLRUPage(ctl, pageno);
383 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
384 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
385 !shared->page_dirty[slotno]) ||
386 shared->page_number[slotno] == pageno);
387
388 /* Mark the slot as containing this page */
389 shared->page_number[slotno] = pageno;
390 shared->page_status[slotno] = SLRU_PAGE_VALID;
391 shared->page_dirty[slotno] = true;
392 SlruRecentlyUsed(shared, slotno);
393
394 /* Set the buffer to zeroes */
395 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
396
397 /* Set the LSNs for this new page to zero */
398 SimpleLruZeroLSNs(ctl, slotno);
399
400 /*
401 * Assume this page is now the latest active page.
402 *
403 * Note that because both this routine and SlruSelectLRUPage run with
404 * ControlLock held, it is not possible for this to be zeroing a page that
405 * SlruSelectLRUPage is going to evict simultaneously. Therefore, there's
406 * no memory barrier here.
407 */
408 pg_atomic_write_u64(&shared->latest_page_number, pageno);
409
410 /* update the stats counter of zeroed pages */
411 pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
412
413 return slotno;
414}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59

References Assert(), ctl, SlruSharedData::latest_page_number, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_write_u64(), pgstat_count_slru_page_zeroed(), SimpleLruGetBankLock(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), SerialAdd(), test_slru_page_write(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

◆ SlruCorrectSegmentFilenameLength()

static bool SlruCorrectSegmentFilenameLength ( SlruCtl  ctl,
size_t  len 
)
inlinestatic

Definition at line 1757 of file slru.c.

1758{
1759 if (ctl->long_segment_names)
1760 return (len == 15); /* see SlruFileName() */
1761 else
1762
1763 /*
1764 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1765 * 73c986adde5 allowed 6-character length.
1766 *
1767 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1768 * numbers, and the corresponding 15-character file names, which may
1769 * eventually deprecate the support for 4, 5, and 6-character names.
1770 */
1771 return (len == 4 || len == 5 || len == 6);
1772}
const void size_t len

References ctl, and len.

Referenced by SlruScanDirectory().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruCtl  ctl,
int64  segno 
)

Definition at line 1525 of file slru.c.

1526{
1527 SlruShared shared = ctl->shared;
1528 int prevbank = SlotGetBankNumber(0);
1529 bool did_write;
1530
1531 /* Clean out any possibly existing references to the segment. */
1532 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1533restart:
1534 did_write = false;
1535 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1536 {
1537 int64 pagesegno;
1538 int curbank = SlotGetBankNumber(slotno);
1539
1540 /*
1541 * If the current bank lock is not same as the previous bank lock then
1542 * release the previous lock and acquire the new lock.
1543 */
1544 if (curbank != prevbank)
1545 {
1546 LWLockRelease(&shared->bank_locks[prevbank].lock);
1547 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1548 prevbank = curbank;
1549 }
1550
1551 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1552 continue;
1553
1554 pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1555 /* not the segment we're looking for */
1556 if (pagesegno != segno)
1557 continue;
1558
1559 /* If page is clean, just change state to EMPTY (expected case). */
1560 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1561 !shared->page_dirty[slotno])
1562 {
1563 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1564 continue;
1565 }
1566
1567 /* Same logic as SimpleLruTruncate() */
1568 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1569 SlruInternalWritePage(ctl, slotno, NULL);
1570 else
1571 SimpleLruWaitIO(ctl, slotno);
1572
1573 did_write = true;
1574 }
1575
1576 /*
1577 * Be extra careful and re-check. The IO functions release the bank
1578 * lock, so new pages could have been read in.
1579 */
1580 if (did_write)
1581 goto restart;
1582
1583 SlruInternalDeleteSegment(ctl, segno);
1584
1585 LWLockRelease(&shared->bank_locks[prevbank].lock);
1586}
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1502

References SlruSharedData::bank_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by PerformMembersTruncation(), and test_slru_page_delete().

◆ SlruFileName()

static int SlruFileName ( SlruCtl  ctl,
char *  path,
int64  segno 
)
inlinestatic

Definition at line 91 of file slru.c.

92{
93 if (ctl->long_segment_names)
94 {
95 /*
96 * We could use 16 characters here but the disadvantage would be that
97 * the SLRU segments will be hard to distinguish from WAL segments.
98 *
99 * For this reason we use 15 characters. It is enough but also means
100 * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
101 */
102 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
103 return snprintf(path, MAXPGPATH, "%s/%015" PRIX64, ctl->Dir, segno);
104 }
105 else
106 {
107 /*
108 * Despite the fact that %04X format string is used up to 24 bit
109 * integers are allowed. See SlruCorrectSegmentFilenameLength()
110 */
111 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
112 return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
113 (unsigned int) segno);
114 }
115}
#define INT64CONST(x)
Definition: c.h:516
#define snprintf
Definition: port.h:239

References Assert(), ctl, INT64CONST, MAXPGPATH, and snprintf.

Referenced by SimpleLruDoesPhysicalPageExist(), SlruInternalDeleteSegment(), SlruPhysicalReadPage(), SlruPhysicalWritePage(), SlruReportIOError(), and SlruSyncFileTag().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruCtl  ctl,
int64  segno 
)
static

Definition at line 1502 of file slru.c.

1503{
1504 char path[MAXPGPATH];
1505
1506 /* Forget any fsync requests queued for this segment. */
1507 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1508 {
1509 FileTag tag;
1510
1511 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1512 RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
1513 }
1514
1515 /* Unlink the file. */
1516 SlruFileName(ctl, path, segno);
1517 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1518 unlink(path);
1519}
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
#define DEBUG2
Definition: elog.h:29
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:156
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References ctl, DEBUG2, ereport, errmsg_internal(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName(), SYNC_FORGET_REQUEST, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 651 of file slru.c.

652{
653 SlruShared shared = ctl->shared;
654 int64 pageno = shared->page_number[slotno];
655 int bankno = SlotGetBankNumber(slotno);
656 bool ok;
657
658 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
659 Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(ctl, pageno), LW_EXCLUSIVE));
660
661 /* If a write is in progress, wait for it to finish */
662 while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
663 shared->page_number[slotno] == pageno)
664 {
665 SimpleLruWaitIO(ctl, slotno);
666 }
667
668 /*
669 * Do nothing if page is not dirty, or if buffer no longer contains the
670 * same page we were called for.
671 */
672 if (!shared->page_dirty[slotno] ||
673 shared->page_status[slotno] != SLRU_PAGE_VALID ||
674 shared->page_number[slotno] != pageno)
675 return;
676
677 /*
678 * Mark the slot write-busy, and clear the dirtybit. After this point, a
679 * transaction status update on this page will mark it dirty again.
680 */
681 shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
682 shared->page_dirty[slotno] = false;
683
684 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
685 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
686
687 /* Release bank lock while doing I/O */
688 LWLockRelease(&shared->bank_locks[bankno].lock);
689
690 /* Do the write */
691 ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
692
693 /* If we failed, and we're in a flush, better close the files */
694 if (!ok && fdata)
695 {
696 for (int i = 0; i < fdata->num_files; i++)
697 CloseTransientFile(fdata->fd[i]);
698 }
699
700 /* Re-acquire bank lock and update page state */
701 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
702
703 Assert(shared->page_number[slotno] == pageno &&
704 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
705
706 /* If we failed to write, mark the page dirty again */
707 if (!ok)
708 shared->page_dirty[slotno] = true;
709
710 shared->page_status[slotno] = SLRU_PAGE_VALID;
711
712 LWLockRelease(&shared->buffer_locks[slotno].lock);
713
714 /* Now it's okay to ereport if we failed */
715 if (!ok)
716 SlruReportIOError(ctl, pageno, InvalidTransactionId);
717
718 /* If part of a checkpoint, count this as a SLRU buffer written. */
719 if (fdata)
720 {
721 CheckpointStats.ckpt_slru_written++;
722 PendingCheckpointerStats.slru_written++;
723 }
724}
PgStat_CheckpointerStats PendingCheckpointerStats
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:875
int ckpt_slru_written
Definition: xlog.h:168
PgStat_Counter slru_written
Definition: pgstat.h:264
CheckpointStatsData CheckpointStats
Definition: xlog.c:209

References Assert(), SlruSharedData::bank_locks, SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_slru_written, CloseTransientFile(), ctl, SlruWriteAllData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, PendingCheckpointerStats, SimpleLruGetBankLock(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, PgStat_CheckpointerStats::slru_written, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruCtl  ctl,
int64  segpage,
int64  cutoffPage 
)
static

Definition at line 1602 of file slru.c.

1603{
1604 int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
1605
1606 Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
1607
1608 return (ctl->PagePrecedes(segpage, cutoffPage) &&
1609 ctl->PagePrecedes(seg_last_page, cutoffPage));
1610}

References Assert(), ctl, and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int64  pageno,
int  slotno 
)
static

Definition at line 803 of file slru.c.

804{
805 SlruShared shared = ctl->shared;
806 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
807 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
808 off_t offset = rpageno * BLCKSZ;
809 char path[MAXPGPATH];
810 int fd;
811
812 SlruFileName(ctl, path, segno);
813
814 /*
815 * In a crash-and-restart situation, it's possible for us to receive
816 * commands to set the commit status of transactions whose bits are in
817 * already-truncated segments of the commit log (see notes in
818 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
819 * where the file doesn't exist, and return zeroes instead.
820 */
821 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
822 if (fd < 0)
823 {
824 if (errno != ENOENT || !InRecovery)
825 {
826 slru_errcause = SLRU_OPEN_FAILED;
827 slru_errno = errno;
828 return false;
829 }
830
831 ereport(LOG,
832 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
833 path)));
834 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
835 return true;
836 }
837
838 errno = 0;
839 pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
840 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
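841 {
842 pgstat_report_wait_end();
843 slru_errcause = SLRU_READ_FAILED;
844 slru_errno = errno;
845 CloseTransientFile(fd);
846 return false;
847 }
848 pgstat_report_wait_end();
849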
850 if (CloseTransientFile(fd) != 0)
851 {
852 slru_errcause = SLRU_CLOSE_FAILED;
853 slru_errno = errno;
854 return false;
855 }
856
857 return true;
858}
#define pg_pread
Definition: port.h:226
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
bool InRecovery
Definition: xlogutils.c:50

References CloseTransientFile(), ctl, ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName().

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int64  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 875 of file slru.c.

876{
877 SlruShared shared = ctl->shared;
878 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
879 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
880 off_t offset = rpageno * BLCKSZ;
881 char path[MAXPGPATH];
882 int fd = -1;
883
884 /* update the stats counter of written pages */
885 pgstat_count_slru_page_written(shared->slru_stats_idx);
886
887 /*
888 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
889 * write out data before associated WAL records. This is the same action
890 * performed during FlushBuffer() in the main buffer manager.
891 */
892 if (shared->group_lsn != NULL)
893 {
894 /*
895 * We must determine the largest async-commit LSN for the page. This
896 * is a bit tedious, but since this entire function is a slow path
897 * anyway, it seems better to do this here than to maintain a per-page
898 * LSN variable (which'd need an extra comparison in the
899 * transaction-commit path).
900 */
901 XLogRecPtr max_lsn;
902 int lsnindex;
903
904 lsnindex = slotno * shared->lsn_groups_per_page;
905 max_lsn = shared->group_lsn[lsnindex++];
906 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
907 {
908 XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
909
910 if (max_lsn < this_lsn)
911 max_lsn = this_lsn;
912 }
913
914 if (!XLogRecPtrIsInvalid(max_lsn))
915 {
916 /*
917 * As noted above, elog(ERROR) is not acceptable here, so if
918 * XLogFlush were to fail, we must PANIC. This isn't much of a
919 * restriction because XLogFlush is just about all critical
920 * section anyway, but let's make sure.
921 */
922 START_CRIT_SECTION();
923 XLogFlush(max_lsn);
924 END_CRIT_SECTION();
925 }
926 }
927
928 /*
929 * During a SimpleLruWriteAll, we may already have the desired file open.
930 */
931 if (fdata)
932 {
933 for (int i = 0; i < fdata->num_files; i++)
934 {
935 if (fdata->segno[i] == segno)
936 {
937 fd = fdata->fd[i];
938 break;
939 }
940 }
941 }
942
943 if (fd < 0)
944 {
945 /*
946 * If the file doesn't already exist, we should create it. It is
947 * possible for this to need to happen when writing a page that's not
948 * first in its segment; we assume the OS can cope with that. (Note:
949 * it might seem that it'd be okay to create files only when
950 * SimpleLruZeroPage is called for the first page of a segment.
951 * However, if after a crash and restart the REDO logic elects to
952 * replay the log from a checkpoint before the latest one, then it's
953 * possible that we will get commands to set transaction status of
954 * transactions that have already been truncated from the commit log.
955 * Easiest way to deal with that is to accept references to
956 * nonexistent files here and in SlruPhysicalReadPage.)
957 *
958 * Note: it is possible for more than one backend to be executing this
959 * code simultaneously for different pages of the same file. Hence,
960 * don't use O_EXCL or O_TRUNC or anything like that.
961 */
962 SlruFileName(ctl, path, segno);
963 fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
964 if (fd < 0)
965 {
966 slru_errcause = SLRU_OPEN_FAILED;
967 slru_errno = errno;
968 return false;
969 }
970
971 if (fdata)
972 {
973 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
974 {
975 fdata->fd[fdata->num_files] = fd;
976 fdata->segno[fdata->num_files] = segno;
977 fdata->num_files++;
978 }
979 else
980 {
981 /*
982 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
983 * fall back to treating it as a standalone write.
984 */
985 fdata = NULL;
986 }
987 }
988 }
989
990 errno = 0;
991 pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
992 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
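993 {
994 pgstat_report_wait_end();
995 /* if write didn't set errno, assume problem is no disk space */
996 if (errno == 0)
997 errno = ENOSPC;
998 slru_errcause = SLRU_WRITE_FAILED;
999 slru_errno = errno;
1000 if (!fdata)
1001 CloseTransientFile(fd);
1002 return false;
1003 }
1004 pgstat_report_wait_end();
1005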
1006 /* Queue up a sync request for the checkpointer. */
1007 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1008 {
1009 FileTag tag;
1010
1011 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1012 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1013 {
1014 /* No space to enqueue sync request. Do it synchronously. */
1015 pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
1016 if (pg_fsync(fd) != 0)
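1017 {
1018 pgstat_report_wait_end();
1019 slru_errcause = SLRU_FSYNC_FAILED;
1020 slru_errno = errno;
1021 CloseTransientFile(fd);
1022 return false;
1023 }
1024 pgstat_report_wait_end();
1025 }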
1026 }
1027
1028 /* Close file, unless part of flush request. */
1029 if (!fdata)
1030 {
1031 if (CloseTransientFile(fd) != 0)
1032 {
1033 slru_errcause = SLRU_CLOSE_FAILED;
1034 slru_errno = errno;
1035 return false;
1036 }
1037 }
1038
1039 return true;
1040}
int pg_fsync(int fd)
Definition: fd.c:386
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
#define pg_pwrite
Definition: port.h:227
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:123
@ SYNC_REQUEST
Definition: sync.h:25
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2923
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

References CloseTransientFile(), ctl, END_CRIT_SECTION, fd(), SlruWriteAllData::fd, SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, SlruWriteAllData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_page_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName(), START_CRIT_SECTION, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

◆ SlruRecentlyUsed()

static void SlruRecentlyUsed ( SlruShared  shared,
int  slotno 
)
inlinestatic

Definition at line 1122 of file slru.c.

1123{
1124 int bankno = SlotGetBankNumber(slotno);
1125 int new_lru_count = shared->bank_cur_lru_count[bankno];
1126
1127 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
1128
1129 /*
1130 * The reason for the if-test is that there are often many consecutive
1131 * accesses to the same page (particularly the latest page). By
1132 * suppressing useless increments of bank_cur_lru_count, we reduce the
1133 * probability that old pages' counts will "wrap around" and make them
1134 * appear recently used.
1135 *
1136 * We allow this code to be executed concurrently by multiple processes
1137 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1138 * are atomic, this should not cause any completely-bogus values to enter
1139 * the computation. However, it is possible for either bank_cur_lru_count
1140 * or individual page_lru_count entries to be "reset" to lower values than
1141 * they should have, in case a process is delayed while it executes this
1142 * function. With care in SlruSelectLRUPage(), this does little harm, and
1143 * in any case the absolute worst possible consequence is a nonoptimal
1144 * choice of page to evict. The gain from allowing concurrent reads of
1145 * SLRU pages seems worth it.
1146 */
1147 if (new_lru_count != shared->page_lru_count[slotno])
1148 {
1149 shared->bank_cur_lru_count[bankno] = ++new_lru_count;
1150 shared->page_lru_count[slotno] = new_lru_count;
1151 }
1152}

References Assert(), SlruSharedData::bank_cur_lru_count, SlruSharedData::page_lru_count, SlruSharedData::page_status, SlotGetBankNumber, and SLRU_PAGE_EMPTY.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)
static

Definition at line 1047 of file slru.c.

1048{
1049 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1050 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1051 int offset = rpageno * BLCKSZ;
1052 char path[MAXPGPATH];
1053
1054 SlruFileName(ctl, path, segno);
1055 errno = slru_errno;
1056 switch (slru_errcause)
1057 {
1058 case SLRU_OPEN_FAILED:
1059 ereport(ERROR,
1060 (errcode_for_file_access(),
1061 errmsg("could not access status of transaction %u", xid),
1062 errdetail("Could not open file \"%s\": %m.", path)));
1063 break;
1064 case SLRU_SEEK_FAILED:
1065 ereport(ERROR,
1066 (errcode_for_file_access(),
1067 errmsg("could not access status of transaction %u", xid),
1068 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1069 path, offset)));
1070 break;
1071 case SLRU_READ_FAILED:
1072 if (errno)
1073 ereport(ERROR,
1074 (errcode_for_file_access(),
1075 errmsg("could not access status of transaction %u", xid),
1076 errdetail("Could not read from file \"%s\" at offset %d: %m.",
1077 path, offset)));
1078 else
1079 ereport(ERROR,
1080 (errmsg("could not access status of transaction %u", xid),
1081 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1082 break;
1083 case SLRU_WRITE_FAILED:
1084 if (errno)
1085 ereport(ERROR,
1086 (errcode_for_file_access(),
1087 errmsg("could not access status of transaction %u", xid),
1088 errdetail("Could not write to file \"%s\" at offset %d: %m.",
1089 path, offset)));
1090 else
1091 ereport(ERROR,
1092 (errmsg("could not access status of transaction %u", xid),
1093 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1094 path, offset)));
1095 break;
1096 case SLRU_FSYNC_FAILED:
1097 ereport(data_sync_elevel(ERROR),
1098 (errcode_for_file_access(),
1099 errmsg("could not access status of transaction %u", xid),
1100 errdetail("Could not fsync file \"%s\": %m.",
1101 path)));
1102 break;
1103 case SLRU_CLOSE_FAILED:
1104 ereport(ERROR,
1105 (errcode_for_file_access(),
1106 errmsg("could not access status of transaction %u", xid),
1107 errdetail("Could not close file \"%s\": %m.",
1108 path)));
1109 break;
1110 default:
1111 /* can't get here, we trust */
1112 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1113 (int) slru_errcause);
1114 break;
1115 }
1116}
int errcode_for_file_access(void)
Definition: elog.c:877
int errdetail(const char *fmt,...)
Definition: elog.c:1204
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
int data_sync_elevel(int elevel)
Definition: fd.c:4001

References ctl, data_sync_elevel(), elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName().

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1743 of file slru.c.

1744{
1745 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1746
1747 return false; /* keep going */
1748}

References ctl, SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)
static

Definition at line 1727 of file slru.c.

1729{
1730 int64 cutoffPage = *(int64 *) data;
1731
1732 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1733 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1734
1735 return false; /* keep going */
1736}
const void * data
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition: slru.c:1602

References ctl, data, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1711 of file slru.c.

1713{
1714 int64 cutoffPage = *(int64 *) data;
1715
1716 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1717 return true; /* found one; don't iterate any more */
1718
1719 return false; /* keep going */
1720}

References ctl, data, and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1790 of file slru.c.

1791{
1792 bool retval = false;
1793 DIR *cldir;
1794 struct dirent *clde;
1795 int64 segno;
1796 int64 segpage;
1797
1798 cldir = AllocateDir(ctl->Dir);
1799 while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1800 {
1801 size_t len;
1802
1803 len = strlen(clde->d_name);
1804
1805 if (SlruCorrectSegmentFilenameLength(ctl, len) &&
1806 strspn(clde->d_name, "0123456789ABCDEF") == len)
1807 {
1808 segno = strtoi64(clde->d_name, NULL, 16);
1809 segpage = segno * SLRU_PAGES_PER_SEGMENT;
1810
1811 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1812 ctl->Dir, clde->d_name);
1813 retval = callback(ctl, clde->d_name, segpage, data);
1814 if (retval)
1815 break;
1816 }
1817 }
1818 FreeDir(cldir);
1819
1820 return retval;
1821}
int FreeDir(DIR *dir)
Definition: fd.c:3025
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2907
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2973
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition: slru.c:1757
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References AllocateDir(), callback(), ctl, dirent::d_name, data, DEBUG2, elog, FreeDir(), len, ReadDir(), SLRU_PAGES_PER_SEGMENT, and SlruCorrectSegmentFilenameLength().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruCtl  ctl,
int64  pageno 
)
static

Definition at line 1168 of file slru.c.

1169{
1170 SlruShared shared = ctl->shared;
1171
1172 /* Outer loop handles restart after I/O */
1173 for (;;)
1174 {
1175 int cur_count;
1176 int bestvalidslot = 0; /* keep compiler quiet */
1177 int best_valid_delta = -1;
1178 int64 best_valid_page_number = 0; /* keep compiler quiet */
1179 int bestinvalidslot = 0; /* keep compiler quiet */
1180 int best_invalid_delta = -1;
1181 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1182 int bankno = pageno % ctl->nbanks;
1183 int bankstart = bankno * SLRU_BANK_SIZE;
1184 int bankend = bankstart + SLRU_BANK_SIZE;
1185
1186 Assert(LWLockHeldByMe(SimpleLruGetBankLock(ctl, pageno)));
1187
1188 /* See if page already has a buffer assigned */
1189 for (int slotno = bankstart; slotno < bankend; slotno++)
1190 {
1191 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1192 shared->page_number[slotno] == pageno)
1193 return slotno;
1194 }
1195
1196 /*
1197 * If we find any EMPTY slot, just select that one. Else choose a
1198 * victim page to replace. We normally take the least recently used
1199 * valid page, but we will never take the slot containing
1200 * latest_page_number, even if it appears least recently used. We
1201 * will select a slot that is already I/O busy only if there is no
1202 * other choice: a read-busy slot will not be least recently used once
1203 * the read finishes, and waiting for an I/O on a write-busy slot is
1204 * inferior to just picking some other slot. Testing shows the slot
1205 * we pick instead will often be clean, allowing us to begin a read at
1206 * once.
1207 *
1208 * Normally the page_lru_count values will all be different and so
1209 * there will be a well-defined LRU page. But since we allow
1210 * concurrent execution of SlruRecentlyUsed() within
1211 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1212 * acquire the same lru_count values. In that case we break ties by
1213 * choosing the furthest-back page.
1214 *
1215 * Notice that this next line forcibly advances cur_lru_count to a
1216 * value that is certainly beyond any value that will be in the
1217 * page_lru_count array after the loop finishes. This ensures that
1218 * the next execution of SlruRecentlyUsed will mark the page newly
1219 * used, even if it's for a page that has the current counter value.
1220 * That gets us back on the path to having good data when there are
1221 * multiple pages with the same lru_count.
1222 */
1223 cur_count = (shared->bank_cur_lru_count[bankno])++;
1224 for (int slotno = bankstart; slotno < bankend; slotno++)
1225 {
1226 int this_delta;
1227 int64 this_page_number;
1228
1229 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1230 return slotno;
1231
1232 this_delta = cur_count - shared->page_lru_count[slotno];
1233 if (this_delta < 0)
1234 {
1235 /*
1236 * Clean up in case shared updates have caused cur_count
1237 * increments to get "lost". We back off the page counts,
1238 * rather than trying to increase cur_count, to avoid any
1239 * question of infinite loops or failure in the presence of
1240 * wrapped-around counts.
1241 */
1242 shared->page_lru_count[slotno] = cur_count;
1243 this_delta = 0;
1244 }
1245
1246 /*
1247 * If this page is the one most recently zeroed, don't consider it
1248 * an eviction candidate. See comments in SimpleLruZeroPage for an
1249 * explanation about the lack of a memory barrier here.
1250 */
1251 this_page_number = shared->page_number[slotno];
1252 if (this_page_number ==
1253 pg_atomic_read_u64(&shared->latest_page_number))
1254 continue;
1255
1256 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1257 {
1258 if (this_delta > best_valid_delta ||
1259 (this_delta == best_valid_delta &&
1260 ctl->PagePrecedes(this_page_number,
1261 best_valid_page_number)))
1262 {
1263 bestvalidslot = slotno;
1264 best_valid_delta = this_delta;
1265 best_valid_page_number = this_page_number;
1266 }
1267 }
1268 else
1269 {
1270 if (this_delta > best_invalid_delta ||
1271 (this_delta == best_invalid_delta &&
1272 ctl->PagePrecedes(this_page_number,
1273 best_invalid_page_number)))
1274 {
1275 bestinvalidslot = slotno;
1276 best_invalid_delta = this_delta;
1277 best_invalid_page_number = this_page_number;
1278 }
1279 }
1280 }
1281
1282 /*
1283 * If all pages (except possibly the latest one) are I/O busy, we'll
1284 * have to wait for an I/O to complete and then retry. In that
1285 * unhappy case, we choose to wait for the I/O on the least recently
1286 * used slot, on the assumption that it was likely initiated first of
1287 * all the I/Os in progress and may therefore finish first.
1288 */
1289 if (best_valid_delta < 0)
1290 {
1291 SimpleLruWaitIO(ctl, bestinvalidslot);
1292 continue;
1293 }
1294
1295 /*
1296 * If the selected page is clean, we're set.
1297 */
1298 if (!shared->page_dirty[bestvalidslot])
1299 return bestvalidslot;
1300
1301 /*
1302 * Write the page.
1303 */
1304 SlruInternalWritePage(ctl, bestvalidslot, NULL);
1305
1306 /*
1307 * Now loop back and try again. This is the easiest way of dealing
1308 * with corner cases such as the victim page being re-dirtied while we
1309 * wrote it.
1310 */
1311 }
1312}
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1970

References Assert(), SlruSharedData::bank_cur_lru_count, ctl, SlruSharedData::latest_page_number, LWLockHeldByMe(), SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruCtl  ctl,
const FileTag ftag,
char *  path 
)

Definition at line 1830 of file slru.c.

1831{
1832 int fd;
1833 int save_errno;
1834 int result;
1835
1836 SlruFileName(ctl, path, ftag->segno);
1837
1838 fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1839 if (fd < 0)
1840 return -1;
1841
1842 pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1843 result = pg_fsync(fd);
1844 pgstat_report_wait_end();
1845 save_errno = errno;
1846
1847 CloseTransientFile(fd);
1848
1849 errno = save_errno;
1850 return result;
1851}
uint64 segno
Definition: sync.h:55

References CloseTransientFile(), ctl, fd(), OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), FileTag::segno, and SlruFileName().

Referenced by clogsyncfiletag(), committssyncfiletag(), multixactmemberssyncfiletag(), multixactoffsetssyncfiletag(), and test_slru_page_sync().

Variable Documentation

◆ slru_errcause

◆ slru_errno