PostgreSQL Source Code git master
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "utils/guc.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define MAX_WRITEALL_BUFFERS   16
 
#define SLRU_BANK_BITSHIFT   4
 
#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)
 
#define SlotGetBankNumber(slotno)   ((slotno) >> SLRU_BANK_BITSHIFT)
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static int SlruFileName (SlruCtl ctl, char *path, int64 segno)
 
static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int64 pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruCtl ctl, int64 pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int64 pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, int64 segno)
 
static void SlruRecentlyUsed (SlruShared shared, int slotno)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
int SimpleLruAutotuneBuffers (int divisor, int max)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
 
bool check_slru_buffers (const char *name, int *newval)
 
int SimpleLruZeroPage (SlruCtl ctl, int64 pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int64 pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int64 pageno)
 
void SimpleLruWriteAll (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int64 cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int64 segno)
 
static bool SlruMayDeleteSegment (SlruCtl ctl, int64 segpage, int64 cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static bool SlruCorrectSegmentFilenameLength (SlruCtl ctl, size_t len)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruCtl ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:68
Definition: sync.h:51

Definition at line 157 of file slru.c.

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 124 of file slru.c.

◆ SlotGetBankNumber

#define SlotGetBankNumber (   slotno)    ((slotno) >> SLRU_BANK_BITSHIFT)

Definition at line 149 of file slru.c.

◆ SLRU_BANK_BITSHIFT

#define SLRU_BANK_BITSHIFT   4

Definition at line 143 of file slru.c.

◆ SLRU_BANK_SIZE

#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)

Definition at line 144 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

typedef struct SlruWriteAllData* SlruWriteAll

Definition at line 133 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 165 of file slru.c.

166{
SlruErrorCause
Definition: slru.c:166
@ SLRU_WRITE_FAILED
Definition: slru.c:170
@ SLRU_FSYNC_FAILED
Definition: slru.c:171
@ SLRU_SEEK_FAILED
Definition: slru.c:168
@ SLRU_OPEN_FAILED
Definition: slru.c:167
@ SLRU_CLOSE_FAILED
Definition: slru.c:172
@ SLRU_READ_FAILED
Definition: slru.c:169

Function Documentation

◆ check_slru_buffers()

bool check_slru_buffers ( const char *  name,
int *  newval 
)

Definition at line 355 of file slru.c.

356{
357 /* Valid values are multiples of SLRU_BANK_SIZE */
358 if (*newval % SLRU_BANK_SIZE == 0)
359 return true;
360
361 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
362 SLRU_BANK_SIZE);
363 return false;
364}
#define newval
#define GUC_check_errdetail
Definition: guc.h:476
#define SLRU_BANK_SIZE
Definition: slru.c:144
const char * name

References GUC_check_errdetail, name, newval, and SLRU_BANK_SIZE.

Referenced by check_commit_ts_buffers(), check_multixact_member_buffers(), check_multixact_offset_buffers(), check_notify_buffers(), check_serial_buffers(), check_subtrans_buffers(), and check_transaction_buffers().

◆ SimpleLruAutotuneBuffers()

int SimpleLruAutotuneBuffers ( int  divisor,
int  max 
)

Definition at line 232 of file slru.c.

233{
234 return Min(max - (max % SLRU_BANK_SIZE),
235 Max(SLRU_BANK_SIZE,
236 NBuffers / divisor - (NBuffers / divisor) % SLRU_BANK_SIZE));
237}
#define Min(x, y)
Definition: c.h:961
#define Max(x, y)
Definition: c.h:955
int NBuffers
Definition: globals.c:141

References Max, Min, NBuffers, and SLRU_BANK_SIZE.

Referenced by CLOGShmemBuffers(), CommitTsShmemBuffers(), and SUBTRANSShmemBuffers().

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 746 of file slru.c.

747{
748 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
749 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
750 int offset = rpageno * BLCKSZ;
751 char path[MAXPGPATH];
752 int fd;
753 bool result;
754 off_t endpos;
755
756 /* update the stats counter of checked pages */
757 pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
758
759 SlruFileName(ctl, path, segno);
760
761 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
762 if (fd < 0)
763 {
764 /* expected: file doesn't exist */
765 if (errno == ENOENT)
766 return false;
767
768 /* report error normally */
769 slru_errcause = SLRU_OPEN_FAILED;
770 slru_errno = errno;
771 SlruReportIOError(ctl, pageno, 0);
772 }
773
774 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
775 {
776 slru_errcause = SLRU_SEEK_FAILED;
777 slru_errno = errno;
778 SlruReportIOError(ctl, pageno, 0);
779 }
780
781 result = endpos >= (off_t) (offset + BLCKSZ);
782
783 if (CloseTransientFile(fd) != 0)
784 {
785 slru_errcause = SLRU_CLOSE_FAILED;
786 slru_errno = errno;
787 return false;
788 }
789
790 return result;
791}
int64_t int64
Definition: c.h:485
#define PG_BINARY
Definition: c.h:1230
int CloseTransientFile(int fd)
Definition: fd.c:2831
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2655
#define MAXPGPATH
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
static int fd(const char *x, int i)
Definition: preproc-init.c:105
tree ctl
Definition: radixtree.h:1838
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition: slru.c:91
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:1048
static SlruErrorCause slru_errcause
Definition: slru.c:175
static int slru_errno
Definition: slru.c:176
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:39

References CloseTransientFile(), ctl, endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_page_exists(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruFileName(), and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), MaybeExtendOffsetSlru(), and test_slru_page_exists().

◆ SimpleLruInit()

void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
const char *  subdir,
int  buffer_tranche_id,
int  bank_tranche_id,
SyncRequestHandler  sync_handler,
bool  long_segment_names 
)

Definition at line 252 of file slru.c.

255{
256 SlruShared shared;
257 bool found;
258 int nbanks = nslots / SLRU_BANK_SIZE;
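259
260 Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
261
262 shared = (SlruShared) ShmemInitStruct(name,
263 SimpleLruShmemSize(nslots, nlsns),
264 &found);
265
266 if (!IsUnderPostmaster)
267 {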
268 /* Initialize locks and shared memory area */
269 char *ptr;
270 Size offset;
271
272 Assert(!found);
273
274 memset(shared, 0, sizeof(SlruSharedData));
275
276 shared->num_slots = nslots;
277 shared->lsn_groups_per_page = nlsns;
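278
279 pg_atomic_init_u64(&shared->latest_page_number, 0);
280
281 shared->slru_stats_idx = pgstat_get_slru_index(name);
282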
283 ptr = (char *) shared;
284 offset = MAXALIGN(sizeof(SlruSharedData));
285 shared->page_buffer = (char **) (ptr + offset);
286 offset += MAXALIGN(nslots * sizeof(char *));
287 shared->page_status = (SlruPageStatus *) (ptr + offset);
288 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
289 shared->page_dirty = (bool *) (ptr + offset);
290 offset += MAXALIGN(nslots * sizeof(bool));
291 shared->page_number = (int64 *) (ptr + offset);
292 offset += MAXALIGN(nslots * sizeof(int64));
293 shared->page_lru_count = (int *) (ptr + offset);
294 offset += MAXALIGN(nslots * sizeof(int));
295
296 /* Initialize LWLocks */
297 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
298 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
299 shared->bank_locks = (LWLockPadded *) (ptr + offset);
300 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
301 shared->bank_cur_lru_count = (int *) (ptr + offset);
302 offset += MAXALIGN(nbanks * sizeof(int));
303
304 if (nlsns > 0)
305 {
306 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
307 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
308 }
309
310 ptr += BUFFERALIGN(offset);
311 for (int slotno = 0; slotno < nslots; slotno++)
312 {
313 LWLockInitialize(&shared->buffer_locks[slotno].lock,
314 buffer_tranche_id);
315
316 shared->page_buffer[slotno] = ptr;
317 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
318 shared->page_dirty[slotno] = false;
319 shared->page_lru_count[slotno] = 0;
320 ptr += BLCKSZ;
321 }
322
323 /* Initialize the slot banks. */
324 for (int bankno = 0; bankno < nbanks; bankno++)
325 {
326 LWLockInitialize(&shared->bank_locks[bankno].lock, bank_tranche_id);
327 shared->bank_cur_lru_count[bankno] = 0;
328 }
329
330 /* Should fit to estimated shmem size */
331 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
332 }
333 else
334 {
335 Assert(found);
336 Assert(shared->num_slots == nslots);
337 }
338
339 /*
340 * Initialize the unshared control struct, including directory path. We
341 * assume caller set PagePrecedes.
342 */
343 ctl->shared = shared;
344 ctl->sync_handler = sync_handler;
345 ctl->long_segment_names = long_segment_names;
346 ctl->nbanks = nbanks;
347 strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
348}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:453
#define MAXALIGN(LEN)
Definition: c.h:768
#define BUFFERALIGN(LEN)
Definition: c.h:770
#define Assert(condition)
Definition: c.h:815
size_t Size
Definition: c.h:562
bool IsUnderPostmaster
Definition: globals.c:119
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:199
SlruSharedData * SlruShared
Definition: slru.h:121
#define SLRU_MAX_ALLOWED_BUFFERS
Definition: slru.h:24
SlruPageStatus
Definition: slru.h:48
@ SLRU_PAGE_EMPTY
Definition: slru.h:49
int slru_stats_idx
Definition: slru.h:118
int64 * page_number
Definition: slru.h:73
int num_slots
Definition: slru.h:64
LWLockPadded * bank_locks
Definition: slru.h:80
int * page_lru_count
Definition: slru.h:74
pg_atomic_uint64 latest_page_number
Definition: slru.h:115
XLogRecPtr * group_lsn
Definition: slru.h:107
int * bank_cur_lru_count
Definition: slru.h:97
int lsn_groups_per_page
Definition: slru.h:108
SlruPageStatus * page_status
Definition: slru.h:71
bool * page_dirty
Definition: slru.h:72
LWLockPadded * buffer_locks
Definition: slru.h:77
char ** page_buffer
Definition: slru.h:70
LWLock lock
Definition: lwlock.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert, SlruSharedData::bank_cur_lru_count, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, BUFFERALIGN, ctl, SlruSharedData::group_lsn, IsUnderPostmaster, SlruSharedData::latest_page_number, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), MAXALIGN, name, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_init_u64(), pgstat_get_slru_index(), ShmemInitStruct(), SimpleLruShmemSize(), SLRU_BANK_SIZE, SLRU_MAX_ALLOWED_BUFFERS, SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, and strlcpy().

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), SerialInit(), SUBTRANSShmemInit(), and test_slru_shmem_startup().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruCtl  ctl,
int64  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 502 of file slru.c.

504{
505 SlruShared shared = ctl->shared;
506 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
507
508 Assert(LWLockHeldByMeInMode(banklock, LW_EXCLUSIVE));
509
510 /* Outer loop handles restart if we must wait for someone else's I/O */
511 for (;;)
512 {
513 int slotno;
514 bool ok;
515
516 /* See if page already is in memory; if not, pick victim slot */
517 slotno = SlruSelectLRUPage(ctl, pageno);
518
519 /* Did we find the page in memory? */
520 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
521 shared->page_number[slotno] == pageno)
522 {
523 /*
524 * If page is still being read in, we must wait for I/O. Likewise
525 * if the page is being written and the caller said that's not OK.
526 */
527 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
528 (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
529 !write_ok))
530 {
531 SimpleLruWaitIO(ctl, slotno);
532 /* Now we must recheck state from the top */
533 continue;
534 }
535 /* Otherwise, it's ready to use */
536 SlruRecentlyUsed(shared, slotno);
537
538 /* update the stats counter of pages found in the SLRU */
539 pgstat_count_slru_page_hit(shared->slru_stats_idx);
540
541 return slotno;
542 }
543
544 /* We found no match; assert we selected a freeable slot */
545 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
546 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
547 !shared->page_dirty[slotno]));
548
549 /* Mark the slot read-busy */
550 shared->page_number[slotno] = pageno;
551 shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
552 shared->page_dirty[slotno] = false;
553
554 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
555 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
556
557 /* Release bank lock while doing I/O */
558 LWLockRelease(banklock);
559
560 /* Do the read */
561 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
562
563 /* Set the LSNs for this newly read-in page to zero */
564 SimpleLruZeroLSNs(ctl, slotno);
565
566 /* Re-acquire bank control lock and update page state */
567 LWLockAcquire(banklock, LW_EXCLUSIVE);
568
569 Assert(shared->page_number[slotno] == pageno &&
570 shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
571 !shared->page_dirty[slotno]);
572
573 shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
574
575 LWLockRelease(&shared->buffer_locks[slotno].lock);
576
577 /* Now it's okay to ereport if we failed */
578 if (!ok)
579 SlruReportIOError(ctl, pageno, xid);
580
581 SlruRecentlyUsed(shared, slotno);
582
583 /* update the stats counter of pages not found in SLRU */
584 pgstat_count_slru_page_read(shared->slru_stats_idx);
585
586 return slotno;
587 }
588}
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1937
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition: slru.c:804
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:428
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:445
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:1169
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition: slru.c:1123
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:175
@ SLRU_PAGE_VALID
Definition: slru.h:51
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:52
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:50
Definition: lwlock.h:42

References Assert, SlruSharedData::buffer_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), pgstat_count_slru_page_read(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed(), SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)

Definition at line 605 of file slru.c.

606{
607 SlruShared shared = ctl->shared;
608 LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
609 int bankno = pageno % ctl->nbanks;
610 int bankstart = bankno * SLRU_BANK_SIZE;
611 int bankend = bankstart + SLRU_BANK_SIZE;
612
613 /* Try to find the page while holding only shared lock */
614 LWLockAcquire(banklock, LW_SHARED);
615
616 /* See if page is already in a buffer */
617 for (int slotno = bankstart; slotno < bankend; slotno++)
618 {
619 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
620 shared->page_number[slotno] == pageno &&
621 shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
622 {
623 /* See comments for SlruRecentlyUsed macro */
624 SlruRecentlyUsed(shared, slotno);
625
626 /* update the stats counter of pages found in the SLRU */
627 pgstat_count_slru_page_hit(shared->slru_stats_idx);
628
629 return slotno;
630 }
631 }
632
633 /* No luck, so switch to normal exclusive lock and do regular read */
634 LWLockRelease(banklock);
635 LWLockAcquire(banklock, LW_EXCLUSIVE);
636
637 return SimpleLruReadPage(ctl, pageno, true, xid);
638}
@ LW_SHARED
Definition: lwlock.h:115
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:502

References ctl, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), SimpleLruGetBankLock(), SimpleLruReadPage(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed().

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruShmemSize()

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 199 of file slru.c.

200{
201 int nbanks = nslots / SLRU_BANK_SIZE;
202 Size sz;
203
204 Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
205 Assert(nslots % SLRU_BANK_SIZE == 0);
206
207 /* we assume nslots isn't so large as to risk overflow */
208 sz = MAXALIGN(sizeof(SlruSharedData));
209 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
210 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
211 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
212 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
213 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
214 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
215 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
216 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
217
218 if (nlsns > 0)
219 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
220
221 return BUFFERALIGN(sz) + BLCKSZ * nslots;
222}

References Assert, BUFFERALIGN, MAXALIGN, SLRU_BANK_SIZE, and SLRU_MAX_ALLOWED_BUFFERS.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), SUBTRANSShmemSize(), and test_slru_shmem_request().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruCtl  ctl,
int64  cutoffPage 
)

Definition at line 1408 of file slru.c.

1409{
1410 SlruShared shared = ctl->shared;
1411 int prevbank;
1412
1413 /* update the stats counter of truncates */
1414 pgstat_count_slru_truncate(shared->slru_stats_idx);
1415
1416 /*
1417 * Scan shared memory and remove any pages preceding the cutoff page, to
1418 * ensure we won't rewrite them later. (Since this is normally called in
1419 * or just after a checkpoint, any dirty pages should have been flushed
1420 * already ... we're just being extra careful here.)
1421 */
1422restart:
1423
1424 /*
1425 * An important safety check: the current endpoint page must not be
1426 * eligible for removal. This check is just a backstop against wraparound
1427 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1428 * outdated value; therefore we don't add a memory barrier.
1429 */
1430 if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1431 cutoffPage))
1432 {
1433 ereport(LOG,
1434 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1435 ctl->Dir)));
1436 return;
1437 }
1438
1439 prevbank = SlotGetBankNumber(0);
1440 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1441 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1442 {
1443 int curbank = SlotGetBankNumber(slotno);
1444
1445 /*
1446 * If the current bank lock is not same as the previous bank lock then
1447 * release the previous lock and acquire the new lock.
1448 */
1449 if (curbank != prevbank)
1450 {
1451 LWLockRelease(&shared->bank_locks[prevbank].lock);
1452 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1453 prevbank = curbank;
1454 }
1455
1456 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1457 continue;
1458 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1459 continue;
1460
1461 /*
1462 * If page is clean, just change state to EMPTY (expected case).
1463 */
1464 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1465 !shared->page_dirty[slotno])
1466 {
1467 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1468 continue;
1469 }
1470
1471 /*
1472 * Hmm, we have (or may have) I/O operations acting on the page, so
1473 * we've got to wait for them to finish and then start again. This is
1474 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1475 * wouldn't it be OK to just discard it without writing it?
1476 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1477 * not delete this page in the end; even if we don't delete it, we
1478 * won't have cause to read its data again. For now, keep the logic
1479 * the same as it was.)
1480 */
1481 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1482 SlruInternalWritePage(ctl, slotno, NULL);
1483 else
1484 SimpleLruWaitIO(ctl, slotno);
1485
1486 LWLockRelease(&shared->bank_locks[prevbank].lock);
1487 goto restart;
1488 }
1489
1490 LWLockRelease(&shared->bank_locks[prevbank].lock);
1491
1492 /* Now we can remove the old segment(s) */
1493 (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1494}
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:467
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:652
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1791
#define SlotGetBankNumber(slotno)
Definition: slru.c:149
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1728

References SlruSharedData::bank_locks, ctl, ereport, errmsg(), SlruSharedData::latest_page_number, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), pgstat_count_slru_truncate(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 445 of file slru.c.

446{
447 SlruShared shared = ctl->shared;
448 int bankno = SlotGetBankNumber(slotno);
449
450 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
451
452 /* See notes at top of file */
453 LWLockRelease(&shared->bank_locks[bankno].lock);
454 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
455 LWLockRelease(&shared->buffer_locks[slotno].lock);
456 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
457
458 /*
459 * If the slot is still in an io-in-progress state, then either someone
460 * already started a new I/O on the slot, or a previous I/O failed and
461 * neglected to reset the page state. That shouldn't happen, really, but
462 * it seems worth a few extra cycles to check and recover from it. We can
463 * cheaply test for failure by seeing if the buffer lock is still held (we
464 * assume that transaction abort would release the lock).
465 */
466 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
467 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
468 {
469 if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
470 {
471 /* indeed, the I/O must have failed */
472 if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
473 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
474 else /* write_in_progress */
475 {
476 shared->page_status[slotno] = SLRU_PAGE_VALID;
477 shared->page_dirty[slotno] = true;
478 }
479 LWLockRelease(&shared->buffer_locks[slotno].lock);
480 }
481 }
482}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1322 of file slru.c.

1323{
1324 SlruShared shared = ctl->shared;
1325 SlruWriteAllData fdata;
1326 int64 pageno = 0;
1327 int prevbank = SlotGetBankNumber(0);
1328 bool ok;
1329
1330 /* update the stats counter of flushes */
1331 pgstat_count_slru_flush(shared->slru_stats_idx);
1332
1333 /*
1334 * Find and write dirty pages
1335 */
1336 fdata.num_files = 0;
1337
1338 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1339
1340 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1341 {
1342 int curbank = SlotGetBankNumber(slotno);
1343
1344 /*
1345 * If the current bank lock is not same as the previous bank lock then
1346 * release the previous lock and acquire the new lock.
1347 */
1348 if (curbank != prevbank)
1349 {
1350 LWLockRelease(&shared->bank_locks[prevbank].lock);
1351 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1352 prevbank = curbank;
1353 }
1354
1355 /* Do nothing if slot is unused */
1356 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1357 continue;
1358
1359 SlruInternalWritePage(ctl, slotno, &fdata);
1360
1361 /*
1362 * In some places (e.g. checkpoints), we cannot assert that the slot
1363 * is clean now, since another process might have re-dirtied it
1364 * already. That's okay.
1365 */
1366 Assert(allow_redirtied ||
1367 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1368 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1369 !shared->page_dirty[slotno]));
1370 }
1371
1372 LWLockRelease(&shared->bank_locks[prevbank].lock);
1373
1374 /*
1375 * Now close any files that were open
1376 */
1377 ok = true;
1378 for (int i = 0; i < fdata.num_files; i++)
1379 {
1380 if (CloseTransientFile(fdata.fd[i]) != 0)
1381 {
1382 slru_errcause = SLRU_CLOSE_FAILED;
1383 slru_errno = errno;
1384 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1385 ok = false;
1386 }
1387 }
1388 if (!ok)
1389 SlruReportIOError(ctl, pageno, InvalidTransactionId);
1390
1391 /* Ensure that directory entries for new files are on disk. */
1392 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1393 fsync_fname(ctl->Dir, true);
1394}
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:755
int i
Definition: isn.c:72
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
int num_files
Definition: slru.c:128
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:129
int64 segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:130
@ SYNC_HANDLER_NONE
Definition: sync.h:42
#define InvalidTransactionId
Definition: transam.h:31

References Assert, SlruSharedData::bank_locks, CloseTransientFile(), ctl, SlruWriteAllData::fd, fsync_fname(), i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlruWriteAllData::segno, SlotGetBankNumber, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 428 of file slru.c.

429{
430 SlruShared shared = ctl->shared;
431
432 if (shared->lsn_groups_per_page > 0)
433 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
434 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
435}
#define MemSet(start, val, len)
Definition: c.h:977

References ctl, SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, and MemSet.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 375 of file slru.c.

376{
377 SlruShared shared = ctl->shared;
378 int slotno;
379
380 Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(ctl, pageno), LW_EXCLUSIVE));
381
382 /* Find a suitable buffer slot for the page */
383 slotno = SlruSelectLRUPage(ctl, pageno);
384 Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
385 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
386 !shared->page_dirty[slotno]) ||
387 shared->page_number[slotno] == pageno);
388
389 /* Mark the slot as containing this page */
390 shared->page_number[slotno] = pageno;
391 shared->page_status[slotno] = SLRU_PAGE_VALID;
392 shared->page_dirty[slotno] = true;
393 SlruRecentlyUsed(shared, slotno);
394
395 /* Set the buffer to zeroes */
396 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
397
398 /* Set the LSNs for this new page to zero */
399 SimpleLruZeroLSNs(ctl, slotno);
400
401 /*
402 * Assume this page is now the latest active page.
403 *
404 * Note that because both this routine and SlruSelectLRUPage run with
405 * ControlLock held, it is not possible for this to be zeroing a page that
406 * SlruSelectLRUPage is going to evict simultaneously. Therefore, there's
407 * no memory barrier here.
408 */
409 pg_atomic_write_u64(&shared->latest_page_number, pageno);
410
411 /* update the stats counter of zeroed pages */
412 pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
413
414 return slotno;
415}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59

References Assert, ctl, SlruSharedData::latest_page_number, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_write_u64(), pgstat_count_slru_page_zeroed(), SimpleLruGetBankLock(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), SerialAdd(), test_slru_page_write(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

◆ SlruCorrectSegmentFilenameLength()

static bool SlruCorrectSegmentFilenameLength ( SlruCtl  ctl,
size_t  len 
)
inline static

Definition at line 1758 of file slru.c.

1759{
1760 if (ctl->long_segment_names)
1761 return (len == 15); /* see SlruFileName() */
1762 else
1763
1764 /*
1765 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1766 * 73c986adde5 allowed 6-character length.
1767 *
1768 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1769 * numbers, and the corresponding 15-character file names, which may
1770 * eventually deprecate the support for 4, 5, and 6-character names.
1771 */
1772 return (len == 4 || len == 5 || len == 6);
1773}
const void size_t len

References ctl, and len.

Referenced by SlruScanDirectory().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruCtl  ctl,
int64  segno 
)

Definition at line 1526 of file slru.c.

1527{
1528 SlruShared shared = ctl->shared;
1529 int prevbank = SlotGetBankNumber(0);
1530 bool did_write;
1531
1532 /* Clean out any possibly existing references to the segment. */
1533 LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1534restart:
1535 did_write = false;
1536 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1537 {
1538 int64 pagesegno;
1539 int curbank = SlotGetBankNumber(slotno);
1540
1541 /*
1542 * If the current bank lock is not same as the previous bank lock then
1543 * release the previous lock and acquire the new lock.
1544 */
1545 if (curbank != prevbank)
1546 {
1547 LWLockRelease(&shared->bank_locks[prevbank].lock);
1548 LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1549 prevbank = curbank;
1550 }
1551
1552 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1553 continue;
1554
1555 pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1556 /* not the segment we're looking for */
1557 if (pagesegno != segno)
1558 continue;
1559
1560 /* If page is clean, just change state to EMPTY (expected case). */
1561 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1562 !shared->page_dirty[slotno])
1563 {
1564 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1565 continue;
1566 }
1567
1568 /* Same logic as SimpleLruTruncate() */
1569 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1570 SlruInternalWritePage(ctl, slotno, NULL);
1571 else
1572 SimpleLruWaitIO(ctl, slotno);
1573
1574 did_write = true;
1575 }
1576
1577 /*
1578 * Be extra careful and re-check. The IO functions release the control
1579 * lock, so new pages could have been read in.
1580 */
1581 if (did_write)
1582 goto restart;
1583
1584 SlruInternalDeleteSegment(ctl, segno);
1585
1586 LWLockRelease(&shared->bank_locks[prevbank].lock);
1587}
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1503

References SlruSharedData::bank_locks, ctl, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by PerformMembersTruncation(), and test_slru_page_delete().

◆ SlruFileName()

static int SlruFileName ( SlruCtl  ctl,
char *  path,
int64  segno 
)
inline static

Definition at line 91 of file slru.c.

92{
93 if (ctl->long_segment_names)
94 {
95 /*
96 * We could use 16 characters here but the disadvantage would be that
97 * the SLRU segments will be hard to distinguish from WAL segments.
98 *
99 * For this reason we use 15 characters. It is enough but also means
100 * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
101 */
102 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
103 return snprintf(path, MAXPGPATH, "%s/%015llX", ctl->Dir,
104 (long long) segno);
105 }
106 else
107 {
108 /*
109 * Despite the fact that %04X format string is used up to 24 bit
110 * integers are allowed. See SlruCorrectSegmentFilenameLength()
111 */
112 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
113 return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
114 (unsigned int) segno);
115 }
116}
#define INT64CONST(x)
Definition: c.h:502
#define snprintf
Definition: port.h:238

References Assert, ctl, INT64CONST, MAXPGPATH, and snprintf.

Referenced by SimpleLruDoesPhysicalPageExist(), SlruInternalDeleteSegment(), SlruPhysicalReadPage(), SlruPhysicalWritePage(), SlruReportIOError(), and SlruSyncFileTag().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruCtl  ctl,
int64  segno 
)
static

Definition at line 1503 of file slru.c.

1504{
1505 char path[MAXPGPATH];
1506
1507 /* Forget any fsync requests queued for this segment. */
1508 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1509 {
1510 FileTag tag;
1511
1512 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1513 RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
1514 }
1515
1516 /* Unlink the file. */
1517 SlruFileName(ctl, path, segno);
1518 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1519 unlink(path);
1520}
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define DEBUG2
Definition: elog.h:29
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:157
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References ctl, DEBUG2, ereport, errmsg_internal(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName(), SYNC_FORGET_REQUEST, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 652 of file slru.c.

653{
654 SlruShared shared = ctl->shared;
655 int64 pageno = shared->page_number[slotno];
656 int bankno = SlotGetBankNumber(slotno);
657 bool ok;
658
659 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
660 Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(ctl, pageno), LW_EXCLUSIVE));
661
662 /* If a write is in progress, wait for it to finish */
663 while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
664 shared->page_number[slotno] == pageno)
665 {
666 SimpleLruWaitIO(ctl, slotno);
667 }
668
669 /*
670 * Do nothing if page is not dirty, or if buffer no longer contains the
671 * same page we were called for.
672 */
673 if (!shared->page_dirty[slotno] ||
674 shared->page_status[slotno] != SLRU_PAGE_VALID ||
675 shared->page_number[slotno] != pageno)
676 return;
677
678 /*
679 * Mark the slot write-busy, and clear the dirtybit. After this point, a
680 * transaction status update on this page will mark it dirty again.
681 */
682 shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
683 shared->page_dirty[slotno] = false;
684
685 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
686 LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
687
688 /* Release bank lock while doing I/O */
689 LWLockRelease(&shared->bank_locks[bankno].lock);
690
691 /* Do the write */
692 ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
693
694 /* If we failed, and we're in a flush, better close the files */
695 if (!ok && fdata)
696 {
697 for (int i = 0; i < fdata->num_files; i++)
698 CloseTransientFile(fdata->fd[i]);
699 }
700
701 /* Re-acquire bank lock and update page state */
702 LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
703
704 Assert(shared->page_number[slotno] == pageno &&
705 shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
706
707 /* If we failed to write, mark the page dirty again */
708 if (!ok)
709 shared->page_dirty[slotno] = true;
710
711 shared->page_status[slotno] = SLRU_PAGE_VALID;
712
713 LWLockRelease(&shared->buffer_locks[slotno].lock);
714
715 /* Now it's okay to ereport if we failed */
716 if (!ok)
717 SlruReportIOError(ctl, pageno, InvalidTransactionId);
718
719 /* If part of a checkpoint, count this as a SLRU buffer written. */
720 if (fdata)
721 {
722 CheckpointStats.ckpt_slru_written++;
723 PendingCheckpointerStats.slru_written++;
724 }
725}
PgStat_CheckpointerStats PendingCheckpointerStats
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:876
int ckpt_slru_written
Definition: xlog.h:168
PgStat_Counter slru_written
Definition: pgstat.h:265
CheckpointStatsData CheckpointStats
Definition: xlog.c:209

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_slru_written, CloseTransientFile(), ctl, SlruWriteAllData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, PendingCheckpointerStats, SimpleLruGetBankLock(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, PgStat_CheckpointerStats::slru_written, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruCtl  ctl,
int64  segpage,
int64  cutoffPage 
)
static

Definition at line 1603 of file slru.c.

1604{
1605 int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
1606
1607 Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
1608
1609 return (ctl->PagePrecedes(segpage, cutoffPage) &&
1610 ctl->PagePrecedes(seg_last_page, cutoffPage));
1611}

References Assert, ctl, and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int64  pageno,
int  slotno 
)
static

Definition at line 804 of file slru.c.

805{
806 SlruShared shared = ctl->shared;
807 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
808 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
809 off_t offset = rpageno * BLCKSZ;
810 char path[MAXPGPATH];
811 int fd;
812
813 SlruFileName(ctl, path, segno);
814
815 /*
816 * In a crash-and-restart situation, it's possible for us to receive
817 * commands to set the commit status of transactions whose bits are in
818 * already-truncated segments of the commit log (see notes in
819 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
820 * where the file doesn't exist, and return zeroes instead.
821 */
822 fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
823 if (fd < 0)
824 {
825 if (errno != ENOENT || !InRecovery)
826 {
827 slru_errcause = SLRU_OPEN_FAILED;
828 slru_errno = errno;
829 return false;
830 }
831
832 ereport(LOG,
833 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
834 path)));
835 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
836 return true;
837 }
838
839 errno = 0;
840 pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
841 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
842 {
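843 pgstat_report_wait_end();
844 slru_errcause = SLRU_READ_FAILED;
845 slru_errno = errno;
846 CloseTransientFile(fd);
847 return false;
848 }
849 pgstat_report_wait_end();
850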
851 if (CloseTransientFile(fd) != 0)
852 {
853 slru_errcause = SLRU_CLOSE_FAILED;
854 slru_errno = errno;
855 return false;
856 }
857
858 return true;
859}
#define pg_pread
Definition: port.h:225
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
bool InRecovery
Definition: xlogutils.c:50

References CloseTransientFile(), ctl, ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName().

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int64  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 876 of file slru.c.

877{
878 SlruShared shared = ctl->shared;
879 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
880 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
881 off_t offset = rpageno * BLCKSZ;
882 char path[MAXPGPATH];
883 int fd = -1;
884
885 /* update the stats counter of written pages */
886 pgstat_count_slru_page_written(shared->slru_stats_idx);
887
888 /*
889 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
890 * write out data before associated WAL records. This is the same action
891 * performed during FlushBuffer() in the main buffer manager.
892 */
893 if (shared->group_lsn != NULL)
894 {
895 /*
896 * We must determine the largest async-commit LSN for the page. This
897 * is a bit tedious, but since this entire function is a slow path
898 * anyway, it seems better to do this here than to maintain a per-page
899 * LSN variable (which'd need an extra comparison in the
900 * transaction-commit path).
901 */
902 XLogRecPtr max_lsn;
903 int lsnindex;
904
905 lsnindex = slotno * shared->lsn_groups_per_page;
906 max_lsn = shared->group_lsn[lsnindex++];
907 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
908 {
909 XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
910
911 if (max_lsn < this_lsn)
912 max_lsn = this_lsn;
913 }
914
915 if (!XLogRecPtrIsInvalid(max_lsn))
916 {
917 /*
918 * As noted above, elog(ERROR) is not acceptable here, so if
919 * XLogFlush were to fail, we must PANIC. This isn't much of a
920 * restriction because XLogFlush is just about all critical
921 * section anyway, but let's make sure.
922 */
923 START_CRIT_SECTION();
924 XLogFlush(max_lsn);
925 END_CRIT_SECTION();
926 }
927 }
928
929 /*
930 * During a SimpleLruWriteAll, we may already have the desired file open.
931 */
932 if (fdata)
933 {
934 for (int i = 0; i < fdata->num_files; i++)
935 {
936 if (fdata->segno[i] == segno)
937 {
938 fd = fdata->fd[i];
939 break;
940 }
941 }
942 }
943
944 if (fd < 0)
945 {
946 /*
947 * If the file doesn't already exist, we should create it. It is
948 * possible for this to need to happen when writing a page that's not
949 * first in its segment; we assume the OS can cope with that. (Note:
950 * it might seem that it'd be okay to create files only when
951 * SimpleLruZeroPage is called for the first page of a segment.
952 * However, if after a crash and restart the REDO logic elects to
953 * replay the log from a checkpoint before the latest one, then it's
954 * possible that we will get commands to set transaction status of
955 * transactions that have already been truncated from the commit log.
956 * Easiest way to deal with that is to accept references to
957 * nonexistent files here and in SlruPhysicalReadPage.)
958 *
959 * Note: it is possible for more than one backend to be executing this
960 * code simultaneously for different pages of the same file. Hence,
961 * don't use O_EXCL or O_TRUNC or anything like that.
962 */
963 SlruFileName(ctl, path, segno);
964 fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
965 if (fd < 0)
966 {
967 slru_errcause = SLRU_OPEN_FAILED;
968 slru_errno = errno;
969 return false;
970 }
971
972 if (fdata)
973 {
974 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
975 {
976 fdata->fd[fdata->num_files] = fd;
977 fdata->segno[fdata->num_files] = segno;
978 fdata->num_files++;
979 }
980 else
981 {
982 /*
983 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
984 * fall back to treating it as a standalone write.
985 */
986 fdata = NULL;
987 }
988 }
989 }
990
991 errno = 0;
992 pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
993 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
994 {
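995 pgstat_report_wait_end();
996 /* if write didn't set errno, assume problem is no disk space */
997 if (errno == 0)
998 errno = ENOSPC;
999 slru_errcause = SLRU_WRITE_FAILED;
1000 slru_errno = errno;
1001 if (!fdata)
1002 CloseTransientFile(fd);
1003 return false;
1004 }
1005 pgstat_report_wait_end();
1006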
1007 /* Queue up a sync request for the checkpointer. */
1008 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1009 {
1010 FileTag tag;
1011
1012 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1013 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1014 {
1015 /* No space to enqueue sync request. Do it synchronously. */
1016 pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
1017 if (pg_fsync(fd) != 0)
1018 {
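1019 pgstat_report_wait_end();
1020 slru_errcause = SLRU_FSYNC_FAILED;
1021 slru_errno = errno;
1022 CloseTransientFile(fd);
1023 return false;
1024 }
1025 pgstat_report_wait_end();
1026 }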
1027 }
1028
1029 /* Close file, unless part of flush request. */
1030 if (!fdata)
1031 {
1032 if (CloseTransientFile(fd) != 0)
1033 {
1034 slru_errcause = SLRU_CLOSE_FAILED;
1035 slru_errno = errno;
1036 return false;
1037 }
1038 }
1039
1040 return true;
1041}
int pg_fsync(int fd)
Definition: fd.c:385
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
#define pg_pwrite
Definition: port.h:226
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:124
@ SYNC_REQUEST
Definition: sync.h:25
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2802
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

References CloseTransientFile(), ctl, END_CRIT_SECTION, fd(), SlruWriteAllData::fd, SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, SlruWriteAllData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_page_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName(), START_CRIT_SECTION, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

◆ SlruRecentlyUsed()

static void SlruRecentlyUsed ( SlruShared  shared,
int  slotno 
)
inline static

Definition at line 1123 of file slru.c.

1124{
1125 int bankno = SlotGetBankNumber(slotno);
1126 int new_lru_count = shared->bank_cur_lru_count[bankno];
1127
1128 Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
1129
1130 /*
1131 * The reason for the if-test is that there are often many consecutive
1132 * accesses to the same page (particularly the latest page). By
1133 * suppressing useless increments of bank_cur_lru_count, we reduce the
1134 * probability that old pages' counts will "wrap around" and make them
1135 * appear recently used.
1136 *
1137 * We allow this code to be executed concurrently by multiple processes
1138 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1139 * are atomic, this should not cause any completely-bogus values to enter
1140 * the computation. However, it is possible for either bank_cur_lru_count
1141 * or individual page_lru_count entries to be "reset" to lower values than
1142 * they should have, in case a process is delayed while it executes this
1143 * function. With care in SlruSelectLRUPage(), this does little harm, and
1144 * in any case the absolute worst possible consequence is a nonoptimal
1145 * choice of page to evict. The gain from allowing concurrent reads of
1146 * SLRU pages seems worth it.
1147 */
1148 if (new_lru_count != shared->page_lru_count[slotno])
1149 {
1150 shared->bank_cur_lru_count[bankno] = ++new_lru_count;
1151 shared->page_lru_count[slotno] = new_lru_count;
1152 }
1153}

References Assert, SlruSharedData::bank_cur_lru_count, SlruSharedData::page_lru_count, SlruSharedData::page_status, SlotGetBankNumber, and SLRU_PAGE_EMPTY.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)
static

Definition at line 1048 of file slru.c.

1049{
1050 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1051 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1052 int offset = rpageno * BLCKSZ;
1053 char path[MAXPGPATH];
1054
1055 SlruFileName(ctl, path, segno);
1056 errno = slru_errno;
1057 switch (slru_errcause)
1058 {
1059 case SLRU_OPEN_FAILED:
1060 ereport(ERROR,
1061 (errcode_for_file_access(),
1062 errmsg("could not access status of transaction %u", xid),
1063 errdetail("Could not open file \"%s\": %m.", path)));
1064 break;
1065 case SLRU_SEEK_FAILED:
1066 ereport(ERROR,
1067 (errcode_for_file_access(),
1068 errmsg("could not access status of transaction %u", xid),
1069 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1070 path, offset)));
1071 break;
1072 case SLRU_READ_FAILED:
1073 if (errno)
1074 ereport(ERROR,
1075 (errcode_for_file_access(),
1076 errmsg("could not access status of transaction %u", xid),
1077 errdetail("Could not read from file \"%s\" at offset %d: %m.",
1078 path, offset)));
1079 else
1080 ereport(ERROR,
1081 (errmsg("could not access status of transaction %u", xid),
1082 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1083 break;
1084 case SLRU_WRITE_FAILED:
1085 if (errno)
1086 ereport(ERROR,
1087 (errcode_for_file_access(),
1088 errmsg("could not access status of transaction %u", xid),
1089 errdetail("Could not write to file \"%s\" at offset %d: %m.",
1090 path, offset)));
1091 else
1092 ereport(ERROR,
1093 (errmsg("could not access status of transaction %u", xid),
1094 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1095 path, offset)));
1096 break;
1097 case SLRU_FSYNC_FAILED:
1098 ereport(data_sync_elevel(ERROR),
1099 (errcode_for_file_access(),
1100 errmsg("could not access status of transaction %u", xid),
1101 errdetail("Could not fsync file \"%s\": %m.",
1102 path)));
1103 break;
1104 case SLRU_CLOSE_FAILED:
1105 ereport(ERROR,
1106 (errcode_for_file_access(),
1107 errmsg("could not access status of transaction %u", xid),
1108 errdetail("Could not close file \"%s\": %m.",
1109 path)));
1110 break;
1111 default:
1112 /* can't get here, we trust */
1113 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1114 (int) slru_errcause);
1115 break;
1116 }
1117}
int errcode_for_file_access(void)
Definition: elog.c:876
int errdetail(const char *fmt,...)
Definition: elog.c:1203
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
int data_sync_elevel(int elevel)
Definition: fd.c:3959

References ctl, data_sync_elevel(), elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName().

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1744 of file slru.c.

1745{
1746 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1747
1748 return false; /* keep going */
1749}

References ctl, SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)
static

Definition at line 1728 of file slru.c.

1730{
1731 int64 cutoffPage = *(int64 *) data;
1732
1733 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1734 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1735
1736 return false; /* keep going */
1737}
const void * data
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition: slru.c:1603

References ctl, data, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1712 of file slru.c.

1714{
1715 int64 cutoffPage = *(int64 *) data;
1716
1717 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1718 return true; /* found one; don't iterate any more */
1719
1720 return false; /* keep going */
1721}

References ctl, data, and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1791 of file slru.c.

1792{
1793 bool retval = false;
1794 DIR *cldir;
1795 struct dirent *clde;
1796 int64 segno;
1797 int64 segpage;
1798
1799 cldir = AllocateDir(ctl->Dir);
1800 while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1801 {
1802 size_t len;
1803
1804 len = strlen(clde->d_name);
1805
1806 if (SlruCorrectSegmentFilenameLength(ctl, len) &&
1807 strspn(clde->d_name, "0123456789ABCDEF") == len)
1808 {
1809 segno = strtoi64(clde->d_name, NULL, 16);
1810 segpage = segno * SLRU_PAGES_PER_SEGMENT;
1811
1812 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1813 ctl->Dir, clde->d_name);
1814 retval = callback(ctl, clde->d_name, segpage, data);
1815 if (retval)
1816 break;
1817 }
1818 }
1819 FreeDir(cldir);
1820
1821 return retval;
1822}
int FreeDir(DIR *dir)
Definition: fd.c:2983
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2865
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2931
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition: slru.c:1758
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References AllocateDir(), callback(), ctl, dirent::d_name, data, DEBUG2, elog, FreeDir(), len, ReadDir(), SLRU_PAGES_PER_SEGMENT, and SlruCorrectSegmentFilenameLength().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruCtl  ctl,
int64  pageno 
)
static

Definition at line 1169 of file slru.c.

1170{
1171 SlruShared shared = ctl->shared;
1172
1173 /* Outer loop handles restart after I/O */
1174 for (;;)
1175 {
1176 int cur_count;
1177 int bestvalidslot = 0; /* keep compiler quiet */
1178 int best_valid_delta = -1;
1179 int64 best_valid_page_number = 0; /* keep compiler quiet */
1180 int bestinvalidslot = 0; /* keep compiler quiet */
1181 int best_invalid_delta = -1;
1182 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1183 int bankno = pageno % ctl->nbanks;
1184 int bankstart = bankno * SLRU_BANK_SIZE;
1185 int bankend = bankstart + SLRU_BANK_SIZE;
1186
1187 Assert(LWLockHeldByMe(SimpleLruGetBankLock(ctl, pageno)));
1188
1189 /* See if page already has a buffer assigned */
1190 for (int slotno = bankstart; slotno < bankend; slotno++)
1191 {
1192 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1193 shared->page_number[slotno] == pageno)
1194 return slotno;
1195 }
1196
1197 /*
1198 * If we find any EMPTY slot, just select that one. Else choose a
1199 * victim page to replace. We normally take the least recently used
1200 * valid page, but we will never take the slot containing
1201 * latest_page_number, even if it appears least recently used. We
1202 * will select a slot that is already I/O busy only if there is no
1203 * other choice: a read-busy slot will not be least recently used once
1204 * the read finishes, and waiting for an I/O on a write-busy slot is
1205 * inferior to just picking some other slot. Testing shows the slot
1206 * we pick instead will often be clean, allowing us to begin a read at
1207 * once.
1208 *
1209 * Normally the page_lru_count values will all be different and so
1210 * there will be a well-defined LRU page. But since we allow
1211 * concurrent execution of SlruRecentlyUsed() within
1212 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1213 * acquire the same lru_count values. In that case we break ties by
1214 * choosing the furthest-back page.
1215 *
1216 * Notice that this next line forcibly advances cur_lru_count to a
1217 * value that is certainly beyond any value that will be in the
1218 * page_lru_count array after the loop finishes. This ensures that
1219 * the next execution of SlruRecentlyUsed will mark the page newly
1220 * used, even if it's for a page that has the current counter value.
1221 * That gets us back on the path to having good data when there are
1222 * multiple pages with the same lru_count.
1223 */
1224 cur_count = (shared->bank_cur_lru_count[bankno])++;
1225 for (int slotno = bankstart; slotno < bankend; slotno++)
1226 {
1227 int this_delta;
1228 int64 this_page_number;
1229
1230 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1231 return slotno;
1232
1233 this_delta = cur_count - shared->page_lru_count[slotno];
1234 if (this_delta < 0)
1235 {
1236 /*
1237 * Clean up in case shared updates have caused cur_count
1238 * increments to get "lost". We back off the page counts,
1239 * rather than trying to increase cur_count, to avoid any
1240 * question of infinite loops or failure in the presence of
1241 * wrapped-around counts.
1242 */
1243 shared->page_lru_count[slotno] = cur_count;
1244 this_delta = 0;
1245 }
1246
1247 /*
1248 * If this page is the one most recently zeroed, don't consider it
1249 * an eviction candidate. See comments in SimpleLruZeroPage for an
1250 * explanation about the lack of a memory barrier here.
1251 */
1252 this_page_number = shared->page_number[slotno];
1253 if (this_page_number ==
1254 pg_atomic_read_u64(&shared->latest_page_number))
1255 continue;
1256
1257 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1258 {
1259 if (this_delta > best_valid_delta ||
1260 (this_delta == best_valid_delta &&
1261 ctl->PagePrecedes(this_page_number,
1262 best_valid_page_number)))
1263 {
1264 bestvalidslot = slotno;
1265 best_valid_delta = this_delta;
1266 best_valid_page_number = this_page_number;
1267 }
1268 }
1269 else
1270 {
1271 if (this_delta > best_invalid_delta ||
1272 (this_delta == best_invalid_delta &&
1273 ctl->PagePrecedes(this_page_number,
1274 best_invalid_page_number)))
1275 {
1276 bestinvalidslot = slotno;
1277 best_invalid_delta = this_delta;
1278 best_invalid_page_number = this_page_number;
1279 }
1280 }
1281 }
1282
1283 /*
1284 * If all pages (except possibly the latest one) are I/O busy, we'll
1285 * have to wait for an I/O to complete and then retry. In that
1286 * unhappy case, we choose to wait for the I/O on the least recently
1287 * used slot, on the assumption that it was likely initiated first of
1288 * all the I/Os in progress and may therefore finish first.
1289 */
1290 if (best_valid_delta < 0)
1291 {
1292 SimpleLruWaitIO(ctl, bestinvalidslot);
1293 continue;
1294 }
1295
1296 /*
1297 * If the selected page is clean, we're set.
1298 */
1299 if (!shared->page_dirty[bestvalidslot])
1300 return bestvalidslot;
1301
1302 /*
1303 * Write the page.
1304 */
1305 SlruInternalWritePage(ctl, bestvalidslot, NULL);
1306
1307 /*
1308 * Now loop back and try again. This is the easiest way of dealing
1309 * with corner cases such as the victim page being re-dirtied while we
1310 * wrote it.
1311 */
1312 }
1313}
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1893

References Assert, SlruSharedData::bank_cur_lru_count, ctl, SlruSharedData::latest_page_number, LWLockHeldByMe(), SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruCtl  ctl,
const FileTag *  ftag,
char *  path 
)

Definition at line 1831 of file slru.c.

1832{
1833 int fd;
1834 int save_errno;
1835 int result;
1836
1837 SlruFileName(ctl, path, ftag->segno);
1838
1839 fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1840 if (fd < 0)
1841 return -1;
1842
1843 pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1844 result = pg_fsync(fd);
1845 pgstat_report_wait_end();
1846 save_errno = errno;
1847
1848 CloseTransientFile(fd);
1849
1850 errno = save_errno;
1851 return result;
1852}
uint64 segno
Definition: sync.h:55

References CloseTransientFile(), ctl, fd(), OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), FileTag::segno, and SlruFileName().

Referenced by clogsyncfiletag(), committssyncfiletag(), multixactmemberssyncfiletag(), multixactoffsetssyncfiletag(), and test_slru_page_sync().

Variable Documentation

◆ slru_errcause

◆ slru_errno