PostgreSQL Source Code  git master
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define MAX_WRITEALL_BUFFERS   16
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 
#define SlruRecentlyUsed(shared, slotno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static int SlruFileName (SlruCtl ctl, char *path, int64 segno)
 
static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int64 pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruCtl ctl, int64 pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int64 pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, int64 segno)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
 
int SimpleLruZeroPage (SlruCtl ctl, int64 pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int64 pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int64 pageno)
 
void SimpleLruWriteAll (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int64 cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int64 segno)
 
static bool SlruMayDeleteSegment (SlruCtl ctl, int64 segpage, int64 cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static bool SlruCorrectSegmentFilenameLength (SlruCtl ctl, size_t len)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruCtl ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
Definition: sync.h:51

Definition at line 114 of file slru.c.

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 98 of file slru.c.

◆ SlruRecentlyUsed

#define SlruRecentlyUsed (   shared,
  slotno 
)
Value:
do { \
int new_lru_count = (shared)->cur_lru_count; \
if (new_lru_count != (shared)->page_lru_count[slotno]) { \
(shared)->cur_lru_count = ++new_lru_count; \
(shared)->page_lru_count[slotno] = new_lru_count; \
} \
} while (0)

Definition at line 140 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

typedef struct SlruWriteAllData* SlruWriteAll

Definition at line 107 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 150 of file slru.c.

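151 {
152  SLRU_OPEN_FAILED,
153  SLRU_SEEK_FAILED,
154  SLRU_READ_FAILED,
155  SLRU_WRITE_FAILED,
156  SLRU_FSYNC_FAILED,
157  SLRU_CLOSE_FAILED
158 } SlruErrorCause;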
SlruErrorCause
Definition: slru.c:151
@ SLRU_WRITE_FAILED
Definition: slru.c:155
@ SLRU_FSYNC_FAILED
Definition: slru.c:156
@ SLRU_SEEK_FAILED
Definition: slru.c:153
@ SLRU_OPEN_FAILED
Definition: slru.c:152
@ SLRU_CLOSE_FAILED
Definition: slru.c:157
@ SLRU_READ_FAILED
Definition: slru.c:154

Function Documentation

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 657 of file slru.c.

658 {
659  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
660  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
661  int offset = rpageno * BLCKSZ;
662  char path[MAXPGPATH];
663  int fd;
664  bool result;
665  off_t endpos;
666 
667  /* update the stats counter of checked pages */
668  pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
669 
670  SlruFileName(ctl, path, segno);
671 
672  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
673  if (fd < 0)
674  {
675  /* expected: file doesn't exist */
676  if (errno == ENOENT)
677  return false;
678 
679  /* report error normally */
680  slru_errcause = SLRU_OPEN_FAILED;
681  slru_errno = errno;
682  SlruReportIOError(ctl, pageno, 0);
683  }
684 
685  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
686  {
687  slru_errcause = SLRU_SEEK_FAILED;
688  slru_errno = errno;
689  SlruReportIOError(ctl, pageno, 0);
690  }
691 
692  result = endpos >= (off_t) (offset + BLCKSZ);
693 
694  if (CloseTransientFile(fd) != 0)
695  {
696  slru_errcause = SLRU_CLOSE_FAILED;
697  slru_errno = errno;
698  return false;
699  }
700 
701  return result;
702 }
#define PG_BINARY
Definition: c.h:1262
int CloseTransientFile(int fd)
Definition: fd.c:2809
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
#define MAXPGPATH
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition: slru.c:65
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:959
static SlruErrorCause slru_errcause
Definition: slru.c:160
static int slru_errno
Definition: slru.c:161
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:34
SlruShared shared
Definition: slru.h:115
int slru_stats_idx
Definition: slru.h:104

References CloseTransientFile(), endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_page_exists(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruSharedData::slru_stats_idx, SlruFileName(), and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), MaybeExtendOffsetSlru(), and test_slru_page_exists().

◆ SimpleLruInit()

void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
LWLock *  ctllock,
const char *  subdir,
int  tranche_id,
SyncRequestHandler  sync_handler,
bool  long_segment_names 
)

Definition at line 215 of file slru.c.

218 {
219  SlruShared shared;
220  bool found;
221 
222  shared = (SlruShared) ShmemInitStruct(name,
223  SimpleLruShmemSize(nslots, nlsns),
224  &found);
225 
226  if (!IsUnderPostmaster)
227  {
228  /* Initialize locks and shared memory area */
229  char *ptr;
230  Size offset;
231 
232  Assert(!found);
233 
234  memset(shared, 0, sizeof(SlruSharedData));
235 
236  shared->ControlLock = ctllock;
237 
238  shared->num_slots = nslots;
239  shared->lsn_groups_per_page = nlsns;
240 
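241  shared->cur_lru_count = 0;
242  pg_atomic_init_u64(&shared->latest_page_number, 0);
243
244  shared->slru_stats_idx = pgstat_get_slru_index(name);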
245 
246  ptr = (char *) shared;
247  offset = MAXALIGN(sizeof(SlruSharedData));
248  shared->page_buffer = (char **) (ptr + offset);
249  offset += MAXALIGN(nslots * sizeof(char *));
250  shared->page_status = (SlruPageStatus *) (ptr + offset);
251  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
252  shared->page_dirty = (bool *) (ptr + offset);
253  offset += MAXALIGN(nslots * sizeof(bool));
254  shared->page_number = (int64 *) (ptr + offset);
255  offset += MAXALIGN(nslots * sizeof(int64));
256  shared->page_lru_count = (int *) (ptr + offset);
257  offset += MAXALIGN(nslots * sizeof(int));
258 
259  /* Initialize LWLocks */
260  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
261  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
262 
263  if (nlsns > 0)
264  {
265  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
266  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
267  }
268 
269  ptr += BUFFERALIGN(offset);
270  for (int slotno = 0; slotno < nslots; slotno++)
271  {
272  LWLockInitialize(&shared->buffer_locks[slotno].lock,
273  tranche_id);
274 
275  shared->page_buffer[slotno] = ptr;
276  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
277  shared->page_dirty[slotno] = false;
278  shared->page_lru_count[slotno] = 0;
279  ptr += BLCKSZ;
280  }
281 
282  /* Should fit to estimated shmem size */
283  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
284  }
285  else
286  Assert(found);
287 
288  /*
289  * Initialize the unshared control struct, including directory path. We
290  * assume caller set PagePrecedes.
291  */
292  ctl->shared = shared;
293  ctl->sync_handler = sync_handler;
294  ctl->long_segment_names = long_segment_names;
295  strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
296 }
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:410
#define MAXALIGN(LEN)
Definition: c.h:800
#define BUFFERALIGN(LEN)
Definition: c.h:802
size_t Size
Definition: c.h:594
bool IsUnderPostmaster
Definition: globals.c:116
Assert(fmt[strlen(fmt) - 1] !='\n')
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:703
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:388
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:183
SlruSharedData * SlruShared
Definition: slru.h:107
SlruPageStatus
Definition: slru.h:43
@ SLRU_PAGE_EMPTY
Definition: slru.h:44
bool long_segment_names
Definition: slru.h:141
SyncRequestHandler sync_handler
Definition: slru.h:121
char Dir[64]
Definition: slru.h:147
int64 * page_number
Definition: slru.h:70
int num_slots
Definition: slru.h:61
int * page_lru_count
Definition: slru.h:71
pg_atomic_uint64 latest_page_number
Definition: slru.h:101
XLogRecPtr * group_lsn
Definition: slru.h:82
int cur_lru_count
Definition: slru.h:94
int lsn_groups_per_page
Definition: slru.h:83
SlruPageStatus * page_status
Definition: slru.h:68
LWLock * ControlLock
Definition: slru.h:58
bool * page_dirty
Definition: slru.h:69
LWLockPadded * buffer_locks
Definition: slru.h:72
char ** page_buffer
Definition: slru.h:67
LWLock lock
Definition: lwlock.h:69
const char * name
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), SlruSharedData::buffer_locks, BUFFERALIGN, SlruSharedData::ControlLock, SlruSharedData::cur_lru_count, SlruCtlData::Dir, SlruSharedData::group_lsn, IsUnderPostmaster, SlruSharedData::latest_page_number, LWLockPadded::lock, SlruCtlData::long_segment_names, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), MAXALIGN, name, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_init_u64(), pgstat_get_slru_index(), SlruCtlData::shared, ShmemInitStruct(), SimpleLruShmemSize(), SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, strlcpy(), and SlruCtlData::sync_handler.

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), SerialInit(), SUBTRANSShmemInit(), and test_slru_shmem_startup().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruCtl  ctl,
int64  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 429 of file slru.c.

431 {
432  SlruShared shared = ctl->shared;
433 
434  /* Outer loop handles restart if we must wait for someone else's I/O */
435  for (;;)
436  {
437  int slotno;
438  bool ok;
439 
440  /* See if page already is in memory; if not, pick victim slot */
441  slotno = SlruSelectLRUPage(ctl, pageno);
442 
443  /* Did we find the page in memory? */
444  if (shared->page_number[slotno] == pageno &&
445  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
446  {
447  /*
448  * If page is still being read in, we must wait for I/O. Likewise
449  * if the page is being written and the caller said that's not OK.
450  */
451  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
452  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
453  !write_ok))
454  {
455  SimpleLruWaitIO(ctl, slotno);
456  /* Now we must recheck state from the top */
457  continue;
458  }
459  /* Otherwise, it's ready to use */
460  SlruRecentlyUsed(shared, slotno);
461 
462  /* update the stats counter of pages found in the SLRU */
463  pgstat_count_slru_page_hit(shared->slru_stats_idx);
464 
465  return slotno;
466  }
467 
468  /* We found no match; assert we selected a freeable slot */
469  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
470  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
471  !shared->page_dirty[slotno]));
472 
473  /* Mark the slot read-busy */
474  shared->page_number[slotno] = pageno;
475  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
476  shared->page_dirty[slotno] = false;
477 
478  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
479  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
480 
481  /* Release control lock while doing I/O */
482  LWLockRelease(shared->ControlLock);
483 
484  /* Do the read */
485  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
486 
487  /* Set the LSNs for this newly read-in page to zero */
488  SimpleLruZeroLSNs(ctl, slotno);
489 
490  /* Re-acquire control lock and update page state */
491  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
492 
493  Assert(shared->page_number[slotno] == pageno &&
494  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
495  !shared->page_dirty[slotno]);
496 
497  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
498 
499  LWLockRelease(&shared->buffer_locks[slotno].lock);
500 
501  /* Now it's okay to ereport if we failed */
502  if (!ok)
503  SlruReportIOError(ctl, pageno, xid);
504 
505  SlruRecentlyUsed(shared, slotno);
506 
507  /* update the stats counter of pages not found in SLRU */
508  pgstat_count_slru_page_read(shared->slru_stats_idx);
509 
510  return slotno;
511  }
512 }
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LW_EXCLUSIVE
Definition: lwlock.h:116
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition: slru.c:715
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:140
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:358
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:375
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:1044
@ SLRU_PAGE_VALID
Definition: slru.h:46
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:47
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:45

References Assert(), SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), pgstat_count_slru_page_read(), SlruCtlData::shared, SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed, SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)

Definition at line 529 of file slru.c.

530 {
531  SlruShared shared = ctl->shared;
532 
533  /* Try to find the page while holding only shared lock */
534  LWLockAcquire(shared->ControlLock, LW_SHARED);
535 
536  /* See if page is already in a buffer */
537  for (int slotno = 0; slotno < shared->num_slots; slotno++)
538  {
539  if (shared->page_number[slotno] == pageno &&
540  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
541  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
542  {
543  /* See comments for SlruRecentlyUsed macro */
544  SlruRecentlyUsed(shared, slotno);
545 
546  /* update the stats counter of pages found in the SLRU */
547  pgstat_count_slru_page_hit(shared->slru_stats_idx);
548 
549  return slotno;
550  }
551  }
552 
553  /* No luck, so switch to normal exclusive lock and do regular read */
554  LWLockRelease(shared->ControlLock);
555  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
556 
557  return SimpleLruReadPage(ctl, pageno, true, xid);
558 }
@ LW_SHARED
Definition: lwlock.h:117
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:429

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), SlruCtlData::shared, SimpleLruReadPage(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed.

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruShmemSize()

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 183 of file slru.c.

184 {
185  Size sz;
186 
187  /* we assume nslots isn't so large as to risk overflow */
188  sz = MAXALIGN(sizeof(SlruSharedData));
189  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
190  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
191  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
192  sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
193  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
194  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
195 
196  if (nlsns > 0)
197  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
198 
199  return BUFFERALIGN(sz) + BLCKSZ * nslots;
200 }

References BUFFERALIGN, and MAXALIGN.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), SUBTRANSShmemSize(), and test_slru_shmem_request().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruCtl  ctl,
int64  cutoffPage 
)

Definition at line 1259 of file slru.c.

1260 {
1261  SlruShared shared = ctl->shared;
1262 
1263  /* update the stats counter of truncates */
1264  pgstat_count_slru_truncate(shared->slru_stats_idx);
1265 
1266  /*
1267  * Scan shared memory and remove any pages preceding the cutoff page, to
1268  * ensure we won't rewrite them later. (Since this is normally called in
1269  * or just after a checkpoint, any dirty pages should have been flushed
1270  * already ... we're just being extra careful here.)
1271  */
1272  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1273 
1274 restart:
1275 
1276  /*
1277  * An important safety check: the current endpoint page must not be
1278  * eligible for removal. This check is just a backstop against wraparound
1279  * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1280  * outdated value; therefore we don't add a memory barrier.
1281  */
1282  if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1283  cutoffPage))
1284  {
1285  LWLockRelease(shared->ControlLock);
1286  ereport(LOG,
1287  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1288  ctl->Dir)));
1289  return;
1290  }
1291 
1292  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1293  {
1294  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1295  continue;
1296  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1297  continue;
1298 
1299  /*
1300  * If page is clean, just change state to EMPTY (expected case).
1301  */
1302  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1303  !shared->page_dirty[slotno])
1304  {
1305  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1306  continue;
1307  }
1308 
1309  /*
1310  * Hmm, we have (or may have) I/O operations acting on the page, so
1311  * we've got to wait for them to finish and then start again. This is
1312  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1313  * wouldn't it be OK to just discard it without writing it?
1314  * SlruMayDeleteSegment() uses a stricter qualification, so we might
1315  * not delete this page in the end; even if we don't delete it, we
1316  * won't have cause to read its data again. For now, keep the logic
1317  * the same as it was.)
1318  */
1319  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1320  SlruInternalWritePage(ctl, slotno, NULL);
1321  else
1322  SimpleLruWaitIO(ctl, slotno);
1323  goto restart;
1324  }
1325 
1326  LWLockRelease(shared->ControlLock);
1327 
1328  /* Now we can remove the old segment(s) */
1329  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1330 }
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:424
int errmsg(const char *fmt,...)
Definition: elog.c:1075
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:572
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1613
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1550
bool(* PagePrecedes)(int64, int64)
Definition: slru.h:133

References SlruSharedData::ControlLock, SlruCtlData::Dir, ereport, errmsg(), SlruSharedData::latest_page_number, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, pg_atomic_read_u64(), pgstat_count_slru_truncate(), SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 375 of file slru.c.

376 {
377  SlruShared shared = ctl->shared;
378 
379  /* See notes at top of file */
380  LWLockRelease(shared->ControlLock);
381  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
382  LWLockRelease(&shared->buffer_locks[slotno].lock);
383  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
384 
385  /*
386  * If the slot is still in an io-in-progress state, then either someone
387  * already started a new I/O on the slot, or a previous I/O failed and
388  * neglected to reset the page state. That shouldn't happen, really, but
389  * it seems worth a few extra cycles to check and recover from it. We can
390  * cheaply test for failure by seeing if the buffer lock is still held (we
391  * assume that transaction abort would release the lock).
392  */
393  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
394  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
395  {
396  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
397  {
398  /* indeed, the I/O must have failed */
399  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
400  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
401  else /* write_in_progress */
402  {
403  shared->page_status[slotno] = SLRU_PAGE_VALID;
404  shared->page_dirty[slotno] = true;
405  }
406  LWLockRelease(&shared->buffer_locks[slotno].lock);
407  }
408  }
409 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1339

References SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlruCtlData::shared, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1191 of file slru.c.

1192 {
1193  SlruShared shared = ctl->shared;
1194  SlruWriteAllData fdata;
1195  int64 pageno = 0;
1196  bool ok;
1197 
1198  /* update the stats counter of flushes */
1199  pgstat_count_slru_flush(shared->slru_stats_idx);
1200 
1201  /*
1202  * Find and write dirty pages
1203  */
1204  fdata.num_files = 0;
1205
1206  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1207 
1208  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1209  {
1210  SlruInternalWritePage(ctl, slotno, &fdata);
1211 
1212  /*
1213  * In some places (e.g. checkpoints), we cannot assert that the slot
1214  * is clean now, since another process might have re-dirtied it
1215  * already. That's okay.
1216  */
1217  Assert(allow_redirtied ||
1218  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1219  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1220  !shared->page_dirty[slotno]));
1221  }
1222 
1223  LWLockRelease(shared->ControlLock);
1224 
1225  /*
1226  * Now close any files that were open
1227  */
1228  ok = true;
1229  for (int i = 0; i < fdata.num_files; i++)
1230  {
1231  if (CloseTransientFile(fdata.fd[i]) != 0)
1232  {
1233  slru_errcause = SLRU_CLOSE_FAILED;
1234  slru_errno = errno;
1235  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1236  ok = false;
1237  }
1238  }
1239  if (!ok)
1240  SlruReportIOError(ctl, pageno, InvalidTransactionId);
1241 
1242  /* Ensure that directory entries for new files are on disk. */
1243  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1244  fsync_fname(ctl->Dir, true);
1245 }
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int i
Definition: isn.c:73
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
int num_files
Definition: slru.c:102
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:103
int64 segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:104
@ SYNC_HANDLER_NONE
Definition: sync.h:42
#define InvalidTransactionId
Definition: transam.h:31

References Assert(), CloseTransientFile(), SlruSharedData::ControlLock, SlruCtlData::Dir, SlruWriteAllData::fd, fsync_fname(), i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), SlruCtlData::sync_handler, and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 358 of file slru.c.

359 {
360  SlruShared shared = ctl->shared;
361 
362  if (shared->lsn_groups_per_page > 0)
363  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
364  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
365 }
#define MemSet(start, val, len)
Definition: c.h:1009

References SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, MemSet, and SlruCtlData::shared.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 307 of file slru.c.

308 {
309  SlruShared shared = ctl->shared;
310  int slotno;
311 
312  /* Find a suitable buffer slot for the page */
313  slotno = SlruSelectLRUPage(ctl, pageno);
314  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
315  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
316  !shared->page_dirty[slotno]) ||
317  shared->page_number[slotno] == pageno);
318 
319  /* Mark the slot as containing this page */
320  shared->page_number[slotno] = pageno;
321  shared->page_status[slotno] = SLRU_PAGE_VALID;
322  shared->page_dirty[slotno] = true;
323  SlruRecentlyUsed(shared, slotno);
324 
325  /* Set the buffer to zeroes */
326  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
327 
328  /* Set the LSNs for this new page to zero */
329  SimpleLruZeroLSNs(ctl, slotno);
330 
331  /*
332  * Assume this page is now the latest active page.
333  *
334  * Note that because both this routine and SlruSelectLRUPage run with
335  * ControlLock held, it is not possible for this to be zeroing a page that
336  * SlruSelectLRUPage is going to evict simultaneously. Therefore, there's
337  * no memory barrier here.
338  */
339  pg_atomic_write_u64(&shared->latest_page_number, pageno);
340 
341  /* update the stats counter of zeroed pages */
342  pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
343 
344  return slotno;
345 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:433
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59

References Assert(), SlruSharedData::latest_page_number, MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_write_u64(), pgstat_count_slru_page_zeroed(), SlruCtlData::shared, SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed, and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), SerialAdd(), test_slru_page_write(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

◆ SlruCorrectSegmentFilenameLength()

static bool SlruCorrectSegmentFilenameLength ( SlruCtl  ctl,
size_t  len 
)
inline static

Definition at line 1580 of file slru.c.

1581 {
1582  if (ctl->long_segment_names)
1583  return (len == 15); /* see SlruFileName() */
1584  else
1585 
1586  /*
1587  * Commit 638cf09e76d allowed 5-character lengths. Later commit
1588  * 73c986adde5 allowed 6-character length.
1589  *
1590  * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1591  * numbers, and the corresponding 15-character file names, which may
1592  * eventually deprecate the support for 4, 5, and 6-character names.
1593  */
1594  return (len == 4 || len == 5 || len == 6);
1595 }
const void size_t len

References len, and SlruCtlData::long_segment_names.

Referenced by SlruScanDirectory().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruCtl  ctl,
int64  segno 
)

Definition at line 1362 of file slru.c.

1363 {
1364  SlruShared shared = ctl->shared;
1365  bool did_write;
1366 
1367  /* Clean out any possibly existing references to the segment. */
1368  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1369 restart:
1370  did_write = false;
1371  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1372  {
1373  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1374 
1375  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1376  continue;
1377 
1378  /* not the segment we're looking for */
1379  if (pagesegno != segno)
1380  continue;
1381 
1382  /* If page is clean, just change state to EMPTY (expected case). */
1383  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1384  !shared->page_dirty[slotno])
1385  {
1386  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1387  continue;
1388  }
1389 
1390  /* Same logic as SimpleLruTruncate() */
1391  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1392  SlruInternalWritePage(ctl, slotno, NULL);
1393  else
1394  SimpleLruWaitIO(ctl, slotno);
1395 
1396  did_write = true;
1397  }
1398 
1399  /*
1400  * Be extra careful and re-check. The IO functions release the control
1401  * lock, so new pages could have been read in.
1402  */
1403  if (did_write)
1404  goto restart;
1405 
1406  SlruInternalDeleteSegment(ctl, segno);
1407 
1408  LWLockRelease(shared->ControlLock);
1409 }
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1339

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by PerformMembersTruncation(), and test_slru_page_delete().

◆ SlruFileName()

static int SlruFileName ( SlruCtl  ctl,
char *  path,
int64  segno 
)
inlinestatic

Definition at line 65 of file slru.c.

66 {
67  if (ctl->long_segment_names)
68  {
69  /*
70  * We could use 16 characters here but the disadvantage would be that
71  * the SLRU segments will be hard to distinguish from WAL segments.
72  *
73  * For this reason we use 15 characters. It is enough but also means
74  * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
75  */
76  Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
77  return snprintf(path, MAXPGPATH, "%s/%015llX", ctl->Dir,
78  (long long) segno);
79  }
80  else
81  {
82  /*
83  * Despite the fact that %04X format string is used up to 24 bit
84  * integers are allowed. See SlruCorrectSegmentFilenameLength()
85  */
86  Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
87  return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
88  (unsigned int) segno);
89  }
90 }
#define snprintf
Definition: port.h:238

References Assert(), SlruCtlData::Dir, SlruCtlData::long_segment_names, MAXPGPATH, and snprintf.

Referenced by SimpleLruDoesPhysicalPageExist(), SlruInternalDeleteSegment(), SlruPhysicalReadPage(), SlruPhysicalWritePage(), SlruReportIOError(), and SlruSyncFileTag().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruCtl  ctl,
int64  segno 
)
static

Definition at line 1339 of file slru.c.

1340 {
1341  char path[MAXPGPATH];
1342 
1343  /* Forget any fsync requests queued for this segment. */
1344  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1345  {
1346  FileTag tag;
1347 
1348  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1349  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
1350  }
1351 
1352  /* Unlink the file. */
1353  SlruFileName(ctl, path, segno);
1354  ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1355  unlink(path);
1356 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1162
#define DEBUG2
Definition: elog.h:29
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:114
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:585
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References DEBUG2, ereport, errmsg_internal(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName(), SYNC_FORGET_REQUEST, SlruCtlData::sync_handler, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 572 of file slru.c.

573 {
574  SlruShared shared = ctl->shared;
575  int64 pageno = shared->page_number[slotno];
576  bool ok;
577 
578  /* If a write is in progress, wait for it to finish */
579  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
580  shared->page_number[slotno] == pageno)
581  {
582  SimpleLruWaitIO(ctl, slotno);
583  }
584 
585  /*
586  * Do nothing if page is not dirty, or if buffer no longer contains the
587  * same page we were called for.
588  */
589  if (!shared->page_dirty[slotno] ||
590  shared->page_status[slotno] != SLRU_PAGE_VALID ||
591  shared->page_number[slotno] != pageno)
592  return;
593 
594  /*
595  * Mark the slot write-busy, and clear the dirtybit. After this point, a
596  * transaction status update on this page will mark it dirty again.
597  */
598  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
599  shared->page_dirty[slotno] = false;
600 
601  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
602  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
603 
604  /* Release control lock while doing I/O */
605  LWLockRelease(shared->ControlLock);
606 
607  /* Do the write */
608  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
609 
610  /* If we failed, and we're in a flush, better close the files */
611  if (!ok && fdata)
612  {
613  for (int i = 0; i < fdata->num_files; i++)
614  CloseTransientFile(fdata->fd[i]);
615  }
616 
617  /* Re-acquire control lock and update page state */
618  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
619 
620  Assert(shared->page_number[slotno] == pageno &&
621  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
622 
623  /* If we failed to write, mark the page dirty again */
624  if (!ok)
625  shared->page_dirty[slotno] = true;
626 
627  shared->page_status[slotno] = SLRU_PAGE_VALID;
628 
629  LWLockRelease(&shared->buffer_locks[slotno].lock);
630 
631  /* Now it's okay to ereport if we failed */
632  if (!ok)
633  SlruReportIOError(ctl, pageno, InvalidTransactionId);
634 
635  /* If part of a checkpoint, count this as a buffer written. */
636  if (fdata)
637  CheckpointStats.ckpt_bufs_written++;
638 }
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:787
int ckpt_bufs_written
Definition: xlog.h:165
CheckpointStatsData CheckpointStats
Definition: xlog.c:213

References Assert(), SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CloseTransientFile(), SlruSharedData::ControlLock, SlruWriteAllData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruCtl  ctl,
int64  segpage,
int64  cutoffPage 
)
static

Definition at line 1425 of file slru.c.

1426 {
1427  int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
1428 
1429  Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
1430 
1431  return (ctl->PagePrecedes(segpage, cutoffPage) &&
1432  ctl->PagePrecedes(seg_last_page, cutoffPage));
1433 }

References Assert(), SlruCtlData::PagePrecedes, and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int64  pageno,
int  slotno 
)
static

Definition at line 715 of file slru.c.

716 {
717  SlruShared shared = ctl->shared;
718  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
719  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
720  off_t offset = rpageno * BLCKSZ;
721  char path[MAXPGPATH];
722  int fd;
723 
724  SlruFileName(ctl, path, segno);
725 
726  /*
727  * In a crash-and-restart situation, it's possible for us to receive
728  * commands to set the commit status of transactions whose bits are in
729  * already-truncated segments of the commit log (see notes in
730  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
731  * where the file doesn't exist, and return zeroes instead.
732  */
733  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
734  if (fd < 0)
735  {
736  if (errno != ENOENT || !InRecovery)
737  {
738  slru_errcause = SLRU_OPEN_FAILED;
739  slru_errno = errno;
740  return false;
741  }
742 
743  ereport(LOG,
744  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
745  path)));
746  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
747  return true;
748  }
749 
750  errno = 0;
751  pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
752  if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
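753  {
754  pgstat_report_wait_end();
755  slru_errcause = SLRU_READ_FAILED;
756  slru_errno = errno;
757  CloseTransientFile(fd);
758  return false;
759  }
760  pgstat_report_wait_end();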
761 
762  if (CloseTransientFile(fd) != 0)
763  {
764  slru_errcause = SLRU_CLOSE_FAILED;
765  slru_errno = errno;
766  return false;
767  }
768 
769  return true;
770 }
#define pg_pread
Definition: port.h:225
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
bool InRecovery
Definition: xlogutils.c:53

References CloseTransientFile(), ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName().

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int64  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 787 of file slru.c.

788 {
789  SlruShared shared = ctl->shared;
790  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
791  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
792  off_t offset = rpageno * BLCKSZ;
793  char path[MAXPGPATH];
794  int fd = -1;
795 
796  /* update the stats counter of written pages */
797  pgstat_count_slru_page_written(shared->slru_stats_idx);
798 
799  /*
800  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
801  * write out data before associated WAL records. This is the same action
802  * performed during FlushBuffer() in the main buffer manager.
803  */
804  if (shared->group_lsn != NULL)
805  {
806  /*
807  * We must determine the largest async-commit LSN for the page. This
808  * is a bit tedious, but since this entire function is a slow path
809  * anyway, it seems better to do this here than to maintain a per-page
810  * LSN variable (which'd need an extra comparison in the
811  * transaction-commit path).
812  */
813  XLogRecPtr max_lsn;
814  int lsnindex;
815 
816  lsnindex = slotno * shared->lsn_groups_per_page;
817  max_lsn = shared->group_lsn[lsnindex++];
818  for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
819  {
820  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
821 
822  if (max_lsn < this_lsn)
823  max_lsn = this_lsn;
824  }
825 
826  if (!XLogRecPtrIsInvalid(max_lsn))
827  {
828  /*
829  * As noted above, elog(ERROR) is not acceptable here, so if
830  * XLogFlush were to fail, we must PANIC. This isn't much of a
831  * restriction because XLogFlush is just about all critical
832  * section anyway, but let's make sure.
833  */
834  START_CRIT_SECTION();
835  XLogFlush(max_lsn);
836  END_CRIT_SECTION();
837  }
838  }
839 
840  /*
841  * During a SimpleLruWriteAll, we may already have the desired file open.
842  */
843  if (fdata)
844  {
845  for (int i = 0; i < fdata->num_files; i++)
846  {
847  if (fdata->segno[i] == segno)
848  {
849  fd = fdata->fd[i];
850  break;
851  }
852  }
853  }
854 
855  if (fd < 0)
856  {
857  /*
858  * If the file doesn't already exist, we should create it. It is
859  * possible for this to need to happen when writing a page that's not
860  * first in its segment; we assume the OS can cope with that. (Note:
861  * it might seem that it'd be okay to create files only when
862  * SimpleLruZeroPage is called for the first page of a segment.
863  * However, if after a crash and restart the REDO logic elects to
864  * replay the log from a checkpoint before the latest one, then it's
865  * possible that we will get commands to set transaction status of
866  * transactions that have already been truncated from the commit log.
867  * Easiest way to deal with that is to accept references to
868  * nonexistent files here and in SlruPhysicalReadPage.)
869  *
870  * Note: it is possible for more than one backend to be executing this
871  * code simultaneously for different pages of the same file. Hence,
872  * don't use O_EXCL or O_TRUNC or anything like that.
873  */
874  SlruFileName(ctl, path, segno);
875  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
876  if (fd < 0)
877  {
878  slru_errcause = SLRU_OPEN_FAILED;
879  slru_errno = errno;
880  return false;
881  }
882 
883  if (fdata)
884  {
885  if (fdata->num_files < MAX_WRITEALL_BUFFERS)
886  {
887  fdata->fd[fdata->num_files] = fd;
888  fdata->segno[fdata->num_files] = segno;
889  fdata->num_files++;
890  }
891  else
892  {
893  /*
894  * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
895  * fall back to treating it as a standalone write.
896  */
897  fdata = NULL;
898  }
899  }
900  }
901 
902  errno = 0;
903  pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
904  if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
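905  {
906  pgstat_report_wait_end();
907  /* if write didn't set errno, assume problem is no disk space */
908  if (errno == 0)
909  errno = ENOSPC;
910  slru_errcause = SLRU_WRITE_FAILED;
911  slru_errno = errno;
912  if (!fdata)
913  CloseTransientFile(fd);
914  return false;
915  }
916  pgstat_report_wait_end();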
917 
918  /* Queue up a sync request for the checkpointer. */
919  if (ctl->sync_handler != SYNC_HANDLER_NONE)
920  {
921  FileTag tag;
922 
923  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
924  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
925  {
926  /* No space to enqueue sync request. Do it synchronously. */
927  pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
928  if (pg_fsync(fd) != 0)
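929  {
930  pgstat_report_wait_end();
931  slru_errcause = SLRU_FSYNC_FAILED;
932  slru_errno = errno;
933  CloseTransientFile(fd);
934  return false;
935  }
936  pgstat_report_wait_end();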
937  }
938  }
939 
940  /* Close file, unless part of flush request. */
941  if (!fdata)
942  {
943  if (CloseTransientFile(fd) != 0)
944  {
945  slru_errcause = SLRU_CLOSE_FAILED;
946  slru_errno = errno;
947  return false;
948  }
949  }
950 
951  return true;
952 }
int pg_fsync(int fd)
Definition: fd.c:386
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
#define pg_pwrite
Definition: port.h:226
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:98
@ SYNC_REQUEST
Definition: sync.h:25
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2733
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

References CloseTransientFile(), END_CRIT_SECTION, fd(), SlruWriteAllData::fd, SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, SlruWriteAllData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_page_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName(), START_CRIT_SECTION, SlruCtlData::sync_handler, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)
static

Definition at line 959 of file slru.c.

960 {
961  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
962  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
963  int offset = rpageno * BLCKSZ;
964  char path[MAXPGPATH];
965 
966  SlruFileName(ctl, path, segno);
967  errno = slru_errno;
968  switch (slru_errcause)
969  {
970  case SLRU_OPEN_FAILED:
971  ereport(ERROR,
972  (errcode_for_file_access(),
973  errmsg("could not access status of transaction %u", xid),
974  errdetail("Could not open file \"%s\": %m.", path)));
975  break;
976  case SLRU_SEEK_FAILED:
977  ereport(ERROR,
978  (errcode_for_file_access(),
979  errmsg("could not access status of transaction %u", xid),
980  errdetail("Could not seek in file \"%s\" to offset %d: %m.",
981  path, offset)));
982  break;
983  case SLRU_READ_FAILED:
984  if (errno)
985  ereport(ERROR,
986  (errcode_for_file_access(),
987  errmsg("could not access status of transaction %u", xid),
988  errdetail("Could not read from file \"%s\" at offset %d: %m.",
989  path, offset)));
990  else
991  ereport(ERROR,
992  (errmsg("could not access status of transaction %u", xid),
993  errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
994  break;
995  case SLRU_WRITE_FAILED:
996  if (errno)
997  ereport(ERROR,
998  (errcode_for_file_access(),
999  errmsg("could not access status of transaction %u", xid),
1000  errdetail("Could not write to file \"%s\" at offset %d: %m.",
1001  path, offset)));
1002  else
1003  ereport(ERROR,
1004  (errmsg("could not access status of transaction %u", xid),
1005  errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1006  path, offset)));
1007  break;
1008  case SLRU_FSYNC_FAILED:
1009  ereport(data_sync_elevel(ERROR),
1010  (errcode_for_file_access(),
1011  errmsg("could not access status of transaction %u", xid),
1012  errdetail("Could not fsync file \"%s\": %m.",
1013  path)));
1014  break;
1015  case SLRU_CLOSE_FAILED:
1016  ereport(ERROR,
1017  (errcode_for_file_access(),
1018  errmsg("could not access status of transaction %u", xid),
1019  errdetail("Could not close file \"%s\": %m.",
1020  path)));
1021  break;
1022  default:
1023  /* can't get here, we trust */
1024  elog(ERROR, "unrecognized SimpleLru error cause: %d",
1025  (int) slru_errcause);
1026  break;
1027  }
1028 }
int errcode_for_file_access(void)
Definition: elog.c:883
int errdetail(const char *fmt,...)
Definition: elog.c:1208
#define ERROR
Definition: elog.h:39
int data_sync_elevel(int elevel)
Definition: fd.c:3936

References data_sync_elevel(), elog(), ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName().

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1566 of file slru.c.

1567 {
1568  SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1569 
1570  return false; /* keep going */
1571 }

References SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)
static

Definition at line 1550 of file slru.c.

1552 {
1553  int64 cutoffPage = *(int64 *) data;
1554 
1555  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1556  SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1557 
1558  return false; /* keep going */
1559 }
const void * data
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition: slru.c:1425

References data, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int64  segpage,
void *  data 
)

Definition at line 1534 of file slru.c.

1536 {
1537  int64 cutoffPage = *(int64 *) data;
1538 
1539  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1540  return true; /* found one; don't iterate any more */
1541 
1542  return false; /* keep going */
1543 }

References data, and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1613 of file slru.c.

1614 {
1615  bool retval = false;
1616  DIR *cldir;
1617  struct dirent *clde;
1618  int64 segno;
1619  int64 segpage;
1620 
1621  cldir = AllocateDir(ctl->Dir);
1622  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1623  {
1624  size_t len;
1625 
1626  len = strlen(clde->d_name);
1627 
1628  if (SlruCorrectSegmentFilenameLength(ctl, len) &&
1629  strspn(clde->d_name, "0123456789ABCDEF") == len)
1630  {
1631  segno = strtoi64(clde->d_name, NULL, 16);
1632  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1633 
1634  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1635  ctl->Dir, clde->d_name);
1636  retval = callback(ctl, clde->d_name, segpage, data);
1637  if (retval)
1638  break;
1639  }
1640  }
1641  FreeDir(cldir);
1642 
1643  return retval;
1644 }
#define strtoi64(str, endptr, base)
Definition: c.h:1286
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition: slru.c:1580
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References AllocateDir(), callback(), dirent::d_name, data, DEBUG2, SlruCtlData::Dir, elog(), FreeDir(), len, ReadDir(), SLRU_PAGES_PER_SEGMENT, SlruCorrectSegmentFilenameLength(), and strtoi64.

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruCtl  ctl,
int64  pageno 
)
static

Definition at line 1044 of file slru.c.

1045 {
1046  SlruShared shared = ctl->shared;
1047 
1048  /* Outer loop handles restart after I/O */
1049  for (;;)
1050  {
1051  int cur_count;
1052  int bestvalidslot = 0; /* keep compiler quiet */
1053  int best_valid_delta = -1;
1054  int64 best_valid_page_number = 0; /* keep compiler quiet */
1055  int bestinvalidslot = 0; /* keep compiler quiet */
1056  int best_invalid_delta = -1;
1057  int64 best_invalid_page_number = 0; /* keep compiler quiet */
1058 
1059  /* See if page already has a buffer assigned */
1060  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1061  {
1062  if (shared->page_number[slotno] == pageno &&
1063  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
1064  return slotno;
1065  }
1066 
1067  /*
1068  * If we find any EMPTY slot, just select that one. Else choose a
1069  * victim page to replace. We normally take the least recently used
1070  * valid page, but we will never take the slot containing
1071  * latest_page_number, even if it appears least recently used. We
1072  * will select a slot that is already I/O busy only if there is no
1073  * other choice: a read-busy slot will not be least recently used once
1074  * the read finishes, and waiting for an I/O on a write-busy slot is
1075  * inferior to just picking some other slot. Testing shows the slot
1076  * we pick instead will often be clean, allowing us to begin a read at
1077  * once.
1078  *
1079  * Normally the page_lru_count values will all be different and so
1080  * there will be a well-defined LRU page. But since we allow
1081  * concurrent execution of SlruRecentlyUsed() within
1082  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1083  * acquire the same lru_count values. In that case we break ties by
1084  * choosing the furthest-back page.
1085  *
1086  * Notice that this next line forcibly advances cur_lru_count to a
1087  * value that is certainly beyond any value that will be in the
1088  * page_lru_count array after the loop finishes. This ensures that
1089  * the next execution of SlruRecentlyUsed will mark the page newly
1090  * used, even if it's for a page that has the current counter value.
1091  * That gets us back on the path to having good data when there are
1092  * multiple pages with the same lru_count.
1093  */
1094  cur_count = (shared->cur_lru_count)++;
1095  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1096  {
1097  int this_delta;
1098  int64 this_page_number;
1099 
1100  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1101  return slotno;
1102  this_delta = cur_count - shared->page_lru_count[slotno];
1103  if (this_delta < 0)
1104  {
1105  /*
1106  * Clean up in case shared updates have caused cur_count
1107  * increments to get "lost". We back off the page counts,
1108  * rather than trying to increase cur_count, to avoid any
1109  * question of infinite loops or failure in the presence of
1110  * wrapped-around counts.
1111  */
1112  shared->page_lru_count[slotno] = cur_count;
1113  this_delta = 0;
1114  }
1115 
1116  /*
1117  * If this page is the one most recently zeroed, don't consider it
1118  * an eviction candidate. See comments in SimpleLruZeroPage for an
1119  * explanation about the lack of a memory barrier here.
1120  */
1121  this_page_number = shared->page_number[slotno];
1122  if (this_page_number ==
1123  pg_atomic_read_u64(&shared->latest_page_number))
1124  continue;
1125 
1126  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1127  {
1128  if (this_delta > best_valid_delta ||
1129  (this_delta == best_valid_delta &&
1130  ctl->PagePrecedes(this_page_number,
1131  best_valid_page_number)))
1132  {
1133  bestvalidslot = slotno;
1134  best_valid_delta = this_delta;
1135  best_valid_page_number = this_page_number;
1136  }
1137  }
1138  else
1139  {
1140  if (this_delta > best_invalid_delta ||
1141  (this_delta == best_invalid_delta &&
1142  ctl->PagePrecedes(this_page_number,
1143  best_invalid_page_number)))
1144  {
1145  bestinvalidslot = slotno;
1146  best_invalid_delta = this_delta;
1147  best_invalid_page_number = this_page_number;
1148  }
1149  }
1150  }
1151 
1152  /*
1153  * If all pages (except possibly the latest one) are I/O busy, we'll
1154  * have to wait for an I/O to complete and then retry. In that
1155  * unhappy case, we choose to wait for the I/O on the least recently
1156  * used slot, on the assumption that it was likely initiated first of
1157  * all the I/Os in progress and may therefore finish first.
1158  */
1159  if (best_valid_delta < 0)
1160  {
1161  SimpleLruWaitIO(ctl, bestinvalidslot);
1162  continue;
1163  }
1164 
1165  /*
1166  * If the selected page is clean, we're set.
1167  */
1168  if (!shared->page_dirty[bestvalidslot])
1169  return bestvalidslot;
1170 
1171  /*
1172  * Write the page.
1173  */
1174  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1175 
1176  /*
1177  * Now loop back and try again. This is the easiest way of dealing
1178  * with corner cases such as the victim page being re-dirtied while we
1179  * wrote it.
1180  */
1181  }
1182 }

References SlruSharedData::cur_lru_count, SlruSharedData::latest_page_number, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, pg_atomic_read_u64(), SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruCtl  ctl,
const FileTag *  ftag,
char *  path 
)

Definition at line 1653 of file slru.c.

1654 {
1655  int fd;
1656  int save_errno;
1657  int result;
1658 
1659  SlruFileName(ctl, path, ftag->segno);
1660 
1661  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1662  if (fd < 0)
1663  return -1;
1664 
1665  pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1666  result = pg_fsync(fd);
1667  pgstat_report_wait_end();
1668  save_errno = errno;
1669 
1670  CloseTransientFile(fd);
1671 
1672  errno = save_errno;
1673  return result;
1674 }
uint64 segno
Definition: sync.h:55

References CloseTransientFile(), fd(), OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), FileTag::segno, and SlruFileName().

Referenced by clogsyncfiletag(), committssyncfiletag(), multixactmemberssyncfiletag(), multixactoffsetssyncfiletag(), and test_slru_page_sync().

Variable Documentation

◆ slru_errcause

◆ slru_errno