PostgreSQL Source Code  git master
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define SlruFileName(ctl, path, seg)    snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
 
#define MAX_WRITEALL_BUFFERS   16
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 
#define SlruRecentlyUsed(shared, slotno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruCtl ctl, int pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, int segno)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler)
 
int SimpleLruZeroPage (SlruCtl ctl, int pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int pageno)
 
void SimpleLruWriteAll (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int segno)
 
static bool SlruMayDeleteSegment (SlruCtl ctl, int segpage, int cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruCtl ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition: isn.c:69
Definition: sync.h:51

Definition at line 88 of file slru.c.

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 72 of file slru.c.

◆ SlruFileName

#define SlruFileName (   ctl,
  path,
  seg 
)     snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)

Definition at line 63 of file slru.c.

◆ SlruRecentlyUsed

#define SlruRecentlyUsed (   shared,
  slotno 
)
Value:
do { \
int new_lru_count = (shared)->cur_lru_count; \
if (new_lru_count != (shared)->page_lru_count[slotno]) { \
(shared)->cur_lru_count = ++new_lru_count; \
(shared)->page_lru_count[slotno] = new_lru_count; \
} \
} while (0)

Definition at line 114 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

typedef struct SlruWriteAllData* SlruWriteAll

Definition at line 81 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 124 of file slru.c.

125 {
SlruErrorCause
Definition: slru.c:125
@ SLRU_WRITE_FAILED
Definition: slru.c:129
@ SLRU_FSYNC_FAILED
Definition: slru.c:130
@ SLRU_SEEK_FAILED
Definition: slru.c:127
@ SLRU_OPEN_FAILED
Definition: slru.c:126
@ SLRU_CLOSE_FAILED
Definition: slru.c:131
@ SLRU_READ_FAILED
Definition: slru.c:128

Function Documentation

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int  pageno 
)

Definition at line 627 of file slru.c.

628 {
629  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
630  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
631  int offset = rpageno * BLCKSZ;
632  char path[MAXPGPATH];
633  int fd;
634  bool result;
635  off_t endpos;
636 
637  /* update the stats counter of checked pages */
638  pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
639 
640  SlruFileName(ctl, path, segno);
641 
642  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
643  if (fd < 0)
644  {
645  /* expected: file doesn't exist */
646  if (errno == ENOENT)
647  return false;
648 
649  /* report error normally */
650  slru_errcause = SLRU_OPEN_FAILED;
651  slru_errno = errno;
652  SlruReportIOError(ctl, pageno, 0);
653  }
654 
655  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
656  {
657  slru_errcause = SLRU_SEEK_FAILED;
658  slru_errno = errno;
659  SlruReportIOError(ctl, pageno, 0);
660  }
661 
662  result = endpos >= (off_t) (offset + BLCKSZ);
663 
664  if (CloseTransientFile(fd) != 0)
665  {
666  slru_errcause = SLRU_CLOSE_FAILED;
667  slru_errno = errno;
668  return false;
669  }
670 
671  return result;
672 }
#define PG_BINARY
Definition: c.h:1283
int CloseTransientFile(int fd)
Definition: fd.c:2754
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2578
#define MAXPGPATH
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static SlruErrorCause slru_errcause
Definition: slru.c:134
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:932
static int slru_errno
Definition: slru.c:135
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:34
SlruShared shared
Definition: slru.h:112
int slru_stats_idx
Definition: slru.h:101

References CloseTransientFile(), endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_page_exists(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruSharedData::slru_stats_idx, SlruFileName, and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), MaybeExtendOffsetSlru(), and test_slru_page_exists().

◆ SimpleLruInit()

void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
LWLock *  ctllock,
const char *  subdir,
int  tranche_id,
SyncRequestHandler  sync_handler 
)

Definition at line 188 of file slru.c.

191 {
192  SlruShared shared;
193  bool found;
194 
195  shared = (SlruShared) ShmemInitStruct(name,
196  SimpleLruShmemSize(nslots, nlsns),
197  &found);
198 
199  if (!IsUnderPostmaster)
200  {
201  /* Initialize locks and shared memory area */
202  char *ptr;
203  Size offset;
204  int slotno;
205 
206  Assert(!found);
207 
208  memset(shared, 0, sizeof(SlruSharedData));
209 
210  shared->ControlLock = ctllock;
211 
212  shared->num_slots = nslots;
213  shared->lsn_groups_per_page = nlsns;
214 
215  shared->cur_lru_count = 0;
216 
217  /* shared->latest_page_number will be set later */
218 
219  shared->slru_stats_idx = pgstat_get_slru_index(name);
220 
221  ptr = (char *) shared;
222  offset = MAXALIGN(sizeof(SlruSharedData));
223  shared->page_buffer = (char **) (ptr + offset);
224  offset += MAXALIGN(nslots * sizeof(char *));
225  shared->page_status = (SlruPageStatus *) (ptr + offset);
226  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
227  shared->page_dirty = (bool *) (ptr + offset);
228  offset += MAXALIGN(nslots * sizeof(bool));
229  shared->page_number = (int *) (ptr + offset);
230  offset += MAXALIGN(nslots * sizeof(int));
231  shared->page_lru_count = (int *) (ptr + offset);
232  offset += MAXALIGN(nslots * sizeof(int));
233 
234  /* Initialize LWLocks */
235  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
236  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
237 
238  if (nlsns > 0)
239  {
240  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
241  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
242  }
243 
244  ptr += BUFFERALIGN(offset);
245  for (slotno = 0; slotno < nslots; slotno++)
246  {
247  LWLockInitialize(&shared->buffer_locks[slotno].lock,
248  tranche_id);
249 
250  shared->page_buffer[slotno] = ptr;
251  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
252  shared->page_dirty[slotno] = false;
253  shared->page_lru_count[slotno] = 0;
254  ptr += BLCKSZ;
255  }
256 
257  /* Should fit to estimated shmem size */
258  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
259  }
260  else
261  Assert(found);
262 
263  /*
264  * Initialize the unshared control struct, including directory path. We
265  * assume caller set PagePrecedes.
266  */
267  ctl->shared = shared;
268  ctl->sync_handler = sync_handler;
269  strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
270 }
#define MAXALIGN(LEN)
Definition: c.h:800
#define BUFFERALIGN(LEN)
Definition: c.h:802
size_t Size
Definition: c.h:594
bool IsUnderPostmaster
Definition: globals.c:113
Assert(fmt[strlen(fmt) - 1] !='\n')
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:730
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:396
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:156
SlruSharedData * SlruShared
Definition: slru.h:104
SlruPageStatus
Definition: slru.h:43
@ SLRU_PAGE_EMPTY
Definition: slru.h:44
SyncRequestHandler sync_handler
Definition: slru.h:118
char Dir[64]
Definition: slru.h:136
int num_slots
Definition: slru.h:58
int * page_lru_count
Definition: slru.h:68
XLogRecPtr * group_lsn
Definition: slru.h:79
int cur_lru_count
Definition: slru.h:91
int * page_number
Definition: slru.h:67
int lsn_groups_per_page
Definition: slru.h:80
SlruPageStatus * page_status
Definition: slru.h:65
LWLock * ControlLock
Definition: slru.h:55
bool * page_dirty
Definition: slru.h:66
LWLockPadded * buffer_locks
Definition: slru.h:69
char ** page_buffer
Definition: slru.h:64
LWLock lock
Definition: lwlock.h:69
const char * name
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), SlruSharedData::buffer_locks, BUFFERALIGN, SlruSharedData::ControlLock, SlruSharedData::cur_lru_count, SlruCtlData::Dir, SlruSharedData::group_lsn, IsUnderPostmaster, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), MAXALIGN, name, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_get_slru_index(), SlruCtlData::shared, ShmemInitStruct(), SimpleLruShmemSize(), SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, strlcpy(), and SlruCtlData::sync_handler.

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), SerialInit(), SUBTRANSShmemInit(), and test_slru_shmem_startup().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruCtl  ctl,
int  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 396 of file slru.c.

398 {
399  SlruShared shared = ctl->shared;
400 
401  /* Outer loop handles restart if we must wait for someone else's I/O */
402  for (;;)
403  {
404  int slotno;
405  bool ok;
406 
407  /* See if page already is in memory; if not, pick victim slot */
408  slotno = SlruSelectLRUPage(ctl, pageno);
409 
410  /* Did we find the page in memory? */
411  if (shared->page_number[slotno] == pageno &&
412  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
413  {
414  /*
415  * If page is still being read in, we must wait for I/O. Likewise
416  * if the page is being written and the caller said that's not OK.
417  */
418  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
419  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
420  !write_ok))
421  {
422  SimpleLruWaitIO(ctl, slotno);
423  /* Now we must recheck state from the top */
424  continue;
425  }
426  /* Otherwise, it's ready to use */
427  SlruRecentlyUsed(shared, slotno);
428 
429  /* update the stats counter of pages found in the SLRU */
430  pgstat_count_slru_page_hit(shared->slru_stats_idx);
431 
432  return slotno;
433  }
434 
435  /* We found no match; assert we selected a freeable slot */
436  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
437  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
438  !shared->page_dirty[slotno]));
439 
440  /* Mark the slot read-busy */
441  shared->page_number[slotno] = pageno;
442  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
443  shared->page_dirty[slotno] = false;
444 
445  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
446  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
447 
448  /* Release control lock while doing I/O */
449  LWLockRelease(shared->ControlLock);
450 
451  /* Do the read */
452  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
453 
454  /* Set the LSNs for this newly read-in page to zero */
455  SimpleLruZeroLSNs(ctl, slotno);
456 
457  /* Re-acquire control lock and update page state */
458  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
459 
460  Assert(shared->page_number[slotno] == pageno &&
461  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
462  !shared->page_dirty[slotno]);
463 
464  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
465 
466  LWLockRelease(&shared->buffer_locks[slotno].lock);
467 
468  /* Now it's okay to ereport if we failed */
469  if (!ok)
470  SlruReportIOError(ctl, pageno, xid);
471 
472  SlruRecentlyUsed(shared, slotno);
473 
474  /* update the stats counter of pages not found in SLRU */
475  pgstat_count_slru_page_read(shared->slru_stats_idx);
476 
477  return slotno;
478  }
479 }
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1808
@ LW_EXCLUSIVE
Definition: lwlock.h:116
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:685
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:114
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:325
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:342
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:1017
@ SLRU_PAGE_VALID
Definition: slru.h:46
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:47
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:45

References Assert(), SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), pgstat_count_slru_page_read(), SlruCtlData::shared, SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed, SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)

Definition at line 496 of file slru.c.

497 {
498  SlruShared shared = ctl->shared;
499  int slotno;
500 
501  /* Try to find the page while holding only shared lock */
502  LWLockAcquire(shared->ControlLock, LW_SHARED);
503 
504  /* See if page is already in a buffer */
505  for (slotno = 0; slotno < shared->num_slots; slotno++)
506  {
507  if (shared->page_number[slotno] == pageno &&
508  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
509  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
510  {
511  /* See comments for SlruRecentlyUsed macro */
512  SlruRecentlyUsed(shared, slotno);
513 
514  /* update the stats counter of pages found in the SLRU */
515  pgstat_count_slru_page_hit(shared->slru_stats_idx);
516 
517  return slotno;
518  }
519  }
520 
521  /* No luck, so switch to normal exclusive lock and do regular read */
522  LWLockRelease(shared->ControlLock);
523  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
524 
525  return SimpleLruReadPage(ctl, pageno, true, xid);
526 }
@ LW_SHARED
Definition: lwlock.h:117
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:396

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_hit(), SlruCtlData::shared, SimpleLruReadPage(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed.

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruShmemSize()

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 156 of file slru.c.

/*
 * Body of SimpleLruShmemSize(nslots, nlsns): add up the bytes of shared
 * memory needed for one SLRU area and return the total as a Size.
 *
 * The total is the MAXALIGN'd SlruSharedData struct, one MAXALIGN'd array
 * of nslots entries per per-slot attribute, an optional group_lsn[] array,
 * and finally nslots blocks of BLCKSZ bytes.
 */
157 {
158  Size sz;
159 
160  /* we assume nslots isn't so large as to risk overflow */
161  sz = MAXALIGN(sizeof(SlruSharedData));
162  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
163  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
164  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
165  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
166  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
167  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
168 
 /* the LSN array is only counted when nlsns is positive */
169  if (nlsns > 0)
170  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
171 
 /* round the bookkeeping area up with BUFFERALIGN, then add BLCKSZ per slot */
172  return BUFFERALIGN(sz) + BLCKSZ * nslots;
173 }

References BUFFERALIGN, and MAXALIGN.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), SUBTRANSShmemSize(), and test_slru_shmem_request().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruCtl  ctl,
int  cutoffPage 
)

Definition at line 1227 of file slru.c.

1228 {
1229  SlruShared shared = ctl->shared;
1230  int slotno;
1231 
1232  /* update the stats counter of truncates */
1233  pgstat_count_slru_truncate(shared->slru_stats_idx);
1234 
1235  /*
1236  * Scan shared memory and remove any pages preceding the cutoff page, to
1237  * ensure we won't rewrite them later. (Since this is normally called in
1238  * or just after a checkpoint, any dirty pages should have been flushed
1239  * already ... we're just being extra careful here.)
1240  */
1241  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1242 
1243 restart:
1244 
1245  /*
1246  * While we are holding the lock, make an important safety check: the
1247  * current endpoint page must not be eligible for removal.
1248  */
1249  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1250  {
1251  LWLockRelease(shared->ControlLock);
1252  ereport(LOG,
1253  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1254  ctl->Dir)));
1255  return;
1256  }
1257 
1258  for (slotno = 0; slotno < shared->num_slots; slotno++)
1259  {
1260  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1261  continue;
1262  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1263  continue;
1264 
1265  /*
1266  * If page is clean, just change state to EMPTY (expected case).
1267  */
1268  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1269  !shared->page_dirty[slotno])
1270  {
1271  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1272  continue;
1273  }
1274 
1275  /*
1276  * Hmm, we have (or may have) I/O operations acting on the page, so
1277  * we've got to wait for them to finish and then start again. This is
1278  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1279  * wouldn't it be OK to just discard it without writing it?
1280  * SlruMayDeleteSegment() uses a stricter qualification, so we might
1281  * not delete this page in the end; even if we don't delete it, we
1282  * won't have cause to read its data again. For now, keep the logic
1283  * the same as it was.)
1284  */
1285  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1286  SlruInternalWritePage(ctl, slotno, NULL);
1287  else
1288  SimpleLruWaitIO(ctl, slotno);
1289  goto restart;
1290  }
1291 
1292  LWLockRelease(shared->ControlLock);
1293 
1294  /* Now we can remove the old segment(s) */
1295  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1296 }
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define LOG
Definition: elog.h:31
#define ereport(elevel,...)
Definition: elog.h:149
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:540
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1554
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1516
bool(* PagePrecedes)(int, int)
Definition: slru.h:130
int latest_page_number
Definition: slru.h:98

References SlruSharedData::ControlLock, SlruCtlData::Dir, ereport, errmsg(), SlruSharedData::latest_page_number, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, pgstat_count_slru_truncate(), SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 342 of file slru.c.

343 {
344  SlruShared shared = ctl->shared;
345 
346  /* See notes at top of file */
347  LWLockRelease(shared->ControlLock);
348  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
349  LWLockRelease(&shared->buffer_locks[slotno].lock);
350  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
351 
352  /*
353  * If the slot is still in an io-in-progress state, then either someone
354  * already started a new I/O on the slot, or a previous I/O failed and
355  * neglected to reset the page state. That shouldn't happen, really, but
356  * it seems worth a few extra cycles to check and recover from it. We can
357  * cheaply test for failure by seeing if the buffer lock is still held (we
358  * assume that transaction abort would release the lock).
359  */
360  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
361  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
362  {
363  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
364  {
365  /* indeed, the I/O must have failed */
366  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
367  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
368  else /* write_in_progress */
369  {
370  shared->page_status[slotno] = SLRU_PAGE_VALID;
371  shared->page_dirty[slotno] = true;
372  }
373  LWLockRelease(&shared->buffer_locks[slotno].lock);
374  }
375  }
376 }
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1366

References SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlruCtlData::shared, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1157 of file slru.c.

1158 {
1159  SlruShared shared = ctl->shared;
1160  SlruWriteAllData fdata;
1161  int slotno;
1162  int pageno = 0;
1163  int i;
1164  bool ok;
1165 
1166  /* update the stats counter of flushes */
1167  pgstat_count_slru_flush(shared->slru_stats_idx);
1168 
1169  /*
1170  * Find and write dirty pages
1171  */
1172  fdata.num_files = 0;
1173 
1174  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1175 
1176  for (slotno = 0; slotno < shared->num_slots; slotno++)
1177  {
1178  SlruInternalWritePage(ctl, slotno, &fdata);
1179 
1180  /*
1181  * In some places (e.g. checkpoints), we cannot assert that the slot
1182  * is clean now, since another process might have re-dirtied it
1183  * already. That's okay.
1184  */
1185  Assert(allow_redirtied ||
1186  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1187  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1188  !shared->page_dirty[slotno]));
1189  }
1190 
1191  LWLockRelease(shared->ControlLock);
1192 
1193  /*
1194  * Now close any files that were open
1195  */
1196  ok = true;
1197  for (i = 0; i < fdata.num_files; i++)
1198  {
1199  if (CloseTransientFile(fdata.fd[i]) != 0)
1200  {
1201  slru_errcause = SLRU_CLOSE_FAILED;
1202  slru_errno = errno;
1203  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1204  ok = false;
1205  }
1206  }
1207  if (!ok)
1208  SlruReportIOError(ctl, pageno, InvalidTransactionId);
1209 
1210  /* Ensure that directory entries for new files are on disk. */
1211  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1212  fsync_fname(ctl->Dir, true);
1213 }
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:708
int i
Definition: isn.c:73
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
int num_files
Definition: slru.c:76
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:77
int segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:78
@ SYNC_HANDLER_NONE
Definition: sync.h:42
#define InvalidTransactionId
Definition: transam.h:31

References Assert(), CloseTransientFile(), SlruSharedData::ControlLock, SlruCtlData::Dir, SlruWriteAllData::fd, fsync_fname(), i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), SlruCtlData::sync_handler, and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 325 of file slru.c.

/*
 * Body of SimpleLruZeroLSNs(ctl, slotno): clear the LSN entries that belong
 * to buffer slot slotno.
 *
 * Does nothing unless shared->lsn_groups_per_page is positive.
 */
326 {
327  SlruShared shared = ctl->shared;
328 
 /*
  * The entries for a slot start at index slotno * lsn_groups_per_page in
  * group_lsn[]; lsn_groups_per_page entries of type XLogRecPtr are zeroed.
  */
329  if (shared->lsn_groups_per_page > 0)
330  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
331  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
332 }
#define MemSet(start, val, len)
Definition: c.h:1009

References SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, MemSet, and SlruCtlData::shared.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruCtl  ctl,
int  pageno 
)

Definition at line 281 of file slru.c.

282 {
283  SlruShared shared = ctl->shared;
284  int slotno;
285 
286  /* Find a suitable buffer slot for the page */
287  slotno = SlruSelectLRUPage(ctl, pageno);
288  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
289  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
290  !shared->page_dirty[slotno]) ||
291  shared->page_number[slotno] == pageno);
292 
293  /* Mark the slot as containing this page */
294  shared->page_number[slotno] = pageno;
295  shared->page_status[slotno] = SLRU_PAGE_VALID;
296  shared->page_dirty[slotno] = true;
297  SlruRecentlyUsed(shared, slotno);
298 
299  /* Set the buffer to zeroes */
300  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
301 
302  /* Set the LSNs for this new page to zero */
303  SimpleLruZeroLSNs(ctl, slotno);
304 
305  /* Assume this page is now the latest active page */
306  shared->latest_page_number = pageno;
307 
308  /* update the stats counter of zeroed pages */
309  pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
310 
311  return slotno;
312 }
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59

References Assert(), SlruSharedData::latest_page_number, MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_page_zeroed(), SlruCtlData::shared, SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed, and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), SerialAdd(), test_slru_page_write(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruCtl  ctl,
int  segno 
)

Definition at line 1328 of file slru.c.

1329 {
1330  SlruShared shared = ctl->shared;
1331  int slotno;
1332  bool did_write;
1333 
1334  /* Clean out any possibly existing references to the segment. */
1335  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1336 restart:
1337  did_write = false;
1338  for (slotno = 0; slotno < shared->num_slots; slotno++)
1339  {
1340  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1341 
1342  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1343  continue;
1344 
1345  /* not the segment we're looking for */
1346  if (pagesegno != segno)
1347  continue;
1348 
1349  /* If page is clean, just change state to EMPTY (expected case). */
1350  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1351  !shared->page_dirty[slotno])
1352  {
1353  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1354  continue;
1355  }
1356 
1357  /* Same logic as SimpleLruTruncate() */
1358  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1359  SlruInternalWritePage(ctl, slotno, NULL);
1360  else
1361  SimpleLruWaitIO(ctl, slotno);
1362 
1363  did_write = true;
1364  }
1365 
1366  /*
1367  * Be extra careful and re-check. The IO functions release the control
1368  * lock, so new pages could have been read in.
1369  */
1370  if (did_write)
1371  goto restart;
1372 
1373  SlruInternalDeleteSegment(ctl, segno);
1374 
1375  LWLockRelease(shared->ControlLock);
1376 }
static void SlruInternalDeleteSegment(SlruCtl ctl, int segno)
Definition: slru.c:1305

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by PerformMembersTruncation(), and test_slru_page_delete().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruCtl  ctl,
int  segno 
)
static

Definition at line 1305 of file slru.c.

1306 {
1307  char path[MAXPGPATH];
1308 
1309  /* Forget any fsync requests queued for this segment. */
1310  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1311  {
1312  FileTag tag;
1313 
1314  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1315  RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
1316  }
1317 
1318  /* Unlink the file. */
1319  SlruFileName(ctl, path, segno);
1320  ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1321  unlink(path);
1322 }
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
#define DEBUG2
Definition: elog.h:29
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:88
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:585
@ SYNC_FORGET_REQUEST
Definition: sync.h:27

References DEBUG2, ereport, errmsg_internal(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName, SYNC_FORGET_REQUEST, SlruCtlData::sync_handler, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 540 of file slru.c.

541 {
542  SlruShared shared = ctl->shared;
543  int pageno = shared->page_number[slotno]; /* remembered so we can detect the slot being reused */
544  bool ok;
545 
546  /* If a write is in progress, wait for it to finish */
547  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
548  shared->page_number[slotno] == pageno)
549  {
550  SimpleLruWaitIO(ctl, slotno);
551  }
552 
553  /*
554  * Do nothing if page is not dirty, or if buffer no longer contains the
555  * same page we were called for.
556  */
557  if (!shared->page_dirty[slotno] ||
558  shared->page_status[slotno] != SLRU_PAGE_VALID ||
559  shared->page_number[slotno] != pageno)
560  return;
561 
562  /*
563  * Mark the slot write-busy, and clear the dirtybit. After this point, a
564  * transaction status update on this page will mark it dirty again.
565  */
566  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
567  shared->page_dirty[slotno] = false;
568 
569  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
570  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
571 
572  /* Release control lock while doing I/O */
573  LWLockRelease(shared->ControlLock);
574 
575  /* Do the write */
576  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
577 
578  /* If we failed, and we're in a flush, better close the files */
579  if (!ok && fdata)
580  {
581  int i;
582 
583  for (i = 0; i < fdata->num_files; i++)
584  CloseTransientFile(fdata->fd[i]);
585  }
586 
587  /* Re-acquire control lock and update page state */
588  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
589 
590  Assert(shared->page_number[slotno] == pageno &&
591  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
592 
593  /* If we failed to write, mark the page dirty again */
594  if (!ok)
595  shared->page_dirty[slotno] = true;
596 
597  shared->page_status[slotno] = SLRU_PAGE_VALID;
598 
599  LWLockRelease(&shared->buffer_locks[slotno].lock);
600 
601  /* Now it's okay to ereport if we failed */
602  if (!ok)
603  SlruReportIOError(ctl, pageno, InvalidTransactionId);
604 
605  /* If part of a checkpoint, count this as a buffer written. */
606  if (fdata)
607  CheckpointStats.ckpt_bufs_written++;
608 }
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:757
int ckpt_bufs_written
Definition: xlog.h:162
CheckpointStatsData CheckpointStats
Definition: xlog.c:212

References Assert(), SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CloseTransientFile(), SlruSharedData::ControlLock, SlruWriteAllData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruWriteAllData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruCtl  ctl,
int  segpage,
int  cutoffPage 
)
static

Definition at line 1392 of file slru.c.

1393 {
1394  int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; /* last page of the segment that starts at segpage */
1395 
1396  Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); /* segpage must be the first page of a segment */
1397 
1398  return (ctl->PagePrecedes(segpage, cutoffPage) && /* true only when the segment's first page ... */
1399  ctl->PagePrecedes(seg_last_page, cutoffPage)); /* ... and its last page both precede cutoffPage */
1400 }

References Assert(), SlruCtlData::PagePrecedes, and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int  pageno,
int  slotno 
)
static

Definition at line 685 of file slru.c.

686 {
687  SlruShared shared = ctl->shared;
688  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
689  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
690  off_t offset = rpageno * BLCKSZ; /* byte offset of the page within its segment file */
691  char path[MAXPGPATH];
692  int fd;
693 
694  SlruFileName(ctl, path, segno);
695 
696  /*
697  * In a crash-and-restart situation, it's possible for us to receive
698  * commands to set the commit status of transactions whose bits are in
699  * already-truncated segments of the commit log (see notes in
700  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
701  * where the file doesn't exist, and return zeroes instead.
702  */
703  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
704  if (fd < 0)
705  {
706  if (errno != ENOENT || !InRecovery)
707  {
708  slru_errcause = SLRU_OPEN_FAILED;
709  slru_errno = errno;
710  return false;
711  }
712 
713  ereport(LOG,
714  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
715  path)));
716  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
717  return true;
718  }
719 
720  errno = 0;
721  pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
722  if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
723  {
724  pgstat_report_wait_end();
725  slru_errcause = SLRU_READ_FAILED;
726  slru_errno = errno;
727  CloseTransientFile(fd);
728  return false;
729  }
730  pgstat_report_wait_end();
731 
732  if (CloseTransientFile(fd) != 0)
733  {
734  slru_errcause = SLRU_CLOSE_FAILED;
735  slru_errno = errno;
736  return false;
737  }
738 
739  return true;
740 }
#define pg_pread
Definition: port.h:225
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
bool InRecovery
Definition: xlogutils.c:53

References CloseTransientFile(), ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName.

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 757 of file slru.c.

758 {
759  SlruShared shared = ctl->shared;
760  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
761  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
762  off_t offset = rpageno * BLCKSZ;
763  char path[MAXPGPATH];
764  int fd = -1; /* negative means "no file open yet" */
765 
766  /* update the stats counter of written pages */
767  pgstat_count_slru_page_written(shared->slru_stats_idx);
768 
769  /*
770  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
771  * write out data before associated WAL records. This is the same action
772  * performed during FlushBuffer() in the main buffer manager.
773  */
774  if (shared->group_lsn != NULL)
775  {
776  /*
777  * We must determine the largest async-commit LSN for the page. This
778  * is a bit tedious, but since this entire function is a slow path
779  * anyway, it seems better to do this here than to maintain a per-page
780  * LSN variable (which'd need an extra comparison in the
781  * transaction-commit path).
782  */
783  XLogRecPtr max_lsn;
784  int lsnindex,
785  lsnoff;
786 
787  lsnindex = slotno * shared->lsn_groups_per_page;
788  max_lsn = shared->group_lsn[lsnindex++];
789  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
790  {
791  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
792 
793  if (max_lsn < this_lsn)
794  max_lsn = this_lsn;
795  }
796 
797  if (!XLogRecPtrIsInvalid(max_lsn))
798  {
799  /*
800  * As noted above, elog(ERROR) is not acceptable here, so if
801  * XLogFlush were to fail, we must PANIC. This isn't much of a
802  * restriction because XLogFlush is just about all critical
803  * section anyway, but let's make sure.
804  */
805  START_CRIT_SECTION();
806  XLogFlush(max_lsn);
807  END_CRIT_SECTION();
808  }
809  }
810 
811  /*
812  * During a SimpleLruWriteAll, we may already have the desired file open.
813  */
814  if (fdata)
815  {
816  int i;
817 
818  for (i = 0; i < fdata->num_files; i++)
819  {
820  if (fdata->segno[i] == segno)
821  {
822  fd = fdata->fd[i];
823  break;
824  }
825  }
826  }
827 
828  if (fd < 0)
829  {
830  /*
831  * If the file doesn't already exist, we should create it. It is
832  * possible for this to need to happen when writing a page that's not
833  * first in its segment; we assume the OS can cope with that. (Note:
834  * it might seem that it'd be okay to create files only when
835  * SimpleLruZeroPage is called for the first page of a segment.
836  * However, if after a crash and restart the REDO logic elects to
837  * replay the log from a checkpoint before the latest one, then it's
838  * possible that we will get commands to set transaction status of
839  * transactions that have already been truncated from the commit log.
840  * Easiest way to deal with that is to accept references to
841  * nonexistent files here and in SlruPhysicalReadPage.)
842  *
843  * Note: it is possible for more than one backend to be executing this
844  * code simultaneously for different pages of the same file. Hence,
845  * don't use O_EXCL or O_TRUNC or anything like that.
846  */
847  SlruFileName(ctl, path, segno);
848  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
849  if (fd < 0)
850  {
851  slru_errcause = SLRU_OPEN_FAILED;
852  slru_errno = errno;
853  return false;
854  }
855 
856  if (fdata)
857  {
858  if (fdata->num_files < MAX_WRITEALL_BUFFERS)
859  {
860  fdata->fd[fdata->num_files] = fd;
861  fdata->segno[fdata->num_files] = segno;
862  fdata->num_files++;
863  }
864  else
865  {
866  /*
867  * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
868  * fall back to treating it as a standalone write.
869  */
870  fdata = NULL;
871  }
872  }
873  }
874 
875  errno = 0;
876  pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
877  if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
878  {
879  pgstat_report_wait_end();
880  /* if write didn't set errno, assume problem is no disk space */
881  if (errno == 0)
882  errno = ENOSPC;
883  slru_errcause = SLRU_WRITE_FAILED;
884  slru_errno = errno;
885  if (!fdata)
886  CloseTransientFile(fd);
887  return false;
888  }
889  pgstat_report_wait_end();
890 
891  /* Queue up a sync request for the checkpointer. */
892  if (ctl->sync_handler != SYNC_HANDLER_NONE)
893  {
894  FileTag tag;
895 
896  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
897  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
898  {
899  /* No space to enqueue sync request. Do it synchronously. */
900  pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
901  if (pg_fsync(fd) != 0)
902  {
903  pgstat_report_wait_end();
904  slru_errcause = SLRU_FSYNC_FAILED;
905  slru_errno = errno;
906  CloseTransientFile(fd);
907  return false;
908  }
909  pgstat_report_wait_end();
910  }
911  }
912 
913  /* Close file, unless part of flush request. */
914  if (!fdata)
915  {
916  if (CloseTransientFile(fd) != 0)
917  {
918  slru_errcause = SLRU_CLOSE_FAILED;
919  slru_errno = errno;
920  return false;
921  }
922  }
923 
924  return true;
925 }
int pg_fsync(int fd)
Definition: fd.c:361
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
#define pg_pwrite
Definition: port.h:226
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:72
@ SYNC_REQUEST
Definition: sync.h:25
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2535
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29

References CloseTransientFile(), END_CRIT_SECTION, fd(), SlruWriteAllData::fd, SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, SlruWriteAllData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_page_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName, START_CRIT_SECTION, SlruCtlData::sync_handler, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)
static

Definition at line 932 of file slru.c.

933 {
934  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
935  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
936  int offset = rpageno * BLCKSZ;
937  char path[MAXPGPATH];
938 
939  SlruFileName(ctl, path, segno);
940  errno = slru_errno; /* restore the saved errno so %m in the messages below reports it */
941  switch (slru_errcause)
942  {
943  case SLRU_OPEN_FAILED:
944  ereport(ERROR,
945  (errcode_for_file_access(),
946  errmsg("could not access status of transaction %u", xid),
947  errdetail("Could not open file \"%s\": %m.", path)));
948  break;
949  case SLRU_SEEK_FAILED:
950  ereport(ERROR,
951  (errcode_for_file_access(),
952  errmsg("could not access status of transaction %u", xid),
953  errdetail("Could not seek in file \"%s\" to offset %d: %m.",
954  path, offset)));
955  break;
956  case SLRU_READ_FAILED:
957  if (errno)
958  ereport(ERROR,
959  (errcode_for_file_access(),
960  errmsg("could not access status of transaction %u", xid),
961  errdetail("Could not read from file \"%s\" at offset %d: %m.",
962  path, offset)));
963  else
964  ereport(ERROR,
965  (errmsg("could not access status of transaction %u", xid),
966  errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
967  break;
968  case SLRU_WRITE_FAILED:
969  if (errno)
970  ereport(ERROR,
971  (errcode_for_file_access(),
972  errmsg("could not access status of transaction %u", xid),
973  errdetail("Could not write to file \"%s\" at offset %d: %m.",
974  path, offset)));
975  else
976  ereport(ERROR,
977  (errmsg("could not access status of transaction %u", xid),
978  errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
979  path, offset)));
980  break;
981  case SLRU_FSYNC_FAILED:
982  ereport(data_sync_elevel(ERROR),
983  (errcode_for_file_access(),
984  errmsg("could not access status of transaction %u", xid),
985  errdetail("Could not fsync file \"%s\": %m.",
986  path)));
987  break;
988  case SLRU_CLOSE_FAILED:
989  ereport(ERROR,
990  (errcode_for_file_access(),
991  errmsg("could not access status of transaction %u", xid),
992  errdetail("Could not close file \"%s\": %m.",
993  path)));
994  break;
995  default:
996  /* can't get here, we trust */
997  elog(ERROR, "unrecognized SimpleLru error cause: %d",
998  (int) slru_errcause);
999  break;
1000  }
1001 }
int errcode_for_file_access(void)
Definition: elog.c:881
int errdetail(const char *fmt,...)
Definition: elog.c:1202
#define ERROR
Definition: elog.h:39
int data_sync_elevel(int elevel)
Definition: fd.c:3881

References data_sync_elevel(), elog(), ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName.

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1531 of file slru.c.

1532 {
1533  SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1534 
1535  return false; /* keep going */
1536 }

References SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)
static

Definition at line 1516 of file slru.c.

1517 {
1518  int cutoffPage = *(int *) data; /* data is read as a pointer to an int cutoff page */
1519 
1520  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1521  SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1522 
1523  return false; /* keep going */
1524 }
const void * data
static bool SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage)
Definition: slru.c:1392

References data, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1501 of file slru.c.

1502 {
1503  int cutoffPage = *(int *) data; /* data is read as a pointer to an int cutoff page */
1504 
1505  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) /* does this segment lie wholly before the cutoff? */
1506  return true; /* found one; don't iterate any more */
1507 
1508  return false; /* keep going */
1509 }

References data, and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1554 of file slru.c.

1555 {
1556  bool retval = false; /* result of the last callback call; stays false if none ran */
1557  DIR *cldir;
1558  struct dirent *clde;
1559  int segno;
1560  int segpage;
1561 
1562  cldir = AllocateDir(ctl->Dir);
1563  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1564  {
1565  size_t len;
1566 
1567  len = strlen(clde->d_name);
1568 
1569  if ((len == 4 || len == 5 || len == 6) && /* only names of 4 to 6 characters ... */
1570  strspn(clde->d_name, "0123456789ABCDEF") == len) /* ... made up entirely of uppercase hex digits */
1571  {
1572  segno = (int) strtol(clde->d_name, NULL, 16); /* file name is the segment number in hex */
1573  segpage = segno * SLRU_PAGES_PER_SEGMENT; /* first page number of that segment */
1574 
1575  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1576  ctl->Dir, clde->d_name);
1577  retval = callback(ctl, clde->d_name, segpage, data);
1578  if (retval) /* a callback returning true ends the scan early */
1579  break;
1580  }
1581  }
1582  FreeDir(cldir);
1583 
1584  return retval;
1585 }
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2854
int FreeDir(DIR *dir)
Definition: fd.c:2906
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2788
const void size_t len
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46

References AllocateDir(), callback(), dirent::d_name, data, DEBUG2, SlruCtlData::Dir, elog(), FreeDir(), len, ReadDir(), and SLRU_PAGES_PER_SEGMENT.

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruCtl  ctl,
int  pageno 
)
static

Definition at line 1017 of file slru.c.

1018 {
1019  SlruShared shared = ctl->shared;
1020 
1021  /* Outer loop handles restart after I/O */
1022  for (;;)
1023  {
1024  int slotno;
1025  int cur_count;
1026  int bestvalidslot = 0; /* keep compiler quiet */
1027  int best_valid_delta = -1;
1028  int best_valid_page_number = 0; /* keep compiler quiet */
1029  int bestinvalidslot = 0; /* keep compiler quiet */
1030  int best_invalid_delta = -1;
1031  int best_invalid_page_number = 0; /* keep compiler quiet */
1032 
1033  /* See if page already has a buffer assigned */
1034  for (slotno = 0; slotno < shared->num_slots; slotno++)
1035  {
1036  if (shared->page_number[slotno] == pageno &&
1037  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
1038  return slotno; /* a non-EMPTY slot already holds pageno */
1039  }
1040 
1041  /*
1042  * If we find any EMPTY slot, just select that one. Else choose a
1043  * victim page to replace. We normally take the least recently used
1044  * valid page, but we will never take the slot containing
1045  * latest_page_number, even if it appears least recently used. We
1046  * will select a slot that is already I/O busy only if there is no
1047  * other choice: a read-busy slot will not be least recently used once
1048  * the read finishes, and waiting for an I/O on a write-busy slot is
1049  * inferior to just picking some other slot. Testing shows the slot
1050  * we pick instead will often be clean, allowing us to begin a read at
1051  * once.
1052  *
1053  * Normally the page_lru_count values will all be different and so
1054  * there will be a well-defined LRU page. But since we allow
1055  * concurrent execution of SlruRecentlyUsed() within
1056  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1057  * acquire the same lru_count values. In that case we break ties by
1058  * choosing the furthest-back page.
1059  *
1060  * Notice that this next line forcibly advances cur_lru_count to a
1061  * value that is certainly beyond any value that will be in the
1062  * page_lru_count array after the loop finishes. This ensures that
1063  * the next execution of SlruRecentlyUsed will mark the page newly
1064  * used, even if it's for a page that has the current counter value.
1065  * That gets us back on the path to having good data when there are
1066  * multiple pages with the same lru_count.
1067  */
1068  cur_count = (shared->cur_lru_count)++;
1069  for (slotno = 0; slotno < shared->num_slots; slotno++)
1070  {
1071  int this_delta;
1072  int this_page_number;
1073 
1074  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1075  return slotno; /* unused slot: nothing has to be evicted */
1076  this_delta = cur_count - shared->page_lru_count[slotno];
1077  if (this_delta < 0)
1078  {
1079  /*
1080  * Clean up in case shared updates have caused cur_count
1081  * increments to get "lost". We back off the page counts,
1082  * rather than trying to increase cur_count, to avoid any
1083  * question of infinite loops or failure in the presence of
1084  * wrapped-around counts.
1085  */
1086  shared->page_lru_count[slotno] = cur_count;
1087  this_delta = 0;
1088  }
1089  this_page_number = shared->page_number[slotno];
1090  if (this_page_number == shared->latest_page_number)
1091  continue; /* the slot holding latest_page_number is never a candidate */
1092  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1093  {
1094  if (this_delta > best_valid_delta ||
1095  (this_delta == best_valid_delta &&
1096  ctl->PagePrecedes(this_page_number,
1097  best_valid_page_number)))
1098  {
1099  bestvalidslot = slotno;
1100  best_valid_delta = this_delta;
1101  best_valid_page_number = this_page_number;
1102  }
1103  }
1104  else /* neither EMPTY nor VALID: tracked separately as an "invalid" candidate */
1105  {
1106  if (this_delta > best_invalid_delta ||
1107  (this_delta == best_invalid_delta &&
1108  ctl->PagePrecedes(this_page_number,
1109  best_invalid_page_number)))
1110  {
1111  bestinvalidslot = slotno;
1112  best_invalid_delta = this_delta;
1113  best_invalid_page_number = this_page_number;
1114  }
1115  }
1116  }
1117 
1118  /*
1119  * If all pages (except possibly the latest one) are I/O busy, we'll
1120  * have to wait for an I/O to complete and then retry. In that
1121  * unhappy case, we choose to wait for the I/O on the least recently
1122  * used slot, on the assumption that it was likely initiated first of
1123  * all the I/Os in progress and may therefore finish first.
1124  */
1125  if (best_valid_delta < 0) /* still -1: no VALID candidate was seen in the scan above */
1126  {
1127  SimpleLruWaitIO(ctl, bestinvalidslot);
1128  continue; /* restart the outer loop and rescan all slots */
1129  }
1130 
1131  /*
1132  * If the selected page is clean, we're set.
1133  */
1134  if (!shared->page_dirty[bestvalidslot])
1135  return bestvalidslot;
1136 
1137  /*
1138  * Write the page.
1139  */
1140  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1141 
1142  /*
1143  * Now loop back and try again. This is the easiest way of dealing
1144  * with corner cases such as the victim page being re-dirtied while we
1145  * wrote it.
1146  */
1147  }
1148 }

References SlruSharedData::cur_lru_count, SlruSharedData::latest_page_number, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruCtl  ctl,
const FileTag *  ftag,
char *  path 
)

Definition at line 1594 of file slru.c.

1595 {
1596  int fd;
1597  int save_errno;
1598  int result;
1599 
1600  SlruFileName(ctl, path, ftag->segno);
1601 
1602  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1603  if (fd < 0)
1604  return -1;
1605 
1606  pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1607  result = pg_fsync(fd);
1608  pgstat_report_wait_end();
1609  save_errno = errno; /* keep pg_fsync's errno across the close below */
1610 
1611  CloseTransientFile(fd);
1612 
1613  errno = save_errno;
1614  return result;
1615 }
uint32 segno
Definition: sync.h:55

References CloseTransientFile(), fd(), OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), FileTag::segno, and SlruFileName.

Referenced by clogsyncfiletag(), committssyncfiletag(), multixactmemberssyncfiletag(), multixactoffsetssyncfiletag(), and test_slru_page_sync().

Variable Documentation

◆ slru_errcause

◆ slru_errno