PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruFlushData
 

Macros

#define SlruFileName(ctl, path, seg)   snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
 
#define MAX_FLUSH_BUFFERS   16
 
#define SlruRecentlyUsed(shared, slotno)
 

Typedefs

typedef struct SlruFlushData SlruFlushData
 
typedef struct SlruFlushData * SlruFlush
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED, SLRU_SEEK_FAILED, SLRU_READ_FAILED, SLRU_WRITE_FAILED,
  SLRU_FSYNC_FAILED, SLRU_CLOSE_FAILED
}
 

Functions

static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruFlush fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 
static void SlruReportIOError (SlruCtl ctl, int pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, char *filename)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
 
int SimpleLruZeroPage (SlruCtl ctl, int pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int pageno)
 
void SimpleLruFlush (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int segno)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

#define MAX_FLUSH_BUFFERS   16

Definition at line 73 of file slru.c.

Referenced by SlruPhysicalWritePage().

#define SlruFileName (   ctl,
  path,
  seg 
)    snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
#define SlruRecentlyUsed (   shared,
  slotno 
)
Value:
do { \
int new_lru_count = (shared)->cur_lru_count; \
if (new_lru_count != (shared)->page_lru_count[slotno]) { \
(shared)->cur_lru_count = ++new_lru_count; \
(shared)->page_lru_count[slotno] = new_lru_count; \
} \
} while (0)

Definition at line 103 of file slru.c.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

Typedef Documentation

Definition at line 82 of file slru.c.

Enumeration Type Documentation

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 113 of file slru.c.

Function Documentation

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int  pageno 
)

Definition at line 590 of file slru.c.

References CloseTransientFile(), endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, result, SlruFlushData::segno, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruFileName, and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), and MaybeExtendOffsetSlru().

591 {
592  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
593  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
594  int offset = rpageno * BLCKSZ;
595  char path[MAXPGPATH];
596  int fd;
597  bool result;
598  off_t endpos;
599 
600  SlruFileName(ctl, path, segno);
601 
602  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
603  if (fd < 0)
604  {
605  /* expected: file doesn't exist */
606  if (errno == ENOENT)
607  return false;
608 
609  /* report error normally */
610  slru_errcause = SLRU_OPEN_FAILED;
611  slru_errno = errno;
612  SlruReportIOError(ctl, pageno, 0);
613  }
614 
615  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
616  {
617  slru_errcause = SLRU_OPEN_FAILED;
618  slru_errno = errno;
619  SlruReportIOError(ctl, pageno, 0);
620  }
621 
622  result = endpos >= (off_t) (offset + BLCKSZ);
623 
624  CloseTransientFile(fd);
625  return result;
626 }
static SlruErrorCause slru_errcause
Definition: slru.c:123
return result
Definition: formatting.c:1633
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:893
#define PG_BINARY
Definition: c.h:1039
static XLogRecPtr endpos
#define MAXPGPATH
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
int CloseTransientFile(int fd)
Definition: fd.c:2305
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static int slru_errno
Definition: slru.c:124
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
void SimpleLruFlush ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1104 of file slru.c.

References Assert, CloseTransientFile(), SlruSharedData::ControlLock, SlruCtlData::do_fsync, SlruFlushData::fd, i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), SlruReportIOError(), and WAIT_EVENT_SLRU_FLUSH_SYNC.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), ShutdownCLOG(), ShutdownCommitTs(), ShutdownMultiXact(), and ShutdownSUBTRANS().

1105 {
1106  SlruShared shared = ctl->shared;
1107  SlruFlushData fdata;
1108  int slotno;
1109  int pageno = 0;
1110  int i;
1111  bool ok;
1112 
1113  /*
1114  * Find and write dirty pages
1115  */
1116  fdata.num_files = 0;
1117 
1118  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1119
1120  for (slotno = 0; slotno < shared->num_slots; slotno++)
1121  {
1122  SlruInternalWritePage(ctl, slotno, &fdata);
1123 
1124  /*
1125  * In some places (e.g. checkpoints), we cannot assert that the slot
1126  * is clean now, since another process might have re-dirtied it
1127  * already. That's okay.
1128  */
1129  Assert(allow_redirtied ||
1130  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1131  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1132  !shared->page_dirty[slotno]));
1133  }
1134 
1135  LWLockRelease(shared->ControlLock);
1136 
1137  /*
1138  * Now fsync and close any files that were open
1139  */
1140  ok = true;
1141  for (i = 0; i < fdata.num_files; i++)
1142  {
1143  pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1144  if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
1145  {
1146  slru_errcause = SLRU_FSYNC_FAILED;
1147  slru_errno = errno;
1148  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1149  ok = false;
1150  }
1151  pgstat_report_wait_end();
1152
1153  if (CloseTransientFile(fdata.fd[i]))
1154  {
1155  slru_errcause = SLRU_CLOSE_FAILED;
1156  slru_errno = errno;
1157  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1158  ok = false;
1159  }
1160  }
1161  if (!ok)
1162  SlruReportIOError(ctl, pageno, InvalidTransactionId);
1163 }
LWLock * ControlLock
Definition: slru.h:57
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
static SlruErrorCause slru_errcause
Definition: slru.c:123
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:79
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:893
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
#define InvalidTransactionId
Definition: transam.h:31
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1235
bool do_fsync
Definition: slru.h:121
int CloseTransientFile(int fd)
Definition: fd.c:2305
int num_files
Definition: slru.c:77
#define Assert(condition)
Definition: c.h:676
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1211
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
int num_slots
Definition: slru.h:60
static int slru_errno
Definition: slru.c:124
bool * page_dirty
Definition: slru.h:68
int i
SlruShared shared
Definition: slru.h:115
int pg_fsync(int fd)
Definition: fd.c:333
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
LWLock *  ctllock,
const char *  subdir,
int  tranche_id 
)

Definition at line 165 of file slru.c.

References Assert, SlruSharedData::buffer_locks, BUFFERALIGN, SlruSharedData::ControlLock, SlruSharedData::cur_lru_count, SlruCtlData::Dir, SlruCtlData::do_fsync, SlruSharedData::group_lsn, IsUnderPostmaster, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, SlruSharedData::lwlock_tranche_id, SlruSharedData::lwlock_tranche_name, LWLockInitialize(), LWLockRegisterTranche(), MAXALIGN, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, ShmemInitStruct(), SimpleLruShmemSize(), SLRU_MAX_NAME_LENGTH, SLRU_PAGE_EMPTY, strlcpy(), and StrNCpy.

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), OldSerXidInit(), and SUBTRANSShmemInit().

167 {
168  SlruShared shared;
169  bool found;
170 
171  shared = (SlruShared) ShmemInitStruct(name,
172  SimpleLruShmemSize(nslots, nlsns),
173  &found);
174 
175  if (!IsUnderPostmaster)
176  {
177  /* Initialize locks and shared memory area */
178  char *ptr;
179  Size offset;
180  int slotno;
181 
182  Assert(!found);
183 
184  memset(shared, 0, sizeof(SlruSharedData));
185 
186  shared->ControlLock = ctllock;
187 
188  shared->num_slots = nslots;
189  shared->lsn_groups_per_page = nlsns;
190 
191  shared->cur_lru_count = 0;
192 
193  /* shared->latest_page_number will be set later */
194 
195  ptr = (char *) shared;
196  offset = MAXALIGN(sizeof(SlruSharedData));
197  shared->page_buffer = (char **) (ptr + offset);
198  offset += MAXALIGN(nslots * sizeof(char *));
199  shared->page_status = (SlruPageStatus *) (ptr + offset);
200  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
201  shared->page_dirty = (bool *) (ptr + offset);
202  offset += MAXALIGN(nslots * sizeof(bool));
203  shared->page_number = (int *) (ptr + offset);
204  offset += MAXALIGN(nslots * sizeof(int));
205  shared->page_lru_count = (int *) (ptr + offset);
206  offset += MAXALIGN(nslots * sizeof(int));
207 
208  /* Initialize LWLocks */
209  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
210  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
211 
212  if (nlsns > 0)
213  {
214  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
215  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
216  }
217 
218  Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
219  strlcpy(shared->lwlock_tranche_name, name, SLRU_MAX_NAME_LENGTH);
220  shared->lwlock_tranche_id = tranche_id;
221 
222  ptr += BUFFERALIGN(offset);
223  for (slotno = 0; slotno < nslots; slotno++)
224  {
225  LWLockInitialize(&shared->buffer_locks[slotno].lock,
226  shared->lwlock_tranche_id);
227 
228  shared->page_buffer[slotno] = ptr;
229  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
230  shared->page_dirty[slotno] = false;
231  shared->page_lru_count[slotno] = 0;
232  ptr += BLCKSZ;
233  }
234 
235  /* Should fit to estimated shmem size */
236  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
237  }
238  else
239  Assert(found);
240 
241  /* Register SLRU tranche in the main tranches array */
242  LWLockRegisterTranche(shared->lwlock_tranche_id,
243  shared->lwlock_tranche_name);
244 
245  /*
246  * Initialize the unshared control struct, including directory path. We
247  * assume caller set PagePrecedes.
248  */
249  ctl->shared = shared;
250  ctl->do_fsync = true; /* default behavior */
251  StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
252 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
SlruPageStatus
Definition: slru.h:44
char ** page_buffer
Definition: slru.h:66
int cur_lru_count
Definition: slru.h:92
int lsn_groups_per_page
Definition: slru.h:81
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:145
SlruPageStatus * page_status
Definition: slru.h:67
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH]
Definition: slru.h:103
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
bool IsUnderPostmaster
Definition: globals.c:101
LWLockPadded * buffer_locks
Definition: slru.h:104
XLogRecPtr * group_lsn
Definition: slru.h:80
bool do_fsync
Definition: slru.h:121
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:667
SlruSharedData * SlruShared
Definition: slru.h:107
char Dir[64]
Definition: slru.h:134
#define SLRU_MAX_NAME_LENGTH
Definition: slru.h:36
LWLock lock
Definition: lwlock.h:79
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int * page_lru_count
Definition: slru.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:676
#define StrNCpy(dst, src, len)
Definition: c.h:831
size_t Size
Definition: c.h:356
#define MAXALIGN(LEN)
Definition: c.h:588
void LWLockRegisterTranche(int tranche_id, char *tranche_name)
Definition: lwlock.c:592
int num_slots
Definition: slru.h:60
const char * name
Definition: encode.c:521
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define BUFFERALIGN(LEN)
Definition: c.h:590
int lwlock_tranche_id
Definition: slru.h:102
int SimpleLruReadPage ( SlruCtl  ctl,
int  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 375 of file slru.c.

References Assert, SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalReadPage(), SlruRecentlyUsed, SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), OldSerXidAdd(), RecordNewMultiXact(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), TransactionIdSetPageStatus(), TrimCLOG(), and TrimMultiXact().

377 {
378  SlruShared shared = ctl->shared;
379 
380  /* Outer loop handles restart if we must wait for someone else's I/O */
381  for (;;)
382  {
383  int slotno;
384  bool ok;
385 
386  /* See if page already is in memory; if not, pick victim slot */
387  slotno = SlruSelectLRUPage(ctl, pageno);
388 
389  /* Did we find the page in memory? */
390  if (shared->page_number[slotno] == pageno &&
391  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
392  {
393  /*
394  * If page is still being read in, we must wait for I/O. Likewise
395  * if the page is being written and the caller said that's not OK.
396  */
397  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
398  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
399  !write_ok))
400  {
401  SimpleLruWaitIO(ctl, slotno);
402  /* Now we must recheck state from the top */
403  continue;
404  }
405  /* Otherwise, it's ready to use */
406  SlruRecentlyUsed(shared, slotno);
407  return slotno;
408  }
409 
410  /* We found no match; assert we selected a freeable slot */
411  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
412  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
413  !shared->page_dirty[slotno]));
414 
415  /* Mark the slot read-busy */
416  shared->page_number[slotno] = pageno;
417  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
418  shared->page_dirty[slotno] = false;
419 
420  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
421  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
422 
423  /* Release control lock while doing I/O */
424  LWLockRelease(shared->ControlLock);
425 
426  /* Do the read */
427  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
428 
429  /* Set the LSNs for this newly read-in page to zero */
430  SimpleLruZeroLSNs(ctl, slotno);
431 
432  /* Re-acquire control lock and update page state */
433  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
434 
435  Assert(shared->page_number[slotno] == pageno &&
436  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
437  !shared->page_dirty[slotno]);
438 
439  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
440 
441  LWLockRelease(&shared->buffer_locks[slotno].lock);
442 
443  /* Now it's okay to ereport if we failed */
444  if (!ok)
445  SlruReportIOError(ctl, pageno, xid);
446 
447  SlruRecentlyUsed(shared, slotno);
448  return slotno;
449  }
450 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:304
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:893
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
LWLockPadded * buffer_locks
Definition: slru.h:104
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
LWLock lock
Definition: lwlock.h:79
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:639
#define Assert(condition)
Definition: c.h:676
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:967
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)

Definition at line 467 of file slru.c.

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruReadPage(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, and SlruRecentlyUsed.

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), OldSerXidGetMinConflictCommitSeqNo(), SubTransGetParent(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

468 {
469  SlruShared shared = ctl->shared;
470  int slotno;
471 
472  /* Try to find the page while holding only shared lock */
473  LWLockAcquire(shared->ControlLock, LW_SHARED);
474 
475  /* See if page is already in a buffer */
476  for (slotno = 0; slotno < shared->num_slots; slotno++)
477  {
478  if (shared->page_number[slotno] == pageno &&
479  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
480  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
481  {
482  /* See comments for SlruRecentlyUsed macro */
483  SlruRecentlyUsed(shared, slotno);
484  return slotno;
485  }
486  }
487 
488  /* No luck, so switch to normal exclusive lock and do regular read */
489  LWLockRelease(shared->ControlLock);
490  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
491 
492  return SimpleLruReadPage(ctl, pageno, true, xid);
493 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:375
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
int num_slots
Definition: slru.h:60
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 145 of file slru.c.

References BUFFERALIGN, and MAXALIGN.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), and SUBTRANSShmemSize().

146 {
147  Size sz;
148 
149  /* we assume nslots isn't so large as to risk overflow */
150  sz = MAXALIGN(sizeof(SlruSharedData));
151  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
152  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
153  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
154  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
155  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
156  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
157 
158  if (nlsns > 0)
159  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
160 
161  return BUFFERALIGN(sz) + BLCKSZ * nslots;
162 }
SlruPageStatus
Definition: slru.h:44
uint64 XLogRecPtr
Definition: xlogdefs.h:21
size_t Size
Definition: c.h:356
#define MAXALIGN(LEN)
Definition: c.h:588
#define BUFFERALIGN(LEN)
Definition: c.h:590
void SimpleLruTruncate ( SlruCtl  ctl,
int  cutoffPage 
)

Definition at line 1169 of file slru.c.

References SlruSharedData::ControlLock, SlruCtlData::Dir, ereport, errmsg(), SlruSharedData::latest_page_number, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NULL, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

1170 {
1171  SlruShared shared = ctl->shared;
1172  int slotno;
1173 
1174  /*
1175  * The cutoff point is the start of the segment containing cutoffPage.
1176  */
1177  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1178 
1179  /*
1180  * Scan shared memory and remove any pages preceding the cutoff page, to
1181  * ensure we won't rewrite them later. (Since this is normally called in
1182  * or just after a checkpoint, any dirty pages should have been flushed
1183  * already ... we're just being extra careful here.)
1184  */
1185  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1186 
1187 restart:;
1188 
1189  /*
1190  * While we are holding the lock, make an important safety check: the
1191  * planned cutoff point must be <= the current endpoint page. Otherwise we
1192  * have already wrapped around, and proceeding with the truncation would
1193  * risk removing the current segment.
1194  */
1195  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1196  {
1197  LWLockRelease(shared->ControlLock);
1198  ereport(LOG,
1199  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1200  ctl->Dir)));
1201  return;
1202  }
1203 
1204  for (slotno = 0; slotno < shared->num_slots; slotno++)
1205  {
1206  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1207  continue;
1208  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1209  continue;
1210 
1211  /*
1212  * If page is clean, just change state to EMPTY (expected case).
1213  */
1214  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1215  !shared->page_dirty[slotno])
1216  {
1217  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1218  continue;
1219  }
1220 
1221  /*
1222  * Hmm, we have (or may have) I/O operations acting on the page, so
1223  * we've got to wait for them to finish and then start again. This is
1224  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1225  * wouldn't it be OK to just discard it without writing it? For now,
1226  * keep the logic the same as it was.)
1227  */
1228  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1229  SlruInternalWritePage(ctl, slotno, NULL);
1230  else
1231  SimpleLruWaitIO(ctl, slotno);
1232  goto restart;
1233  }
1234 
1235  LWLockRelease(shared->ControlLock);
1236 
1237  /* Now we can remove the old segment(s) */
1238  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1239 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int latest_page_number
Definition: slru.h:99
#define LOG
Definition: elog.h:26
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1339
char Dir[64]
Definition: slru.h:134
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
#define NULL
Definition: c.h:229
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1377
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
int num_slots
Definition: slru.h:60
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 321 of file slru.c.

References SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlruCtlData::shared, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

322 {
323  SlruShared shared = ctl->shared;
324 
325  /* See notes at top of file */
326  LWLockRelease(shared->ControlLock);
327  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
328  LWLockRelease(&shared->buffer_locks[slotno].lock);
329  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
330 
331  /*
332  * If the slot is still in an io-in-progress state, then either someone
333  * already started a new I/O on the slot, or a previous I/O failed and
334  * neglected to reset the page state. That shouldn't happen, really, but
335  * it seems worth a few extra cycles to check and recover from it. We can
336  * cheaply test for failure by seeing if the buffer lock is still held (we
337  * assume that transaction abort would release the lock).
338  */
339  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
340  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
341  {
342  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
343  {
344  /* indeed, the I/O must have failed */
345  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
346  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
347  else /* write_in_progress */
348  {
349  shared->page_status[slotno] = SLRU_PAGE_VALID;
350  shared->page_dirty[slotno] = true;
351  }
352  LWLockRelease(&shared->buffer_locks[slotno].lock);
353  }
354  }
355 }
LWLock * ControlLock
Definition: slru.h:57
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1283
LWLockPadded * buffer_locks
Definition: slru.h:104
LWLock lock
Definition: lwlock.h:79
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

Definition at line 578 of file slru.c.

References NULL, and SlruInternalWritePage().

Referenced by ActivateCommitTs(), AsyncShmemInit(), BootStrapCLOG(), BootStrapMultiXact(), BootStrapSUBTRANS(), clog_redo(), commit_ts_redo(), MaybeExtendOffsetSlru(), and multixact_redo().

579 {
580  SlruInternalWritePage(ctl, slotno, NULL);
581 }
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
#define NULL
Definition: c.h:229
static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 304 of file slru.c.

References SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, MemSet, and SlruCtlData::shared.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

305 {
306  SlruShared shared = ctl->shared;
307 
308  if (shared->lsn_groups_per_page > 0)
309  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
310  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
311 }
#define MemSet(start, val, len)
Definition: c.h:858
int lsn_groups_per_page
Definition: slru.h:81
XLogRecPtr * group_lsn
Definition: slru.h:80
uint64 XLogRecPtr
Definition: xlogdefs.h:21
SlruShared shared
Definition: slru.h:115
int SimpleLruZeroPage ( SlruCtl  ctl,
int  pageno 
)

Definition at line 263 of file slru.c.

References Assert, SlruSharedData::latest_page_number, MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruRecentlyUsed, and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), AsyncShmemInit(), OldSerXidAdd(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

264 {
265  SlruShared shared = ctl->shared;
266  int slotno;
267 
268  /* Find a suitable buffer slot for the page */
269  slotno = SlruSelectLRUPage(ctl, pageno);
270  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
271  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
272  !shared->page_dirty[slotno]) ||
273  shared->page_number[slotno] == pageno);
274 
275  /* Mark the slot as containing this page */
276  shared->page_number[slotno] = pageno;
277  shared->page_status[slotno] = SLRU_PAGE_VALID;
278  shared->page_dirty[slotno] = true;
279  SlruRecentlyUsed(shared, slotno);
280 
281  /* Set the buffer to zeroes */
282  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
283 
284  /* Set the LSNs for this new page to zero */
285  SimpleLruZeroLSNs(ctl, slotno);
286 
287  /* Assume this page is now the latest active page */
288  shared->latest_page_number = pageno;
289 
290  return slotno;
291 }
int * page_number
Definition: slru.h:69
int latest_page_number
Definition: slru.h:99
char ** page_buffer
Definition: slru.h:66
#define MemSet(start, val, len)
Definition: c.h:858
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:304
SlruPageStatus * page_status
Definition: slru.h:67
#define Assert(condition)
Definition: c.h:676
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:967
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
void SlruDeleteSegment ( SlruCtl  ctl,
int  segno 
)

Definition at line 1262 of file slru.c.

References SlruSharedData::ControlLock, DEBUG2, SlruCtlData::Dir, ereport, errmsg(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, NULL, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), snprintf(), and unlink().

Referenced by PerformMembersTruncation().

1263 {
1264  SlruShared shared = ctl->shared;
1265  int slotno;
1266  char path[MAXPGPATH];
1267  bool did_write;
1268 
1269  /* Clean out any possibly existing references to the segment. */
1270  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1271 restart:
1272  did_write = false;
1273  for (slotno = 0; slotno < shared->num_slots; slotno++)
1274  {
1275  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1276 
1277  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1278  continue;
1279 
1280  /* not the segment we're looking for */
1281  if (pagesegno != segno)
1282  continue;
1283 
1284  /* If page is clean, just change state to EMPTY (expected case). */
1285  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1286  !shared->page_dirty[slotno])
1287  {
1288  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1289  continue;
1290  }
1291 
1292  /* Same logic as SimpleLruTruncate() */
1293  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1294  SlruInternalWritePage(ctl, slotno, NULL);
1295  else
1296  SimpleLruWaitIO(ctl, slotno);
1297 
1298  did_write = true;
1299  }
1300 
1301  /*
1302  * Be extra careful and re-check. The IO functions release the control
1303  * lock, so new pages could have been read in.
1304  */
1305  if (did_write)
1306  goto restart;
1307 
1308  snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
1309  ereport(DEBUG2,
1310  (errmsg("removing file \"%s\"", path)));
1311  unlink(path);
1312 
1313  LWLockRelease(shared->ControlLock);
1314 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
char Dir[64]
Definition: slru.h:134
#define NULL
Definition: c.h:229
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
int num_slots
Definition: slru.h:60
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
static void SlruInternalDeleteSegment ( SlruCtl  ctl,
char *  filename 
)
static

Definition at line 1248 of file slru.c.

References DEBUG2, SlruCtlData::Dir, ereport, errmsg(), MAXPGPATH, snprintf(), and unlink().

Referenced by SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

1249 {
1250  char path[MAXPGPATH];
1251 
1252  snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1253  ereport(DEBUG2,
1254  (errmsg("removing file \"%s\"", path)));
1255  unlink(path);
1256 }
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
char Dir[64]
Definition: slru.h:134
static char * filename
Definition: pg_dumpall.c:90
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruFlush  fdata 
)
static

Definition at line 507 of file slru.c.

References Assert, SlruSharedData::buffer_locks, CloseTransientFile(), SlruSharedData::ControlLock, SlruFlushData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruFlush(), SimpleLruTruncate(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

508 {
509  SlruShared shared = ctl->shared;
510  int pageno = shared->page_number[slotno];
511  bool ok;
512 
513  /* If a write is in progress, wait for it to finish */
514  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
515  shared->page_number[slotno] == pageno)
516  {
517  SimpleLruWaitIO(ctl, slotno);
518  }
519 
520  /*
521  * Do nothing if page is not dirty, or if buffer no longer contains the
522  * same page we were called for.
523  */
524  if (!shared->page_dirty[slotno] ||
525  shared->page_status[slotno] != SLRU_PAGE_VALID ||
526  shared->page_number[slotno] != pageno)
527  return;
528 
529  /*
530  * Mark the slot write-busy, and clear the dirtybit. After this point, a
531  * transaction status update on this page will mark it dirty again.
532  */
533  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
534  shared->page_dirty[slotno] = false;
535 
536  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
537  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
538 
539  /* Release control lock while doing I/O */
540  LWLockRelease(shared->ControlLock);
541 
542  /* Do the write */
543  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
544 
545  /* If we failed, and we're in a flush, better close the files */
546  if (!ok && fdata)
547  {
548  int i;
549 
550  for (i = 0; i < fdata->num_files; i++)
551  CloseTransientFile(fdata->fd[i]);
552  }
553 
554  /* Re-acquire control lock and update page state */
555  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
556 
557  Assert(shared->page_number[slotno] == pageno &&
558  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
559 
560  /* If we failed to write, mark the page dirty again */
561  if (!ok)
562  shared->page_dirty[slotno] = true;
563 
564  shared->page_status[slotno] = SLRU_PAGE_VALID;
565 
566  LWLockRelease(&shared->buffer_locks[slotno].lock);
567 
568  /* Now it's okay to ereport if we failed */
569  if (!ok)
570  SlruReportIOError(ctl, pageno, InvalidTransactionId);
571 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
Definition: slru.c:719
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:893
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
SlruPageStatus * page_status
Definition: slru.h:67
LWLockPadded * buffer_locks
Definition: slru.h:104
#define InvalidTransactionId
Definition: transam.h:31
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
int CloseTransientFile(int fd)
Definition: fd.c:2305
int num_files
Definition: slru.c:77
LWLock lock
Definition: lwlock.h:79
#define Assert(condition)
Definition: c.h:676
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
bool * page_dirty
Definition: slru.h:68
int i
SlruShared shared
Definition: slru.h:115
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int  pageno,
int  slotno 
)
static

Definition at line 639 of file slru.c.

References CloseTransientFile(), ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pgstat_report_wait_end(), pgstat_report_wait_start(), read, SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SlruFileName, and WAIT_EVENT_SLRU_READ.

Referenced by SimpleLruReadPage().

640 {
641  SlruShared shared = ctl->shared;
642  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
643  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
644  int offset = rpageno * BLCKSZ;
645  char path[MAXPGPATH];
646  int fd;
647 
648  SlruFileName(ctl, path, segno);
649 
650  /*
651  * In a crash-and-restart situation, it's possible for us to receive
652  * commands to set the commit status of transactions whose bits are in
653  * already-truncated segments of the commit log (see notes in
654  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
655  * where the file doesn't exist, and return zeroes instead.
656  */
657  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
658  if (fd < 0)
659  {
660  if (errno != ENOENT || !InRecovery)
661  {
662  slru_errcause = SLRU_OPEN_FAILED;
663  slru_errno = errno;
664  return false;
665  }
666 
667  ereport(LOG,
668  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
669  path)));
670  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
671  return true;
672  }
673 
674  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
675  {
676  slru_errcause = SLRU_SEEK_FAILED;
677  slru_errno = errno;
678  CloseTransientFile(fd);
679  return false;
680  }
681 
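682  errno = 0;
683  pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
684  if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
685  {
686  pgstat_report_wait_end();
687  slru_errcause = SLRU_READ_FAILED;
688  slru_errno = errno;
689  CloseTransientFile(fd);
690  return false;
691  }
692  pgstat_report_wait_end();
693 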
694  if (CloseTransientFile(fd))
695  {
696  slru_errcause = SLRU_CLOSE_FAILED;
697  slru_errno = errno;
698  return false;
699  }
700 
701  return true;
702 }
char ** page_buffer
Definition: slru.h:66
bool InRecovery
Definition: xlog.c:192
#define MemSet(start, val, len)
Definition: c.h:858
static SlruErrorCause slru_errcause
Definition: slru.c:123
#define LOG
Definition: elog.h:26
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1039
#define MAXPGPATH
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1235
#define ereport(elevel, rest)
Definition: elog.h:122
int CloseTransientFile(int fd)
Definition: fd.c:2305
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1211
static int slru_errno
Definition: slru.c:124
int errmsg(const char *fmt,...)
Definition: elog.c:797
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
#define read(a, b, c)
Definition: win32.h:13
static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int  pageno,
int  slotno,
SlruFlush  fdata 
)
static

Definition at line 719 of file slru.c.

References CloseTransientFile(), SlruCtlData::do_fsync, END_CRIT_SECTION, SlruFlushData::fd, fd(), SlruSharedData::group_lsn, i, SlruSharedData::lsn_groups_per_page, MAX_FLUSH_BUFFERS, MAXPGPATH, NULL, SlruFlushData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, SlruFileName, START_CRIT_SECTION, WAIT_EVENT_SLRU_SYNC, WAIT_EVENT_SLRU_WRITE, write, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

720 {
721  SlruShared shared = ctl->shared;
722  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
723  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
724  int offset = rpageno * BLCKSZ;
725  char path[MAXPGPATH];
726  int fd = -1;
727 
728  /*
729  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
730  * write out data before associated WAL records. This is the same action
731  * performed during FlushBuffer() in the main buffer manager.
732  */
733  if (shared->group_lsn != NULL)
734  {
735  /*
736  * We must determine the largest async-commit LSN for the page. This
737  * is a bit tedious, but since this entire function is a slow path
738  * anyway, it seems better to do this here than to maintain a per-page
739  * LSN variable (which'd need an extra comparison in the
740  * transaction-commit path).
741  */
742  XLogRecPtr max_lsn;
743  int lsnindex,
744  lsnoff;
745 
746  lsnindex = slotno * shared->lsn_groups_per_page;
747  max_lsn = shared->group_lsn[lsnindex++];
748  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
749  {
750  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
751 
752  if (max_lsn < this_lsn)
753  max_lsn = this_lsn;
754  }
755 
756  if (!XLogRecPtrIsInvalid(max_lsn))
757  {
758  /*
759  * As noted above, elog(ERROR) is not acceptable here, so if
760  * XLogFlush were to fail, we must PANIC. This isn't much of a
761  * restriction because XLogFlush is just about all critical
762  * section anyway, but let's make sure.
763  */
764  START_CRIT_SECTION();
765  XLogFlush(max_lsn);
766  END_CRIT_SECTION();
767  }
768  }
769 
770  /*
771  * During a Flush, we may already have the desired file open.
772  */
773  if (fdata)
774  {
775  int i;
776 
777  for (i = 0; i < fdata->num_files; i++)
778  {
779  if (fdata->segno[i] == segno)
780  {
781  fd = fdata->fd[i];
782  break;
783  }
784  }
785  }
786 
787  if (fd < 0)
788  {
789  /*
790  * If the file doesn't already exist, we should create it. It is
791  * possible for this to need to happen when writing a page that's not
792  * first in its segment; we assume the OS can cope with that. (Note:
793  * it might seem that it'd be okay to create files only when
794  * SimpleLruZeroPage is called for the first page of a segment.
795  * However, if after a crash and restart the REDO logic elects to
796  * replay the log from a checkpoint before the latest one, then it's
797  * possible that we will get commands to set transaction status of
798  * transactions that have already been truncated from the commit log.
799  * Easiest way to deal with that is to accept references to
800  * nonexistent files here and in SlruPhysicalReadPage.)
801  *
802  * Note: it is possible for more than one backend to be executing this
803  * code simultaneously for different pages of the same file. Hence,
804  * don't use O_EXCL or O_TRUNC or anything like that.
805  */
806  SlruFileName(ctl, path, segno);
807  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY,
808  S_IRUSR | S_IWUSR);
809  if (fd < 0)
810  {
811  slru_errcause = SLRU_OPEN_FAILED;
812  slru_errno = errno;
813  return false;
814  }
815 
816  if (fdata)
817  {
818  if (fdata->num_files < MAX_FLUSH_BUFFERS)
819  {
820  fdata->fd[fdata->num_files] = fd;
821  fdata->segno[fdata->num_files] = segno;
822  fdata->num_files++;
823  }
824  else
825  {
826  /*
827  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
828  * fall back to treating it as a standalone write.
829  */
830  fdata = NULL;
831  }
832  }
833  }
834 
835  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
836  {
837  slru_errcause = SLRU_SEEK_FAILED;
838  slru_errno = errno;
839  if (!fdata)
840  CloseTransientFile(fd);
841  return false;
842  }
843 
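844  errno = 0;
845  pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
846  if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
847  {
848  pgstat_report_wait_end();
849  /* if write didn't set errno, assume problem is no disk space */
850  if (errno == 0)
851  errno = ENOSPC;
852  slru_errcause = SLRU_WRITE_FAILED;
853  slru_errno = errno;
854  if (!fdata)
855  CloseTransientFile(fd);
856  return false;
857  }
858  pgstat_report_wait_end();
859 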
860  /*
861  * If not part of Flush, need to fsync now. We assume this happens
862  * infrequently enough that it's not a performance issue.
863  */
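864  if (!fdata)
865  {
866  pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
867  if (ctl->do_fsync && pg_fsync(fd))
868  {
869  pgstat_report_wait_end();
870  slru_errcause = SLRU_FSYNC_FAILED;
871  slru_errno = errno;
872  CloseTransientFile(fd);
873  return false;
874  }
875  pgstat_report_wait_end();
876 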
877  if (CloseTransientFile(fd))
878  {
879  slru_errcause = SLRU_CLOSE_FAILED;
880  slru_errno = errno;
881  return false;
882  }
883  }
884 
885  return true;
886 }
#define write(a, b, c)
Definition: win32.h:14
char ** page_buffer
Definition: slru.h:66
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
static SlruErrorCause slru_errcause
Definition: slru.c:123
int lsn_groups_per_page
Definition: slru.h:81
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:79
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2757
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1039
#define MAXPGPATH
#define MAX_FLUSH_BUFFERS
Definition: slru.c:73
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
XLogRecPtr * group_lsn
Definition: slru.h:80
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1235
bool do_fsync
Definition: slru.h:121
int CloseTransientFile(int fd)
Definition: fd.c:2305
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
int num_files
Definition: slru.c:77
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1211
static int slru_errno
Definition: slru.c:124
int i
SlruShared shared
Definition: slru.h:115
int pg_fsync(int fd)
Definition: fd.c:333
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
static void SlruReportIOError ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)
static

Definition at line 893 of file slru.c.

References elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruFlushData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName.

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruFlush(), SimpleLruReadPage(), and SlruInternalWritePage().

894 {
895  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
896  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
897  int offset = rpageno * BLCKSZ;
898  char path[MAXPGPATH];
899 
900  SlruFileName(ctl, path, segno);
901  errno = slru_errno;
902  switch (slru_errcause)
903  {
904  case SLRU_OPEN_FAILED:
905  ereport(ERROR,
906  (errcode_for_file_access(),
907  errmsg("could not access status of transaction %u", xid),
908  errdetail("Could not open file \"%s\": %m.", path)));
909  break;
910  case SLRU_SEEK_FAILED:
911  ereport(ERROR,
912  (errcode_for_file_access(),
913  errmsg("could not access status of transaction %u", xid),
914  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
915  path, offset)));
916  break;
917  case SLRU_READ_FAILED:
918  ereport(ERROR,
919  (errcode_for_file_access(),
920  errmsg("could not access status of transaction %u", xid),
921  errdetail("Could not read from file \"%s\" at offset %u: %m.",
922  path, offset)));
923  break;
924  case SLRU_WRITE_FAILED:
925  ereport(ERROR,
926  (errcode_for_file_access(),
927  errmsg("could not access status of transaction %u", xid),
928  errdetail("Could not write to file \"%s\" at offset %u: %m.",
929  path, offset)));
930  break;
931  case SLRU_FSYNC_FAILED:
932  ereport(ERROR,
933  (errcode_for_file_access(),
934  errmsg("could not access status of transaction %u", xid),
935  errdetail("Could not fsync file \"%s\": %m.",
936  path)));
937  break;
938  case SLRU_CLOSE_FAILED:
939  ereport(ERROR,
940  (errcode_for_file_access(),
941  errmsg("could not access status of transaction %u", xid),
942  errdetail("Could not close file \"%s\": %m.",
943  path)));
944  break;
945  default:
946  /* can't get here, we trust */
947  elog(ERROR, "unrecognized SimpleLru error cause: %d",
948  (int) slru_errcause);
949  break;
950  }
951 }
static SlruErrorCause slru_errcause
Definition: slru.c:123
#define ERROR
Definition: elog.h:43
#define MAXPGPATH
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static int slru_errno
Definition: slru.c:124
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1354 of file slru.c.

References SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), and DeactivateCommitTs().

1355 {
1356  SlruInternalDeleteSegment(ctl, filename);
1357 
1358  return false; /* keep going */
1359 }
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1248
static char * filename
Definition: pg_dumpall.c:90
static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)
static

Definition at line 1339 of file slru.c.

References SlruCtlData::PagePrecedes, and SlruInternalDeleteSegment().

Referenced by SimpleLruTruncate().

1340 {
1341  int cutoffPage = *(int *) data;
1342 
1343  if (ctl->PagePrecedes(segpage, cutoffPage))
1344  SlruInternalDeleteSegment(ctl, filename);
1345 
1346  return false; /* keep going */
1347 }
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1248
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
static char * filename
Definition: pg_dumpall.c:90
bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1322 of file slru.c.

References SlruCtlData::PagePrecedes, and SLRU_PAGES_PER_SEGMENT.

Referenced by TruncateCLOG(), and TruncateCommitTs().

1323 {
1324  int cutoffPage = *(int *) data;
1325 
1326  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1327 
1328  if (ctl->PagePrecedes(segpage, cutoffPage))
1329  return true; /* found one; don't iterate any more */
1330 
1331  return false; /* keep going */
1332 }
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1377 of file slru.c.

References AllocateDir(), callback(), dirent::d_name, DEBUG2, SlruCtlData::Dir, elog, FreeDir(), NULL, ReadDir(), and SLRU_PAGES_PER_SEGMENT.

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

1378 {
1379  bool retval = false;
1380  DIR *cldir;
1381  struct dirent *clde;
1382  int segno;
1383  int segpage;
1384 
1385  cldir = AllocateDir(ctl->Dir);
1386  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1387  {
1388  size_t len;
1389 
1390  len = strlen(clde->d_name);
1391 
1392  if ((len == 4 || len == 5 || len == 6) &&
1393  strspn(clde->d_name, "0123456789ABCDEF") == len)
1394  {
1395  segno = (int) strtol(clde->d_name, NULL, 16);
1396  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1397 
1398  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1399  ctl->Dir, clde->d_name);
1400  retval = callback(ctl, clde->d_name, segpage, data);
1401  if (retval)
1402  break;
1403  }
1404  }
1405  FreeDir(cldir);
1406 
1407  return retval;
1408 }
Definition: dirent.h:9
Definition: dirent.c:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define DEBUG2
Definition: elog.h:24
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2335
char Dir[64]
Definition: slru.h:134
#define NULL
Definition: c.h:229
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2401
char d_name[MAX_PATH]
Definition: dirent.h:14
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int FreeDir(DIR *dir)
Definition: fd.c:2444
static int SlruSelectLRUPage ( SlruCtl  ctl,
int  pageno 
)
static

Definition at line 967 of file slru.c.

References SlruSharedData::cur_lru_count, SlruSharedData::latest_page_number, NULL, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

968 {
969  SlruShared shared = ctl->shared;
970 
971  /* Outer loop handles restart after I/O */
972  for (;;)
973  {
974  int slotno;
975  int cur_count;
976  int bestvalidslot = 0; /* keep compiler quiet */
977  int best_valid_delta = -1;
978  int best_valid_page_number = 0; /* keep compiler quiet */
979  int bestinvalidslot = 0; /* keep compiler quiet */
980  int best_invalid_delta = -1;
981  int best_invalid_page_number = 0; /* keep compiler quiet */
982 
983  /* See if page already has a buffer assigned */
984  for (slotno = 0; slotno < shared->num_slots; slotno++)
985  {
986  if (shared->page_number[slotno] == pageno &&
987  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
988  return slotno;
989  }
990 
991  /*
992  * If we find any EMPTY slot, just select that one. Else choose a
993  * victim page to replace. We normally take the least recently used
994  * valid page, but we will never take the slot containing
995  * latest_page_number, even if it appears least recently used. We
996  * will select a slot that is already I/O busy only if there is no
997  * other choice: a read-busy slot will not be least recently used once
998  * the read finishes, and waiting for an I/O on a write-busy slot is
999  * inferior to just picking some other slot. Testing shows the slot
1000  * we pick instead will often be clean, allowing us to begin a read at
1001  * once.
1002  *
1003  * Normally the page_lru_count values will all be different and so
1004  * there will be a well-defined LRU page. But since we allow
1005  * concurrent execution of SlruRecentlyUsed() within
1006  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1007  * acquire the same lru_count values. In that case we break ties by
1008  * choosing the furthest-back page.
1009  *
1010  * Notice that this next line forcibly advances cur_lru_count to a
1011  * value that is certainly beyond any value that will be in the
1012  * page_lru_count array after the loop finishes. This ensures that
1013  * the next execution of SlruRecentlyUsed will mark the page newly
1014  * used, even if it's for a page that has the current counter value.
1015  * That gets us back on the path to having good data when there are
1016  * multiple pages with the same lru_count.
1017  */
1018  cur_count = (shared->cur_lru_count)++;
1019  for (slotno = 0; slotno < shared->num_slots; slotno++)
1020  {
1021  int this_delta;
1022  int this_page_number;
1023 
1024  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1025  return slotno;
1026  this_delta = cur_count - shared->page_lru_count[slotno];
1027  if (this_delta < 0)
1028  {
1029  /*
1030  * Clean up in case shared updates have caused cur_count
1031  * increments to get "lost". We back off the page counts,
1032  * rather than trying to increase cur_count, to avoid any
1033  * question of infinite loops or failure in the presence of
1034  * wrapped-around counts.
1035  */
1036  shared->page_lru_count[slotno] = cur_count;
1037  this_delta = 0;
1038  }
1039  this_page_number = shared->page_number[slotno];
1040  if (this_page_number == shared->latest_page_number)
1041  continue;
1042  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1043  {
1044  if (this_delta > best_valid_delta ||
1045  (this_delta == best_valid_delta &&
1046  ctl->PagePrecedes(this_page_number,
1047  best_valid_page_number)))
1048  {
1049  bestvalidslot = slotno;
1050  best_valid_delta = this_delta;
1051  best_valid_page_number = this_page_number;
1052  }
1053  }
1054  else
1055  {
1056  if (this_delta > best_invalid_delta ||
1057  (this_delta == best_invalid_delta &&
1058  ctl->PagePrecedes(this_page_number,
1059  best_invalid_page_number)))
1060  {
1061  bestinvalidslot = slotno;
1062  best_invalid_delta = this_delta;
1063  best_invalid_page_number = this_page_number;
1064  }
1065  }
1066  }
1067 
1068  /*
1069  * If all pages (except possibly the latest one) are I/O busy, we'll
1070  * have to wait for an I/O to complete and then retry. In that
1071  * unhappy case, we choose to wait for the I/O on the least recently
1072  * used slot, on the assumption that it was likely initiated first of
1073  * all the I/Os in progress and may therefore finish first.
1074  */
1075  if (best_valid_delta < 0)
1076  {
1077  SimpleLruWaitIO(ctl, bestinvalidslot);
1078  continue;
1079  }
1080 
1081  /*
1082  * If the selected page is clean, we're set.
1083  */
1084  if (!shared->page_dirty[bestvalidslot])
1085  return bestvalidslot;
1086 
1087  /*
1088  * Write the page.
1089  */
1090  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1091 
1092  /*
1093  * Now loop back and try again. This is the easiest way of dealing
1094  * with corner cases such as the victim page being re-dirtied while we
1095  * wrote it.
1096  */
1097  }
1098 }
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int latest_page_number
Definition: slru.h:99
int cur_lru_count
Definition: slru.h:92
SlruPageStatus * page_status
Definition: slru.h:67
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
int * page_lru_count
Definition: slru.h:70
#define NULL
Definition: c.h:229
int num_slots
Definition: slru.h:60
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115

Variable Documentation

int slru_errno
static