PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruFlushData
 

Macros

#define SlruFileName(ctl, path, seg)   snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
 
#define MAX_FLUSH_BUFFERS   16
 
#define SlruRecentlyUsed(shared, slotno)
 

Typedefs

typedef struct SlruFlushData SlruFlushData
 
typedef struct SlruFlushData * SlruFlush
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED, SLRU_SEEK_FAILED, SLRU_READ_FAILED, SLRU_WRITE_FAILED,
  SLRU_FSYNC_FAILED, SLRU_CLOSE_FAILED
}
 

Functions

static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruFlush fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 
static void SlruReportIOError (SlruCtl ctl, int pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, char *filename)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
 
int SimpleLruZeroPage (SlruCtl ctl, int pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int pageno)
 
void SimpleLruFlush (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int segno)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int segpage, void *data)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

#define MAX_FLUSH_BUFFERS   16

Definition at line 73 of file slru.c.

Referenced by SlruPhysicalWritePage().

#define SlruFileName (   ctl,
  path,
  seg 
)    snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)

Definition at line 63 of file slru.c.

#define SlruRecentlyUsed (   shared,
  slotno 
)
Value:
do { \
int new_lru_count = (shared)->cur_lru_count; \
if (new_lru_count != (shared)->page_lru_count[slotno]) { \
(shared)->cur_lru_count = ++new_lru_count; \
(shared)->page_lru_count[slotno] = new_lru_count; \
} \
} while (0)

Definition at line 103 of file slru.c.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

Typedef Documentation

typedef struct SlruFlushData * SlruFlush

Definition at line 82 of file slru.c.

Enumeration Type Documentation

enum SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 113 of file slru.c.

Function Documentation

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int  pageno 
)

Definition at line 590 of file slru.c.

References CloseTransientFile(), endpos, fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, SlruFlushData::segno, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruFileName, and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), and MaybeExtendOffsetSlru().

591 {
592  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
593  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
594  int offset = rpageno * BLCKSZ;
595  char path[MAXPGPATH];
596  int fd;
597  bool result;
598  off_t endpos;
599 
600  SlruFileName(ctl, path, segno);
601 
602  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
603  if (fd < 0)
604  {
605  /* expected: file doesn't exist */
606  if (errno == ENOENT)
607  return false;
608 
609  /* report error normally */
610  slru_errcause = SLRU_OPEN_FAILED;
611  slru_errno = errno;
612  SlruReportIOError(ctl, pageno, 0);
613  }
614 
615  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
616  {
617  slru_errcause = SLRU_OPEN_FAILED;
618  slru_errno = errno;
619  SlruReportIOError(ctl, pageno, 0);
620  }
621 
622  result = endpos >= (off_t) (offset + BLCKSZ);
623 
624  CloseTransientFile(fd);
625  return result;
626 }
static SlruErrorCause slru_errcause
Definition: slru.c:123
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:892
#define PG_BINARY
Definition: c.h:1044
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2167
#define MAXPGPATH
static XLogRecPtr endpos
Definition: pg_receivewal.c:45
int CloseTransientFile(int fd)
Definition: fd.c:2337
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static int slru_errno
Definition: slru.c:124
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
void SimpleLruFlush ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1103 of file slru.c.

References Assert, CloseTransientFile(), SlruSharedData::ControlLock, SlruCtlData::do_fsync, SlruFlushData::fd, i, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), SlruReportIOError(), and WAIT_EVENT_SLRU_FLUSH_SYNC.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), ShutdownCLOG(), ShutdownCommitTs(), ShutdownMultiXact(), and ShutdownSUBTRANS().

1104 {
1105  SlruShared shared = ctl->shared;
1106  SlruFlushData fdata;
1107  int slotno;
1108  int pageno = 0;
1109  int i;
1110  bool ok;
1111 
1112  /*
1113  * Find and write dirty pages
1114  */
1115  fdata.num_files = 0;
1116 
1117  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1118 
1119  for (slotno = 0; slotno < shared->num_slots; slotno++)
1120  {
1121  SlruInternalWritePage(ctl, slotno, &fdata);
1122 
1123  /*
1124  * In some places (e.g. checkpoints), we cannot assert that the slot
1125  * is clean now, since another process might have re-dirtied it
1126  * already. That's okay.
1127  */
1128  Assert(allow_redirtied ||
1129  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1130  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1131  !shared->page_dirty[slotno]));
1132  }
1133 
1134  LWLockRelease(shared->ControlLock);
1135 
1136  /*
1137  * Now fsync and close any files that were open
1138  */
1139  ok = true;
1140  for (i = 0; i < fdata.num_files; i++)
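1141  {
1142  pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1143  if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
1144  {
1145  slru_errcause = SLRU_FSYNC_FAILED;
1146  slru_errno = errno;
1147  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1148  ok = false;
1149  }
1150  pgstat_report_wait_end();
1151 
1152  if (CloseTransientFile(fdata.fd[i]))
1153  {
1154  slru_errcause = SLRU_CLOSE_FAILED;
1155  slru_errno = errno;
1156  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1157  ok = false;
1158  }
1159  }
1160  if (!ok)
1161  SlruReportIOError(ctl, pageno, InvalidTransactionId);
1162 }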
LWLock * ControlLock
Definition: slru.h:57
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
static SlruErrorCause slru_errcause
Definition: slru.c:123
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:79
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:892
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
#define InvalidTransactionId
Definition: transam.h:31
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1244
bool do_fsync
Definition: slru.h:121
int CloseTransientFile(int fd)
Definition: fd.c:2337
int num_files
Definition: slru.c:77
#define Assert(condition)
Definition: c.h:681
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1220
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int num_slots
Definition: slru.h:60
static int slru_errno
Definition: slru.c:124
bool * page_dirty
Definition: slru.h:68
int i
SlruShared shared
Definition: slru.h:115
int pg_fsync(int fd)
Definition: fd.c:338
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
void SimpleLruInit ( SlruCtl  ctl,
const char *  name,
int  nslots,
int  nlsns,
LWLock *  ctllock,
const char *  subdir,
int  tranche_id 
)

Definition at line 165 of file slru.c.

References Assert, SlruSharedData::buffer_locks, BUFFERALIGN, SlruSharedData::ControlLock, SlruSharedData::cur_lru_count, SlruCtlData::Dir, SlruCtlData::do_fsync, SlruSharedData::group_lsn, IsUnderPostmaster, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, SlruSharedData::lwlock_tranche_id, SlruSharedData::lwlock_tranche_name, LWLockInitialize(), LWLockRegisterTranche(), MAXALIGN, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, ShmemInitStruct(), SimpleLruShmemSize(), SLRU_MAX_NAME_LENGTH, SLRU_PAGE_EMPTY, strlcpy(), and StrNCpy.

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), OldSerXidInit(), and SUBTRANSShmemInit().

167 {
168  SlruShared shared;
169  bool found;
170 
171  shared = (SlruShared) ShmemInitStruct(name,
172  SimpleLruShmemSize(nslots, nlsns),
173  &found);
174 
175  if (!IsUnderPostmaster)
176  {
177  /* Initialize locks and shared memory area */
178  char *ptr;
179  Size offset;
180  int slotno;
181 
182  Assert(!found);
183 
184  memset(shared, 0, sizeof(SlruSharedData));
185 
186  shared->ControlLock = ctllock;
187 
188  shared->num_slots = nslots;
189  shared->lsn_groups_per_page = nlsns;
190 
191  shared->cur_lru_count = 0;
192 
193  /* shared->latest_page_number will be set later */
194 
195  ptr = (char *) shared;
196  offset = MAXALIGN(sizeof(SlruSharedData));
197  shared->page_buffer = (char **) (ptr + offset);
198  offset += MAXALIGN(nslots * sizeof(char *));
199  shared->page_status = (SlruPageStatus *) (ptr + offset);
200  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
201  shared->page_dirty = (bool *) (ptr + offset);
202  offset += MAXALIGN(nslots * sizeof(bool));
203  shared->page_number = (int *) (ptr + offset);
204  offset += MAXALIGN(nslots * sizeof(int));
205  shared->page_lru_count = (int *) (ptr + offset);
206  offset += MAXALIGN(nslots * sizeof(int));
207 
208  /* Initialize LWLocks */
209  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
210  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
211 
212  if (nlsns > 0)
213  {
214  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
215  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
216  }
217 
218  Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
219  strlcpy(shared->lwlock_tranche_name, name, SLRU_MAX_NAME_LENGTH);
220  shared->lwlock_tranche_id = tranche_id;
221 
222  ptr += BUFFERALIGN(offset);
223  for (slotno = 0; slotno < nslots; slotno++)
224  {
225  LWLockInitialize(&shared->buffer_locks[slotno].lock,
226  shared->lwlock_tranche_id);
227 
228  shared->page_buffer[slotno] = ptr;
229  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
230  shared->page_dirty[slotno] = false;
231  shared->page_lru_count[slotno] = 0;
232  ptr += BLCKSZ;
233  }
234 
235  /* Should fit to estimated shmem size */
236  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
237  }
238  else
239  Assert(found);
240 
241  /* Register SLRU tranche in the main tranches array */
242  LWLockRegisterTranche(shared->lwlock_tranche_id,
243  shared->lwlock_tranche_name);
244 
245  /*
246  * Initialize the unshared control struct, including directory path. We
247  * assume caller set PagePrecedes.
248  */
249  ctl->shared = shared;
250  ctl->do_fsync = true; /* default behavior */
251  StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
252 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
SlruPageStatus
Definition: slru.h:44
char ** page_buffer
Definition: slru.h:66
int cur_lru_count
Definition: slru.h:92
int lsn_groups_per_page
Definition: slru.h:81
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:145
SlruPageStatus * page_status
Definition: slru.h:67
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH]
Definition: slru.h:103
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
bool IsUnderPostmaster
Definition: globals.c:101
LWLockPadded * buffer_locks
Definition: slru.h:104
XLogRecPtr * group_lsn
Definition: slru.h:80
bool do_fsync
Definition: slru.h:121
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:673
SlruSharedData * SlruShared
Definition: slru.h:107
char Dir[64]
Definition: slru.h:134
#define SLRU_MAX_NAME_LENGTH
Definition: slru.h:36
LWLock lock
Definition: lwlock.h:79
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int * page_lru_count
Definition: slru.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:681
#define StrNCpy(dst, src, len)
Definition: c.h:836
size_t Size
Definition: c.h:350
#define MAXALIGN(LEN)
Definition: c.h:576
void LWLockRegisterTranche(int tranche_id, char *tranche_name)
Definition: lwlock.c:598
int num_slots
Definition: slru.h:60
const char * name
Definition: encode.c:521
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define BUFFERALIGN(LEN)
Definition: c.h:578
int lwlock_tranche_id
Definition: slru.h:102
int SimpleLruReadPage ( SlruCtl  ctl,
int  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 375 of file slru.c.

References Assert, SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalReadPage(), SlruRecentlyUsed, SlruReportIOError(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), GetMultiXactIdMembers(), OldSerXidAdd(), RecordNewMultiXact(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

377 {
378  SlruShared shared = ctl->shared;
379 
380  /* Outer loop handles restart if we must wait for someone else's I/O */
381  for (;;)
382  {
383  int slotno;
384  bool ok;
385 
386  /* See if page already is in memory; if not, pick victim slot */
387  slotno = SlruSelectLRUPage(ctl, pageno);
388 
389  /* Did we find the page in memory? */
390  if (shared->page_number[slotno] == pageno &&
391  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
392  {
393  /*
394  * If page is still being read in, we must wait for I/O. Likewise
395  * if the page is being written and the caller said that's not OK.
396  */
397  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
398  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
399  !write_ok))
400  {
401  SimpleLruWaitIO(ctl, slotno);
402  /* Now we must recheck state from the top */
403  continue;
404  }
405  /* Otherwise, it's ready to use */
406  SlruRecentlyUsed(shared, slotno);
407  return slotno;
408  }
409 
410  /* We found no match; assert we selected a freeable slot */
411  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
412  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
413  !shared->page_dirty[slotno]));
414 
415  /* Mark the slot read-busy */
416  shared->page_number[slotno] = pageno;
417  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
418  shared->page_dirty[slotno] = false;
419 
420  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
421  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
422 
423  /* Release control lock while doing I/O */
424  LWLockRelease(shared->ControlLock);
425 
426  /* Do the read */
427  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
428 
429  /* Set the LSNs for this newly read-in page to zero */
430  SimpleLruZeroLSNs(ctl, slotno);
431 
432  /* Re-acquire control lock and update page state */
433  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
434 
435  Assert(shared->page_number[slotno] == pageno &&
436  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
437  !shared->page_dirty[slotno]);
438 
439  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
440 
441  LWLockRelease(&shared->buffer_locks[slotno].lock);
442 
443  /* Now it's okay to ereport if we failed */
444  if (!ok)
445  SlruReportIOError(ctl, pageno, xid);
446 
447  SlruRecentlyUsed(shared, slotno);
448  return slotno;
449  }
450 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:304
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:892
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
LWLockPadded * buffer_locks
Definition: slru.h:104
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
LWLock lock
Definition: lwlock.h:79
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:639
#define Assert(condition)
Definition: c.h:681
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:966
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)

Definition at line 467 of file slru.c.

References SlruSharedData::ControlLock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruReadPage(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, and SlruRecentlyUsed.

Referenced by asyncQueueReadAllNotifications(), find_multixact_start(), OldSerXidGetMinConflictCommitSeqNo(), SubTransGetParent(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

468 {
469  SlruShared shared = ctl->shared;
470  int slotno;
471 
472  /* Try to find the page while holding only shared lock */
473  LWLockAcquire(shared->ControlLock, LW_SHARED);
474 
475  /* See if page is already in a buffer */
476  for (slotno = 0; slotno < shared->num_slots; slotno++)
477  {
478  if (shared->page_number[slotno] == pageno &&
479  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
480  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
481  {
482  /* See comments for SlruRecentlyUsed macro */
483  SlruRecentlyUsed(shared, slotno);
484  return slotno;
485  }
486  }
487 
488  /* No luck, so switch to normal exclusive lock and do regular read */
489  LWLockRelease(shared->ControlLock);
490  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
491 
492  return SimpleLruReadPage(ctl, pageno, true, xid);
493 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:375
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int num_slots
Definition: slru.h:60
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 145 of file slru.c.

References BUFFERALIGN, and MAXALIGN.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), and SUBTRANSShmemSize().

146 {
147  Size sz;
148 
149  /* we assume nslots isn't so large as to risk overflow */
150  sz = MAXALIGN(sizeof(SlruSharedData));
151  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
152  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
153  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
154  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
155  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
156  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
157 
158  if (nlsns > 0)
159  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
160 
161  return BUFFERALIGN(sz) + BLCKSZ * nslots;
162 }
SlruPageStatus
Definition: slru.h:44
uint64 XLogRecPtr
Definition: xlogdefs.h:21
size_t Size
Definition: c.h:350
#define MAXALIGN(LEN)
Definition: c.h:576
#define BUFFERALIGN(LEN)
Definition: c.h:578
void SimpleLruTruncate ( SlruCtl  ctl,
int  cutoffPage 
)

Definition at line 1168 of file slru.c.

References SlruSharedData::ControlLock, SlruCtlData::Dir, ereport, errmsg(), SlruSharedData::latest_page_number, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformOffsetsTruncation(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

1169 {
1170  SlruShared shared = ctl->shared;
1171  int slotno;
1172 
1173  /*
1174  * The cutoff point is the start of the segment containing cutoffPage.
1175  */
1176  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1177 
1178  /*
1179  * Scan shared memory and remove any pages preceding the cutoff page, to
1180  * ensure we won't rewrite them later. (Since this is normally called in
1181  * or just after a checkpoint, any dirty pages should have been flushed
1182  * already ... we're just being extra careful here.)
1183  */
1184  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1185 
1186 restart:;
1187 
1188  /*
1189  * While we are holding the lock, make an important safety check: the
1190  * planned cutoff point must be <= the current endpoint page. Otherwise we
1191  * have already wrapped around, and proceeding with the truncation would
1192  * risk removing the current segment.
1193  */
1194  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1195  {
1196  LWLockRelease(shared->ControlLock);
1197  ereport(LOG,
1198  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1199  ctl->Dir)));
1200  return;
1201  }
1202 
1203  for (slotno = 0; slotno < shared->num_slots; slotno++)
1204  {
1205  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1206  continue;
1207  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1208  continue;
1209 
1210  /*
1211  * If page is clean, just change state to EMPTY (expected case).
1212  */
1213  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1214  !shared->page_dirty[slotno])
1215  {
1216  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1217  continue;
1218  }
1219 
1220  /*
1221  * Hmm, we have (or may have) I/O operations acting on the page, so
1222  * we've got to wait for them to finish and then start again. This is
1223  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1224  * wouldn't it be OK to just discard it without writing it? For now,
1225  * keep the logic the same as it was.)
1226  */
1227  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1228  SlruInternalWritePage(ctl, slotno, NULL);
1229  else
1230  SimpleLruWaitIO(ctl, slotno);
1231  goto restart;
1232  }
1233 
1234  LWLockRelease(shared->ControlLock);
1235 
1236  /* Now we can remove the old segment(s) */
1237  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1238 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int latest_page_number
Definition: slru.h:99
#define LOG
Definition: elog.h:26
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1338
char Dir[64]
Definition: slru.h:134
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1376
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int num_slots
Definition: slru.h:60
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 321 of file slru.c.

References SlruSharedData::buffer_locks, SlruSharedData::ControlLock, LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlruCtlData::shared, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

322 {
323  SlruShared shared = ctl->shared;
324 
325  /* See notes at top of file */
326  LWLockRelease(shared->ControlLock);
327  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
328  LWLockRelease(&shared->buffer_locks[slotno].lock);
329  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
330 
331  /*
332  * If the slot is still in an io-in-progress state, then either someone
333  * already started a new I/O on the slot, or a previous I/O failed and
334  * neglected to reset the page state. That shouldn't happen, really, but
335  * it seems worth a few extra cycles to check and recover from it. We can
336  * cheaply test for failure by seeing if the buffer lock is still held (we
337  * assume that transaction abort would release the lock).
338  */
339  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
340  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
341  {
342  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
343  {
344  /* indeed, the I/O must have failed */
345  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
346  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
347  else /* write_in_progress */
348  {
349  shared->page_status[slotno] = SLRU_PAGE_VALID;
350  shared->page_dirty[slotno] = true;
351  }
352  LWLockRelease(&shared->buffer_locks[slotno].lock);
353  }
354  }
355 }
LWLock * ControlLock
Definition: slru.h:57
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1289
LWLockPadded * buffer_locks
Definition: slru.h:104
LWLock lock
Definition: lwlock.h:79
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

Definition at line 578 of file slru.c.

References SlruInternalWritePage().

Referenced by ActivateCommitTs(), AsyncShmemInit(), BootStrapCLOG(), BootStrapMultiXact(), BootStrapSUBTRANS(), clog_redo(), commit_ts_redo(), MaybeExtendOffsetSlru(), and multixact_redo().

579 {
580  SlruInternalWritePage(ctl, slotno, NULL);
581 }
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 304 of file slru.c.

References SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, MemSet, and SlruCtlData::shared.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

305 {
306  SlruShared shared = ctl->shared;
307 
308  if (shared->lsn_groups_per_page > 0)
309  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
310  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
311 }
#define MemSet(start, val, len)
Definition: c.h:863
int lsn_groups_per_page
Definition: slru.h:81
XLogRecPtr * group_lsn
Definition: slru.h:80
uint64 XLogRecPtr
Definition: xlogdefs.h:21
SlruShared shared
Definition: slru.h:115
int SimpleLruZeroPage ( SlruCtl  ctl,
int  pageno 
)

Definition at line 263 of file slru.c.

References Assert, SlruSharedData::latest_page_number, MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruRecentlyUsed, and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), AsyncShmemInit(), OldSerXidAdd(), ZeroCLOGPage(), ZeroCommitTsPage(), ZeroMultiXactMemberPage(), ZeroMultiXactOffsetPage(), and ZeroSUBTRANSPage().

264 {
265  SlruShared shared = ctl->shared;
266  int slotno;
267 
268  /* Find a suitable buffer slot for the page */
269  slotno = SlruSelectLRUPage(ctl, pageno);
270  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
271  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
272  !shared->page_dirty[slotno]) ||
273  shared->page_number[slotno] == pageno);
274 
275  /* Mark the slot as containing this page */
276  shared->page_number[slotno] = pageno;
277  shared->page_status[slotno] = SLRU_PAGE_VALID;
278  shared->page_dirty[slotno] = true;
279  SlruRecentlyUsed(shared, slotno);
280 
281  /* Set the buffer to zeroes */
282  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
283 
284  /* Set the LSNs for this new page to zero */
285  SimpleLruZeroLSNs(ctl, slotno);
286 
287  /* Assume this page is now the latest active page */
288  shared->latest_page_number = pageno;
289 
290  return slotno;
291 }
int * page_number
Definition: slru.h:69
int latest_page_number
Definition: slru.h:99
char ** page_buffer
Definition: slru.h:66
#define MemSet(start, val, len)
Definition: c.h:863
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:304
SlruPageStatus * page_status
Definition: slru.h:67
#define Assert(condition)
Definition: c.h:681
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:966
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
void SlruDeleteSegment ( SlruCtl  ctl,
int  segno 
)

Definition at line 1261 of file slru.c.

References SlruSharedData::ControlLock, DEBUG2, SlruCtlData::Dir, ereport, errmsg(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalWritePage(), and snprintf().

Referenced by PerformMembersTruncation().

1262 {
 /*
  * Remove the file of segment segno in ctl's directory, after evicting
  * every buffered page that belongs to that segment.  The control lock is
  * taken exclusively at entry and released just before returning.
  */
1263  SlruShared shared = ctl->shared;
1264  int slotno;
1265  char path[MAXPGPATH];
1266  bool did_write;
1267 
1268  /* Clean out any possibly existing references to the segment. */
1269  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
1270 restart:
1271  did_write = false;
1272  for (slotno = 0; slotno < shared->num_slots; slotno++)
1273  {
1274  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1275 
1276  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1277  continue;
1278 
1279  /* not the segment we're looking for */
1280  if (pagesegno != segno)
1281  continue;
1282 
1283  /* If page is clean, just change state to EMPTY (expected case). */
1284  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1285  !shared->page_dirty[slotno])
1286  {
1287  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1288  continue;
1289  }
1290 
1291  /* Same logic as SimpleLruTruncate() */
1292  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1293  SlruInternalWritePage(ctl, slotno, NULL);
1294  else
1295  SimpleLruWaitIO(ctl, slotno);
1296 
1297  did_write = true;
1298  }
1299 
1300  /*
1301  * Be extra careful and re-check. The IO functions release the control
1302  * lock, so new pages could have been read in.
1303  */
1304  if (did_write)
1305  goto restart;
1306 
 /* Segment files are named by the segment number as 4+ upper-case hex digits. */
1307  snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
1308  ereport(DEBUG2,
1309  (errmsg("removing file \"%s\"", path)));
 /* The result of unlink() is not checked. */
1310  unlink(path);
1311 
1312  LWLockRelease(shared->ControlLock);
1313 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
char Dir[64]
Definition: slru.h:134
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
int num_slots
Definition: slru.h:60
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
static void SlruInternalDeleteSegment ( SlruCtl  ctl,
char *  filename 
)
static

Definition at line 1247 of file slru.c.

References DEBUG2, SlruCtlData::Dir, ereport, errmsg(), MAXPGPATH, and snprintf().

Referenced by SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

1248 {
 /*
  * Unlink the file named filename inside ctl->Dir, logging the removal at
  * DEBUG2.  No buffer bookkeeping or locking is done here.
  */
1249  char path[MAXPGPATH];
1250 
1251  snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1252  ereport(DEBUG2,
1253  (errmsg("removing file \"%s\"", path)));
 /* The result of unlink() is not checked. */
1254  unlink(path);
1255 }
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
#define MAXPGPATH
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel, rest)
Definition: elog.h:122
char Dir[64]
Definition: slru.h:134
static char * filename
Definition: pg_dumpall.c:90
int errmsg(const char *fmt,...)
Definition: elog.c:797
static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruFlush  fdata 
)
static

Definition at line 507 of file slru.c.

References Assert, SlruSharedData::buffer_locks, CloseTransientFile(), SlruSharedData::ControlLock, SlruFlushData::fd, i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruFlushData::num_files, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruFlush(), SimpleLruTruncate(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

508 {
 /*
  * Write out the page held in buffer slot slotno, if it is still dirty and
  * still holds the page it held at entry.  fdata, when non-NULL, is the
  * flush state passed through to SlruPhysicalWritePage().  The control
  * lock is released around the physical write and re-acquired afterwards;
  * a write failure is reported only after the page state is restored.
  */
509  SlruShared shared = ctl->shared;
510  int pageno = shared->page_number[slotno];
511  bool ok;
512 
513  /* If a write is in progress, wait for it to finish */
514  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
515  shared->page_number[slotno] == pageno)
516  {
517  SimpleLruWaitIO(ctl, slotno);
518  }
519 
520  /*
521  * Do nothing if page is not dirty, or if buffer no longer contains the
522  * same page we were called for.
523  */
524  if (!shared->page_dirty[slotno] ||
525  shared->page_status[slotno] != SLRU_PAGE_VALID ||
526  shared->page_number[slotno] != pageno)
527  return;
528 
529  /*
530  * Mark the slot write-busy, and clear the dirtybit. After this point, a
531  * transaction status update on this page will mark it dirty again.
532  */
533  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
534  shared->page_dirty[slotno] = false;
535 
536  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
537  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
538 
539  /* Release control lock while doing I/O */
540  LWLockRelease(shared->ControlLock);
541 
542  /* Do the write */
543  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
544 
545  /* If we failed, and we're in a flush, better close the files */
546  if (!ok && fdata)
547  {
548  int i;
549 
550  for (i = 0; i < fdata->num_files; i++)
551  CloseTransientFile(fdata->fd[i]);
552  }
553 
554  /* Re-acquire control lock and update page state */
555  LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
556 
557  Assert(shared->page_number[slotno] == pageno &&
558  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
559 
560  /* If we failed to write, mark the page dirty again */
561  if (!ok)
562  shared->page_dirty[slotno] = true;
563 
564  shared->page_status[slotno] = SLRU_PAGE_VALID;
565 
566  LWLockRelease(&shared->buffer_locks[slotno].lock);
567 
568  /* Now it's okay to ereport if we failed */
569  if (!ok)
570  SlruReportIOError(ctl, pageno, InvalidTransactionId);
571 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
Definition: slru.c:719
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:892
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1721
SlruPageStatus * page_status
Definition: slru.h:67
LWLockPadded * buffer_locks
Definition: slru.h:104
#define InvalidTransactionId
Definition: transam.h:31
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
int CloseTransientFile(int fd)
Definition: fd.c:2337
int num_files
Definition: slru.c:77
LWLock lock
Definition: lwlock.h:79
#define Assert(condition)
Definition: c.h:681
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1117
bool * page_dirty
Definition: slru.h:68
int i
SlruShared shared
Definition: slru.h:115
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int  pageno,
int  slotno 
)
static

Definition at line 639 of file slru.c.

References CloseTransientFile(), ereport, errmsg(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pgstat_report_wait_end(), pgstat_report_wait_start(), read, SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SlruFileName, and WAIT_EVENT_SLRU_READ.

Referenced by SimpleLruReadPage().

640 {
 /*
  * Read page pageno from its segment file into buffer slot slotno.
  * Returns true on success.  On failure, returns false after recording the
  * failing step in slru_errcause and the errno value in slru_errno; no
  * error is raised here.
  */
641  SlruShared shared = ctl->shared;
642  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
643  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
644  int offset = rpageno * BLCKSZ;
645  char path[MAXPGPATH];
646  int fd;
647 
648  SlruFileName(ctl, path, segno);
649 
650  /*
651  * In a crash-and-restart situation, it's possible for us to receive
652  * commands to set the commit status of transactions whose bits are in
653  * already-truncated segments of the commit log (see notes in
654  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
655  * where the file doesn't exist, and return zeroes instead.
656  */
657  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
658  if (fd < 0)
659  {
660  if (errno != ENOENT || !InRecovery)
661  {
662  slru_errcause = SLRU_OPEN_FAILED;
663  slru_errno = errno;
664  return false;
665  }
666 
667  ereport(LOG,
668  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
669  path)));
670  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
671  return true;
672  }
673 
674  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
675  {
676  slru_errcause = SLRU_SEEK_FAILED;
677  slru_errno = errno;
678  CloseTransientFile(fd);
679  return false;
680  }
681 
 /* Clear errno first so a short read leaves slru_errno at zero. */
682  errno = 0;
683  pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
684  if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
685  {
686  pgstat_report_wait_end();
687  slru_errcause = SLRU_READ_FAILED;
688  slru_errno = errno;
689  CloseTransientFile(fd);
690  return false;
691  }
692  pgstat_report_wait_end();
693 
694  if (CloseTransientFile(fd))
695  {
696  slru_errcause = SLRU_CLOSE_FAILED;
697  slru_errno = errno;
698  return false;
699  }
700 
701  return true;
702 }
char ** page_buffer
Definition: slru.h:66
bool InRecovery
Definition: xlog.c:194
#define MemSet(start, val, len)
Definition: c.h:863
static SlruErrorCause slru_errcause
Definition: slru.c:123
#define LOG
Definition: elog.h:26
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2167
#define MAXPGPATH
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1244
#define ereport(elevel, rest)
Definition: elog.h:122
int CloseTransientFile(int fd)
Definition: fd.c:2337
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1220
static int slru_errno
Definition: slru.c:124
int errmsg(const char *fmt,...)
Definition: elog.c:797
SlruShared shared
Definition: slru.h:115
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
#define read(a, b, c)
Definition: win32.h:13
static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int  pageno,
int  slotno,
SlruFlush  fdata 
)
static

Definition at line 719 of file slru.c.

References CloseTransientFile(), SlruCtlData::do_fsync, END_CRIT_SECTION, SlruFlushData::fd, fd(), SlruSharedData::group_lsn, i, SlruSharedData::lsn_groups_per_page, MAX_FLUSH_BUFFERS, MAXPGPATH, SlruFlushData::num_files, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), SlruFlushData::segno, SlruCtlData::shared, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, SlruFileName, START_CRIT_SECTION, WAIT_EVENT_SLRU_SYNC, WAIT_EVENT_SLRU_WRITE, write, XLogFlush(), and XLogRecPtrIsInvalid.

Referenced by SlruInternalWritePage().

720 {
 /*
  * Write the contents of buffer slot slotno to page pageno of its segment
  * file.  Returns true on success.  On failure, returns false after
  * recording the failing step in slru_errcause and the errno value in
  * slru_errno; no error is raised here.
  *
  * When fdata is non-NULL, a newly opened file is remembered in fdata (up
  * to MAX_FLUSH_BUFFERS entries) and is neither fsync'd nor closed here.
  * Otherwise the file is fsync'd (if ctl->do_fsync) and closed before
  * returning.
  */
721  SlruShared shared = ctl->shared;
722  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
723  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
724  int offset = rpageno * BLCKSZ;
725  char path[MAXPGPATH];
726  int fd = -1;
727 
728  /*
729  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
730  * write out data before associated WAL records. This is the same action
731  * performed during FlushBuffer() in the main buffer manager.
732  */
733  if (shared->group_lsn != NULL)
734  {
735  /*
736  * We must determine the largest async-commit LSN for the page. This
737  * is a bit tedious, but since this entire function is a slow path
738  * anyway, it seems better to do this here than to maintain a per-page
739  * LSN variable (which'd need an extra comparison in the
740  * transaction-commit path).
741  */
742  XLogRecPtr max_lsn;
743  int lsnindex,
744  lsnoff;
745 
746  lsnindex = slotno * shared->lsn_groups_per_page;
747  max_lsn = shared->group_lsn[lsnindex++];
748  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
749  {
750  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
751 
752  if (max_lsn < this_lsn)
753  max_lsn = this_lsn;
754  }
755 
756  if (!XLogRecPtrIsInvalid(max_lsn))
757  {
758  /*
759  * As noted above, elog(ERROR) is not acceptable here, so if
760  * XLogFlush were to fail, we must PANIC. This isn't much of a
761  * restriction because XLogFlush is just about all critical
762  * section anyway, but let's make sure.
763  */
764  START_CRIT_SECTION();
765  XLogFlush(max_lsn);
766  END_CRIT_SECTION();
767  }
768  }
769 
770  /*
771  * During a Flush, we may already have the desired file open.
772  */
773  if (fdata)
774  {
775  int i;
776 
777  for (i = 0; i < fdata->num_files; i++)
778  {
779  if (fdata->segno[i] == segno)
780  {
781  fd = fdata->fd[i];
782  break;
783  }
784  }
785  }
786 
787  if (fd < 0)
788  {
789  /*
790  * If the file doesn't already exist, we should create it. It is
791  * possible for this to need to happen when writing a page that's not
792  * first in its segment; we assume the OS can cope with that. (Note:
793  * it might seem that it'd be okay to create files only when
794  * SimpleLruZeroPage is called for the first page of a segment.
795  * However, if after a crash and restart the REDO logic elects to
796  * replay the log from a checkpoint before the latest one, then it's
797  * possible that we will get commands to set transaction status of
798  * transactions that have already been truncated from the commit log.
799  * Easiest way to deal with that is to accept references to
800  * nonexistent files here and in SlruPhysicalReadPage.)
801  *
802  * Note: it is possible for more than one backend to be executing this
803  * code simultaneously for different pages of the same file. Hence,
804  * don't use O_EXCL or O_TRUNC or anything like that.
805  */
806  SlruFileName(ctl, path, segno);
807  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
808  if (fd < 0)
809  {
810  slru_errcause = SLRU_OPEN_FAILED;
811  slru_errno = errno;
812  return false;
813  }
814 
815  if (fdata)
816  {
817  if (fdata->num_files < MAX_FLUSH_BUFFERS)
818  {
819  fdata->fd[fdata->num_files] = fd;
820  fdata->segno[fdata->num_files] = segno;
821  fdata->num_files++;
822  }
823  else
824  {
825  /*
826  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
827  * fall back to treating it as a standalone write.
828  */
829  fdata = NULL;
830  }
831  }
832  }
833 
834  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
835  {
836  slru_errcause = SLRU_SEEK_FAILED;
837  slru_errno = errno;
838  if (!fdata)
839  CloseTransientFile(fd);
840  return false;
841  }
842 
843  errno = 0;
844  pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
845  if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
846  {
847  pgstat_report_wait_end();
848  /* if write didn't set errno, assume problem is no disk space */
849  if (errno == 0)
850  errno = ENOSPC;
851  slru_errcause = SLRU_WRITE_FAILED;
852  slru_errno = errno;
853  if (!fdata)
854  CloseTransientFile(fd);
855  return false;
856  }
857  pgstat_report_wait_end();
858 
859  /*
860  * If not part of Flush, need to fsync now. We assume this happens
861  * infrequently enough that it's not a performance issue.
862  */
863  if (!fdata)
864  {
865  pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
866  if (ctl->do_fsync && pg_fsync(fd))
867  {
868  pgstat_report_wait_end();
869  slru_errcause = SLRU_FSYNC_FAILED;
870  slru_errno = errno;
871  CloseTransientFile(fd);
872  return false;
873  }
874  pgstat_report_wait_end();
875 
876  if (CloseTransientFile(fd))
877  {
878  slru_errcause = SLRU_CLOSE_FAILED;
879  slru_errno = errno;
880  return false;
881  }
882  }
883 
884  return true;
885 }
#define write(a, b, c)
Definition: win32.h:14
char ** page_buffer
Definition: slru.h:66
#define END_CRIT_SECTION()
Definition: miscadmin.h:133
#define START_CRIT_SECTION()
Definition: miscadmin.h:131
static SlruErrorCause slru_errcause
Definition: slru.c:123
int lsn_groups_per_page
Definition: slru.h:81
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:79
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2773
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2167
#define MAXPGPATH
#define MAX_FLUSH_BUFFERS
Definition: slru.c:73
XLogRecPtr * group_lsn
Definition: slru.h:80
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1244
bool do_fsync
Definition: slru.h:121
int CloseTransientFile(int fd)
Definition: fd.c:2337
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
int num_files
Definition: slru.c:77
uint64 XLogRecPtr
Definition: xlogdefs.h:21
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1220
static int slru_errno
Definition: slru.c:124
int i
SlruShared shared
Definition: slru.h:115
int pg_fsync(int fd)
Definition: fd.c:338
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
static void SlruReportIOError ( SlruCtl  ctl,
int  pageno,
TransactionId  xid 
)
static

Definition at line 892 of file slru.c.

References elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, MAXPGPATH, SlruFlushData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName.

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruFlush(), SimpleLruReadPage(), and SlruInternalWritePage().

893 {
 /*
  * Raise an ERROR describing the most recent SLRU I/O failure, using the
  * cause saved in slru_errcause and the errno value saved in slru_errno.
  * pageno identifies the segment file and offset named in the detail
  * message; xid is the transaction ID shown in the primary message.
  */
894  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
895  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
896  int offset = rpageno * BLCKSZ;
897  char path[MAXPGPATH];
898 
899  SlruFileName(ctl, path, segno);
 /* Restore the saved errno so that %m expands to the original failure. */
900  errno = slru_errno;
901  switch (slru_errcause)
902  {
903  case SLRU_OPEN_FAILED:
904  ereport(ERROR,
905  (errcode_for_file_access(),
906  errmsg("could not access status of transaction %u", xid),
907  errdetail("Could not open file \"%s\": %m.", path)));
908  break;
909  case SLRU_SEEK_FAILED:
910  ereport(ERROR,
911  (errcode_for_file_access(),
912  errmsg("could not access status of transaction %u", xid),
913  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
914  path, offset)));
915  break;
916  case SLRU_READ_FAILED:
917  ereport(ERROR,
918  (errcode_for_file_access(),
919  errmsg("could not access status of transaction %u", xid),
920  errdetail("Could not read from file \"%s\" at offset %u: %m.",
921  path, offset)));
922  break;
923  case SLRU_WRITE_FAILED:
924  ereport(ERROR,
925  (errcode_for_file_access(),
926  errmsg("could not access status of transaction %u", xid),
927  errdetail("Could not write to file \"%s\" at offset %u: %m.",
928  path, offset)));
929  break;
930  case SLRU_FSYNC_FAILED:
931  ereport(ERROR,
932  (errcode_for_file_access(),
933  errmsg("could not access status of transaction %u", xid),
934  errdetail("Could not fsync file \"%s\": %m.",
935  path)));
936  break;
937  case SLRU_CLOSE_FAILED:
938  ereport(ERROR,
939  (errcode_for_file_access(),
940  errmsg("could not access status of transaction %u", xid),
941  errdetail("Could not close file \"%s\": %m.",
942  path)));
943  break;
944  default:
945  /* can't get here, we trust */
946  elog(ERROR, "unrecognized SimpleLru error cause: %d",
947  (int) slru_errcause);
948  break;
949  }
950 }
static SlruErrorCause slru_errcause
Definition: slru.c:123
#define ERROR
Definition: elog.h:43
#define MAXPGPATH
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
#define ereport(elevel, rest)
Definition: elog.h:122
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
static int slru_errno
Definition: slru.c:124
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1353 of file slru.c.

References SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), and DeactivateCommitTs().

1354 {
 /*
  * Directory-scan callback that deletes every segment file it is handed.
  * segpage and data are not used.  Always returns false so that the scan
  * continues over the remaining entries.
  */
1355  SlruInternalDeleteSegment(ctl, filename);
1356 
1357  return false; /* keep going */
1358 }
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1247
static char * filename
Definition: pg_dumpall.c:90
static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)
static

Definition at line 1338 of file slru.c.

References SlruCtlData::PagePrecedes, and SlruInternalDeleteSegment().

Referenced by SimpleLruTruncate().

1339 {
 /*
  * Directory-scan callback that deletes the segment file when its first
  * page (segpage) precedes the cutoff page, according to ctl->PagePrecedes.
  * data must point to an int holding the cutoff page number.  Always
  * returns false so that the scan continues over the remaining entries.
  */
1340  int cutoffPage = *(int *) data;
1341 
1342  if (ctl->PagePrecedes(segpage, cutoffPage))
1343  SlruInternalDeleteSegment(ctl, filename);
1344 
1345  return false; /* keep going */
1346 }
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1247
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
static char * filename
Definition: pg_dumpall.c:90
bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char *  filename,
int  segpage,
void *  data 
)

Definition at line 1321 of file slru.c.

References SlruCtlData::PagePrecedes, and SLRU_PAGES_PER_SEGMENT.

Referenced by TruncateCLOG(), and TruncateCommitTs().

1322 {
 /*
  * Directory-scan callback that reports whether a segment lies before the
  * cutoff.  data must point to an int holding the cutoff page number.
  * Returns true (which stops the scan) as soon as segpage precedes the
  * cutoff according to ctl->PagePrecedes; otherwise returns false.
  * filename is not used.
  */
1323  int cutoffPage = *(int *) data;
1324 
 /* Subtract the remainder so the cutoff is a multiple of SLRU_PAGES_PER_SEGMENT. */
1325  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1326 
1327  if (ctl->PagePrecedes(segpage, cutoffPage))
1328  return true; /* found one; don't iterate any more */
1329 
1330  return false; /* keep going */
1331 }
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void *  data 
)

Definition at line 1376 of file slru.c.

References AllocateDir(), callback(), dirent::d_name, DEBUG2, SlruCtlData::Dir, elog, FreeDir(), ReadDir(), and SLRU_PAGES_PER_SEGMENT.

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateMultiXact().

1377 {
 /*
  * Walk the directory ctl->Dir and invoke callback once for each entry
  * whose name looks like a segment file.  data is passed through to the
  * callback untouched.  The scan stops early when the callback returns
  * true.  Returns the result of the last callback invocation, or false if
  * no entry matched.
  */
1378  bool retval = false;
1379  DIR *cldir;
1380  struct dirent *clde;
1381  int segno;
1382  int segpage;
1383 
1384  cldir = AllocateDir(ctl->Dir);
1385  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1386  {
1387  size_t len;
1388 
1389  len = strlen(clde->d_name);
1390 
 /* Accept only names of 4 to 6 characters made up entirely of upper-case hex digits. */
1391  if ((len == 4 || len == 5 || len == 6) &&
1392  strspn(clde->d_name, "0123456789ABCDEF") == len)
1393  {
 /* The name is the segment number in hex; segpage is that segment's first page. */
1394  segno = (int) strtol(clde->d_name, NULL, 16);
1395  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1396 
1397  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1398  ctl->Dir, clde->d_name);
1399  retval = callback(ctl, clde->d_name, segpage, data);
1400  if (retval)
1401  break;
1402  }
1403  }
1404  FreeDir(cldir);
1405 
1406  return retval;
1407 }
Definition: dirent.h:9
Definition: dirent.c:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define DEBUG2
Definition: elog.h:24
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2367
char Dir[64]
Definition: slru.h:134
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2433
char d_name[MAX_PATH]
Definition: dirent.h:14
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
int FreeDir(DIR *dir)
Definition: fd.c:2476
static int SlruSelectLRUPage ( SlruCtl  ctl,
int  pageno 
)
static

Definition at line 966 of file slru.c.

References SlruSharedData::cur_lru_count, SlruSharedData::latest_page_number, SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, SlruCtlData::PagePrecedes, SlruCtlData::shared, SimpleLruWaitIO(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

967 {
 /*
  * Choose the buffer slot to use for page pageno and return its slot
  * number.  The returned slot is one of: a non-empty slot that already
  * holds pageno, an EMPTY slot, or a VALID slot that is not dirty.  When
  * the best candidate is dirty it is written out first, and when every
  * candidate is I/O busy we wait for one of them; in both cases the whole
  * search is repeated afterwards.
  *
  * NOTE(review): presumably the caller must hold the control lock, since
  * the I/O helpers called here release and re-take it -- confirm against
  * the callers.
  */
968  SlruShared shared = ctl->shared;
969 
970  /* Outer loop handles restart after I/O */
971  for (;;)
972  {
973  int slotno;
974  int cur_count;
975  int bestvalidslot = 0; /* keep compiler quiet */
976  int best_valid_delta = -1;
977  int best_valid_page_number = 0; /* keep compiler quiet */
978  int bestinvalidslot = 0; /* keep compiler quiet */
979  int best_invalid_delta = -1;
980  int best_invalid_page_number = 0; /* keep compiler quiet */
981 
982  /* See if page already has a buffer assigned */
983  for (slotno = 0; slotno < shared->num_slots; slotno++)
984  {
985  if (shared->page_number[slotno] == pageno &&
986  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
987  return slotno;
988  }
989 
990  /*
991  * If we find any EMPTY slot, just select that one. Else choose a
992  * victim page to replace. We normally take the least recently used
993  * valid page, but we will never take the slot containing
994  * latest_page_number, even if it appears least recently used. We
995  * will select a slot that is already I/O busy only if there is no
996  * other choice: a read-busy slot will not be least recently used once
997  * the read finishes, and waiting for an I/O on a write-busy slot is
998  * inferior to just picking some other slot. Testing shows the slot
999  * we pick instead will often be clean, allowing us to begin a read at
1000  * once.
1001  *
1002  * Normally the page_lru_count values will all be different and so
1003  * there will be a well-defined LRU page. But since we allow
1004  * concurrent execution of SlruRecentlyUsed() within
1005  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1006  * acquire the same lru_count values. In that case we break ties by
1007  * choosing the furthest-back page.
1008  *
1009  * Notice that this next line forcibly advances cur_lru_count to a
1010  * value that is certainly beyond any value that will be in the
1011  * page_lru_count array after the loop finishes. This ensures that
1012  * the next execution of SlruRecentlyUsed will mark the page newly
1013  * used, even if it's for a page that has the current counter value.
1014  * That gets us back on the path to having good data when there are
1015  * multiple pages with the same lru_count.
1016  */
1017  cur_count = (shared->cur_lru_count)++;
1018  for (slotno = 0; slotno < shared->num_slots; slotno++)
1019  {
1020  int this_delta;
1021  int this_page_number;
1022 
1023  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1024  return slotno;
 /* A larger delta means the slot was used longer ago. */
1025  this_delta = cur_count - shared->page_lru_count[slotno];
1026  if (this_delta < 0)
1027  {
1028  /*
1029  * Clean up in case shared updates have caused cur_count
1030  * increments to get "lost". We back off the page counts,
1031  * rather than trying to increase cur_count, to avoid any
1032  * question of infinite loops or failure in the presence of
1033  * wrapped-around counts.
1034  */
1035  shared->page_lru_count[slotno] = cur_count;
1036  this_delta = 0;
1037  }
1038  this_page_number = shared->page_number[slotno];
1039  if (this_page_number == shared->latest_page_number)
1040  continue;
 /* Track the best VALID candidate and the best I/O-busy candidate separately. */
1041  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1042  {
1043  if (this_delta > best_valid_delta ||
1044  (this_delta == best_valid_delta &&
1045  ctl->PagePrecedes(this_page_number,
1046  best_valid_page_number)))
1047  {
1048  bestvalidslot = slotno;
1049  best_valid_delta = this_delta;
1050  best_valid_page_number = this_page_number;
1051  }
1052  }
1053  else
1054  {
1055  if (this_delta > best_invalid_delta ||
1056  (this_delta == best_invalid_delta &&
1057  ctl->PagePrecedes(this_page_number,
1058  best_invalid_page_number)))
1059  {
1060  bestinvalidslot = slotno;
1061  best_invalid_delta = this_delta;
1062  best_invalid_page_number = this_page_number;
1063  }
1064  }
1065  }
1066 
1067  /*
1068  * If all pages (except possibly the latest one) are I/O busy, we'll
1069  * have to wait for an I/O to complete and then retry. In that
1070  * unhappy case, we choose to wait for the I/O on the least recently
1071  * used slot, on the assumption that it was likely initiated first of
1072  * all the I/Os in progress and may therefore finish first.
1073  */
1074  if (best_valid_delta < 0)
1075  {
1076  SimpleLruWaitIO(ctl, bestinvalidslot);
1077  continue;
1078  }
1079 
1080  /*
1081  * If the selected page is clean, we're set.
1082  */
1083  if (!shared->page_dirty[bestvalidslot])
1084  return bestvalidslot;
1085 
1086  /*
1087  * Write the page.
1088  */
1089  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1090 
1091  /*
1092  * Now loop back and try again. This is the easiest way of dealing
1093  * with corner cases such as the victim page being re-dirtied while we
1094  * wrote it.
1095  */
1096  }
1097 }
int * page_number
Definition: slru.h:69
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:507
int latest_page_number
Definition: slru.h:99
int cur_lru_count
Definition: slru.h:92
SlruPageStatus * page_status
Definition: slru.h:67
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:321
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
int * page_lru_count
Definition: slru.h:70
int num_slots
Definition: slru.h:60
bool * page_dirty
Definition: slru.h:68
SlruShared shared
Definition: slru.h:115

Variable Documentation

int slru_errno
static