PostgreSQL Source Code  git master
slru.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slru.c
4  * Simple LRU buffering for transaction status logfiles
5  *
6  * We use a simple least-recently-used scheme to manage a pool of page
7  * buffers. Under ordinary circumstances we expect that write
8  * traffic will occur mostly to the latest page (and to the just-prior
9  * page, soon after a page transition). Read traffic will probably touch
10  * a larger span of pages, but in any case a fairly small number of page
11  * buffers should be sufficient. So, we just search the buffers using plain
12  * linear search; there's no need for a hashtable or anything fancy.
13  * The management algorithm is straight LRU except that we will never swap
14  * out the latest page (since we know it's going to be hit again eventually).
15  *
16  * We use a control LWLock to protect the shared data structures, plus
17  * per-buffer LWLocks that synchronize I/O for each buffer. The control lock
18  * must be held to examine or modify any shared state. A process that is
19  * reading in or writing out a page buffer does not hold the control lock,
20  * only the per-buffer lock for the buffer it is working on.
21  *
22  * "Holding the control lock" means exclusive lock in all cases except for
23  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
24  * the implications of that.
25  *
26  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
27  * before releasing the control lock. The per-buffer lock is released after
28  * completing the I/O, re-acquiring the control lock, and updating the shared
29  * state. (Deadlock is not possible here, because we never try to initiate
30  * I/O when someone else is already doing I/O on the same buffer.)
31  * To wait for I/O to complete, release the control lock, acquire the
32  * per-buffer lock in shared mode, immediately release the per-buffer lock,
33  * reacquire the control lock, and then recheck state (since arbitrary things
34  * could have happened while we didn't have the lock).
35  *
36  * As with the regular buffer manager, it is possible for another process
37  * to re-dirty a page that is currently being written out. This is handled
38  * by re-setting the page's page_dirty flag.
39  *
40  *
41  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
42  * Portions Copyright (c) 1994, Regents of the University of California
43  *
44  * src/backend/access/transam/slru.c
45  *
46  *-------------------------------------------------------------------------
47  */
48 #include "postgres.h"
49 
50 #include <fcntl.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53 
54 #include "access/slru.h"
55 #include "access/transam.h"
56 #include "access/xlog.h"
57 #include "miscadmin.h"
58 #include "pgstat.h"
59 #include "storage/fd.h"
60 #include "storage/shmem.h"
61 
/*
 * Build the path of SLRU segment file "seg" into the buffer "path".
 *
 * The result has the form "<ctl->Dir>/<seg>", where the segment number is
 * printed as at least four upper-case hex digits.  "path" must have room for
 * MAXPGPATH bytes; snprintf truncates anything longer.  The macro yields
 * snprintf's return value.
 *
 * The segment number is cast to unsigned int because that is the argument
 * type the %X conversion formally requires.
 */
#define SlruFileName(ctl, path, seg) \
	snprintf((path), MAXPGPATH, "%s/%04X", (ctl)->Dir, (unsigned int) (seg))
64 
65 /*
66  * During SimpleLruFlush(), we will usually not need to write/fsync more
67  * than one or two physical files, but we may need to write several pages
68  * per file. We can consolidate the I/O requests by leaving files open
69  * until control returns to SimpleLruFlush(). This data structure remembers
70  * which files are open.
71  */
/* Upper bound on the number of segment files one flush pass keeps open. */
#define MAX_FLUSH_BUFFERS	16

/*
 * Bookkeeping for the files left open during a flush.  Entries
 * [0, num_files) of fd[] and segno[] are paired: fd[i] is the open
 * descriptor for segment number segno[i].
 */
typedef struct SlruFlushData
{
	int			num_files;		/* # files actually open */
	int			fd[MAX_FLUSH_BUFFERS];	/* their FD's */
	int			segno[MAX_FLUSH_BUFFERS];	/* their log seg#s */
} SlruFlushData;

/* Pointer form handed to the write routines; NULL means a standalone write. */
typedef struct SlruFlushData *SlruFlush;
82 
83 /*
84  * Macro to mark a buffer slot "most recently used". Note multiple evaluation
85  * of arguments!
86  *
87  * The reason for the if-test is that there are often many consecutive
88  * accesses to the same page (particularly the latest page). By suppressing
89  * useless increments of cur_lru_count, we reduce the probability that old
90  * pages' counts will "wrap around" and make them appear recently used.
91  *
92  * We allow this code to be executed concurrently by multiple processes within
93  * SimpleLruReadPage_ReadOnly(). As long as int reads and writes are atomic,
94  * this should not cause any completely-bogus values to enter the computation.
95  * However, it is possible for either cur_lru_count or individual
96  * page_lru_count entries to be "reset" to lower values than they should have,
97  * in case a process is delayed while it executes this macro. With care in
98  * SlruSelectLRUPage(), this does little harm, and in any case the absolute
99  * worst possible consequence is a nonoptimal choice of page to evict. The
100  * gain from allowing concurrent reads of SLRU pages seems worth it.
101  */
/* Expands to one statement; "shared" and "slotno" are each evaluated several times. */
#define SlruRecentlyUsed(shared, slotno) \
	do { \
		int		lru_stamp_ = (shared)->cur_lru_count; \
		if ((shared)->page_lru_count[slotno] != lru_stamp_) { \
			lru_stamp_++; \
			(shared)->cur_lru_count = lru_stamp_; \
			(shared)->page_lru_count[slotno] = lru_stamp_; \
		} \
	} while (0)
110 
111 /* Saved info for SlruReportIOError */
112 typedef enum
113 {
121 
123 static int slru_errno;
124 
125 
126 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
127 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
128 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
129 static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
130 static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
131  SlruFlush fdata);
132 static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
133 static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
134 
135 static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
136  int segpage, void *data);
137 static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename);
138 
139 /*
140  * Initialization of shared memory
141  */
142 
143 Size
144 SimpleLruShmemSize(int nslots, int nlsns)
145 {
146  Size sz;
147 
148  /* we assume nslots isn't so large as to risk overflow */
149  sz = MAXALIGN(sizeof(SlruSharedData));
150  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
151  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
152  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
153  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
154  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
155  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
156 
157  if (nlsns > 0)
158  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
159 
160  return BUFFERALIGN(sz) + BLCKSZ * nslots;
161 }
162 
163 void
164 SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
165  LWLock *ctllock, const char *subdir, int tranche_id)
166 {
167  SlruShared shared;
168  bool found;
169 
170  shared = (SlruShared) ShmemInitStruct(name,
171  SimpleLruShmemSize(nslots, nlsns),
172  &found);
173 
174  if (!IsUnderPostmaster)
175  {
176  /* Initialize locks and shared memory area */
177  char *ptr;
178  Size offset;
179  int slotno;
180 
181  Assert(!found);
182 
183  memset(shared, 0, sizeof(SlruSharedData));
184 
185  shared->ControlLock = ctllock;
186 
187  shared->num_slots = nslots;
188  shared->lsn_groups_per_page = nlsns;
189 
190  shared->cur_lru_count = 0;
191 
192  /* shared->latest_page_number will be set later */
193 
194  ptr = (char *) shared;
195  offset = MAXALIGN(sizeof(SlruSharedData));
196  shared->page_buffer = (char **) (ptr + offset);
197  offset += MAXALIGN(nslots * sizeof(char *));
198  shared->page_status = (SlruPageStatus *) (ptr + offset);
199  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
200  shared->page_dirty = (bool *) (ptr + offset);
201  offset += MAXALIGN(nslots * sizeof(bool));
202  shared->page_number = (int *) (ptr + offset);
203  offset += MAXALIGN(nslots * sizeof(int));
204  shared->page_lru_count = (int *) (ptr + offset);
205  offset += MAXALIGN(nslots * sizeof(int));
206 
207  /* Initialize LWLocks */
208  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
209  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
210 
211  if (nlsns > 0)
212  {
213  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
214  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
215  }
216 
217  Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
219  shared->lwlock_tranche_id = tranche_id;
220 
221  ptr += BUFFERALIGN(offset);
222  for (slotno = 0; slotno < nslots; slotno++)
223  {
224  LWLockInitialize(&shared->buffer_locks[slotno].lock,
225  shared->lwlock_tranche_id);
226 
227  shared->page_buffer[slotno] = ptr;
228  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
229  shared->page_dirty[slotno] = false;
230  shared->page_lru_count[slotno] = 0;
231  ptr += BLCKSZ;
232  }
233 
234  /* Should fit to estimated shmem size */
235  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
236  }
237  else
238  Assert(found);
239 
240  /* Register SLRU tranche in the main tranches array */
242  shared->lwlock_tranche_name);
243 
244  /*
245  * Initialize the unshared control struct, including directory path. We
246  * assume caller set PagePrecedes.
247  */
248  ctl->shared = shared;
249  ctl->do_fsync = true; /* default behavior */
250  StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
251 }
252 
253 /*
254  * Initialize (or reinitialize) a page to zeroes.
255  *
256  * The page is not actually written, just set up in shared memory.
257  * The slot number of the new page is returned.
258  *
259  * Control lock must be held at entry, and will be held at exit.
260  */
261 int
262 SimpleLruZeroPage(SlruCtl ctl, int pageno)
263 {
264  SlruShared shared = ctl->shared;
265  int slotno;
266 
267  /* Find a suitable buffer slot for the page */
268  slotno = SlruSelectLRUPage(ctl, pageno);
269  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
270  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
271  !shared->page_dirty[slotno]) ||
272  shared->page_number[slotno] == pageno);
273 
274  /* Mark the slot as containing this page */
275  shared->page_number[slotno] = pageno;
276  shared->page_status[slotno] = SLRU_PAGE_VALID;
277  shared->page_dirty[slotno] = true;
278  SlruRecentlyUsed(shared, slotno);
279 
280  /* Set the buffer to zeroes */
281  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
282 
283  /* Set the LSNs for this new page to zero */
284  SimpleLruZeroLSNs(ctl, slotno);
285 
286  /* Assume this page is now the latest active page */
287  shared->latest_page_number = pageno;
288 
289  return slotno;
290 }
291 
292 /*
293  * Zero all the LSNs we store for this slru page.
294  *
295  * This should be called each time we create a new page, and each time we read
296  * in a page from disk into an existing buffer. (Such an old page cannot
297  * have any interesting LSNs, since we'd have flushed them before writing
298  * the page in the first place.)
299  *
300  * This assumes that InvalidXLogRecPtr is bitwise-all-0.
301  */
302 static void
303 SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
304 {
305  SlruShared shared = ctl->shared;
306 
307  if (shared->lsn_groups_per_page > 0)
308  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
309  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
310 }
311 
312 /*
313  * Wait for any active I/O on a page slot to finish. (This does not
314  * guarantee that new I/O hasn't been started before we return, though.
315  * In fact the slot might not even contain the same page anymore.)
316  *
317  * Control lock must be held at entry, and will be held at exit.
318  */
319 static void
320 SimpleLruWaitIO(SlruCtl ctl, int slotno)
321 {
322  SlruShared shared = ctl->shared;
323 
324  /* See notes at top of file */
325  LWLockRelease(shared->ControlLock);
326  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
327  LWLockRelease(&shared->buffer_locks[slotno].lock);
329 
330  /*
331  * If the slot is still in an io-in-progress state, then either someone
332  * already started a new I/O on the slot, or a previous I/O failed and
333  * neglected to reset the page state. That shouldn't happen, really, but
334  * it seems worth a few extra cycles to check and recover from it. We can
335  * cheaply test for failure by seeing if the buffer lock is still held (we
336  * assume that transaction abort would release the lock).
337  */
338  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
339  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
340  {
341  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
342  {
343  /* indeed, the I/O must have failed */
344  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
345  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
346  else /* write_in_progress */
347  {
348  shared->page_status[slotno] = SLRU_PAGE_VALID;
349  shared->page_dirty[slotno] = true;
350  }
351  LWLockRelease(&shared->buffer_locks[slotno].lock);
352  }
353  }
354 }
355 
356 /*
357  * Find a page in a shared buffer, reading it in if necessary.
358  * The page number must correspond to an already-initialized page.
359  *
360  * If write_ok is true then it is OK to return a page that is in
361  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
362  * that modification of the page is safe. If write_ok is false then we
363  * will not return the page until it is not undergoing active I/O.
364  *
365  * The passed-in xid is used only for error reporting, and may be
366  * InvalidTransactionId if no specific xid is associated with the action.
367  *
368  * Return value is the shared-buffer slot number now holding the page.
369  * The buffer's LRU access info is updated.
370  *
371  * Control lock must be held at entry, and will be held at exit.
372  */
373 int
374 SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
375  TransactionId xid)
376 {
377  SlruShared shared = ctl->shared;
378 
379  /* Outer loop handles restart if we must wait for someone else's I/O */
380  for (;;)
381  {
382  int slotno;
383  bool ok;
384 
385  /* See if page already is in memory; if not, pick victim slot */
386  slotno = SlruSelectLRUPage(ctl, pageno);
387 
388  /* Did we find the page in memory? */
389  if (shared->page_number[slotno] == pageno &&
390  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
391  {
392  /*
393  * If page is still being read in, we must wait for I/O. Likewise
394  * if the page is being written and the caller said that's not OK.
395  */
396  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
397  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
398  !write_ok))
399  {
400  SimpleLruWaitIO(ctl, slotno);
401  /* Now we must recheck state from the top */
402  continue;
403  }
404  /* Otherwise, it's ready to use */
405  SlruRecentlyUsed(shared, slotno);
406  return slotno;
407  }
408 
409  /* We found no match; assert we selected a freeable slot */
410  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
411  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
412  !shared->page_dirty[slotno]));
413 
414  /* Mark the slot read-busy */
415  shared->page_number[slotno] = pageno;
416  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
417  shared->page_dirty[slotno] = false;
418 
419  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
420  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
421 
422  /* Release control lock while doing I/O */
423  LWLockRelease(shared->ControlLock);
424 
425  /* Do the read */
426  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
427 
428  /* Set the LSNs for this newly read-in page to zero */
429  SimpleLruZeroLSNs(ctl, slotno);
430 
431  /* Re-acquire control lock and update page state */
433 
434  Assert(shared->page_number[slotno] == pageno &&
435  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
436  !shared->page_dirty[slotno]);
437 
438  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
439 
440  LWLockRelease(&shared->buffer_locks[slotno].lock);
441 
442  /* Now it's okay to ereport if we failed */
443  if (!ok)
444  SlruReportIOError(ctl, pageno, xid);
445 
446  SlruRecentlyUsed(shared, slotno);
447  return slotno;
448  }
449 }
450 
451 /*
452  * Find a page in a shared buffer, reading it in if necessary.
453  * The page number must correspond to an already-initialized page.
454  * The caller must intend only read-only access to the page.
455  *
456  * The passed-in xid is used only for error reporting, and may be
457  * InvalidTransactionId if no specific xid is associated with the action.
458  *
459  * Return value is the shared-buffer slot number now holding the page.
460  * The buffer's LRU access info is updated.
461  *
462  * Control lock must NOT be held at entry, but will be held at exit.
463  * It is unspecified whether the lock will be shared or exclusive.
464  */
465 int
467 {
468  SlruShared shared = ctl->shared;
469  int slotno;
470 
471  /* Try to find the page while holding only shared lock */
473 
474  /* See if page is already in a buffer */
475  for (slotno = 0; slotno < shared->num_slots; slotno++)
476  {
477  if (shared->page_number[slotno] == pageno &&
478  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
479  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
480  {
481  /* See comments for SlruRecentlyUsed macro */
482  SlruRecentlyUsed(shared, slotno);
483  return slotno;
484  }
485  }
486 
487  /* No luck, so switch to normal exclusive lock and do regular read */
488  LWLockRelease(shared->ControlLock);
490 
491  return SimpleLruReadPage(ctl, pageno, true, xid);
492 }
493 
494 /*
495  * Write a page from a shared buffer, if necessary.
496  * Does nothing if the specified slot is not dirty.
497  *
498  * NOTE: only one write attempt is made here. Hence, it is possible that
499  * the page is still dirty at exit (if someone else re-dirtied it during
500  * the write). However, we *do* attempt a fresh write even if the page
501  * is already being written; this is for checkpoints.
502  *
503  * Control lock must be held at entry, and will be held at exit.
504  */
505 static void
506 SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
507 {
508  SlruShared shared = ctl->shared;
509  int pageno = shared->page_number[slotno];
510  bool ok;
511 
512  /* If a write is in progress, wait for it to finish */
513  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
514  shared->page_number[slotno] == pageno)
515  {
516  SimpleLruWaitIO(ctl, slotno);
517  }
518 
519  /*
520  * Do nothing if page is not dirty, or if buffer no longer contains the
521  * same page we were called for.
522  */
523  if (!shared->page_dirty[slotno] ||
524  shared->page_status[slotno] != SLRU_PAGE_VALID ||
525  shared->page_number[slotno] != pageno)
526  return;
527 
528  /*
529  * Mark the slot write-busy, and clear the dirtybit. After this point, a
530  * transaction status update on this page will mark it dirty again.
531  */
532  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
533  shared->page_dirty[slotno] = false;
534 
535  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
536  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
537 
538  /* Release control lock while doing I/O */
539  LWLockRelease(shared->ControlLock);
540 
541  /* Do the write */
542  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
543 
544  /* If we failed, and we're in a flush, better close the files */
545  if (!ok && fdata)
546  {
547  int i;
548 
549  for (i = 0; i < fdata->num_files; i++)
550  CloseTransientFile(fdata->fd[i]);
551  }
552 
553  /* Re-acquire control lock and update page state */
555 
556  Assert(shared->page_number[slotno] == pageno &&
557  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
558 
559  /* If we failed to write, mark the page dirty again */
560  if (!ok)
561  shared->page_dirty[slotno] = true;
562 
563  shared->page_status[slotno] = SLRU_PAGE_VALID;
564 
565  LWLockRelease(&shared->buffer_locks[slotno].lock);
566 
567  /* Now it's okay to ereport if we failed */
568  if (!ok)
570 }
571 
572 /*
573  * Wrapper of SlruInternalWritePage, for external callers.
574  * fdata is always passed a NULL here.
575  */
576 void
577 SimpleLruWritePage(SlruCtl ctl, int slotno)
578 {
579  SlruInternalWritePage(ctl, slotno, NULL);
580 }
581 
582 /*
583  * Return whether the given page exists on disk.
584  *
585  * A false return means that either the file does not exist, or that it's not
586  * large enough to contain the given page.
587  */
588 bool
590 {
591  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
592  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
593  int offset = rpageno * BLCKSZ;
594  char path[MAXPGPATH];
595  int fd;
596  bool result;
597  off_t endpos;
598 
599  SlruFileName(ctl, path, segno);
600 
601  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
602  if (fd < 0)
603  {
604  /* expected: file doesn't exist */
605  if (errno == ENOENT)
606  return false;
607 
608  /* report error normally */
610  slru_errno = errno;
611  SlruReportIOError(ctl, pageno, 0);
612  }
613 
614  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
615  {
617  slru_errno = errno;
618  SlruReportIOError(ctl, pageno, 0);
619  }
620 
621  result = endpos >= (off_t) (offset + BLCKSZ);
622 
623  if (CloseTransientFile(fd) != 0)
624  {
626  slru_errno = errno;
627  return false;
628  }
629 
630  return result;
631 }
632 
633 /*
634  * Physical read of a (previously existing) page into a buffer slot
635  *
636  * On failure, we cannot just ereport(ERROR) since caller has put state in
637  * shared memory that must be undone. So, we return false and save enough
638  * info in static variables to let SlruReportIOError make the report.
639  *
640  * For now, assume it's not worth keeping a file pointer open across
641  * read/write operations. We could cache one virtual file pointer ...
642  */
643 static bool
644 SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
645 {
646  SlruShared shared = ctl->shared;
647  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
648  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
649  int offset = rpageno * BLCKSZ;
650  char path[MAXPGPATH];
651  int fd;
652 
653  SlruFileName(ctl, path, segno);
654 
655  /*
656  * In a crash-and-restart situation, it's possible for us to receive
657  * commands to set the commit status of transactions whose bits are in
658  * already-truncated segments of the commit log (see notes in
659  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
660  * where the file doesn't exist, and return zeroes instead.
661  */
662  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
663  if (fd < 0)
664  {
665  if (errno != ENOENT || !InRecovery)
666  {
668  slru_errno = errno;
669  return false;
670  }
671 
672  ereport(LOG,
673  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
674  path)));
675  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
676  return true;
677  }
678 
679  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
680  {
682  slru_errno = errno;
683  CloseTransientFile(fd);
684  return false;
685  }
686 
687  errno = 0;
689  if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
690  {
693  slru_errno = errno;
694  CloseTransientFile(fd);
695  return false;
696  }
698 
699  if (CloseTransientFile(fd) != 0)
700  {
702  slru_errno = errno;
703  return false;
704  }
705 
706  return true;
707 }
708 
709 /*
710  * Physical write of a page from a buffer slot
711  *
712  * On failure, we cannot just ereport(ERROR) since caller has put state in
713  * shared memory that must be undone. So, we return false and save enough
714  * info in static variables to let SlruReportIOError make the report.
715  *
716  * For now, assume it's not worth keeping a file pointer open across
717  * independent read/write operations. We do batch operations during
718  * SimpleLruFlush, though.
719  *
720  * fdata is NULL for a standalone write, pointer to open-file info during
721  * SimpleLruFlush.
722  */
723 static bool
724 SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
725 {
726  SlruShared shared = ctl->shared;
727  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
728  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
729  int offset = rpageno * BLCKSZ;
730  char path[MAXPGPATH];
731  int fd = -1;
732 
733  /*
734  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
735  * write out data before associated WAL records. This is the same action
736  * performed during FlushBuffer() in the main buffer manager.
737  */
738  if (shared->group_lsn != NULL)
739  {
740  /*
741  * We must determine the largest async-commit LSN for the page. This
742  * is a bit tedious, but since this entire function is a slow path
743  * anyway, it seems better to do this here than to maintain a per-page
744  * LSN variable (which'd need an extra comparison in the
745  * transaction-commit path).
746  */
747  XLogRecPtr max_lsn;
748  int lsnindex,
749  lsnoff;
750 
751  lsnindex = slotno * shared->lsn_groups_per_page;
752  max_lsn = shared->group_lsn[lsnindex++];
753  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
754  {
755  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
756 
757  if (max_lsn < this_lsn)
758  max_lsn = this_lsn;
759  }
760 
761  if (!XLogRecPtrIsInvalid(max_lsn))
762  {
763  /*
764  * As noted above, elog(ERROR) is not acceptable here, so if
765  * XLogFlush were to fail, we must PANIC. This isn't much of a
766  * restriction because XLogFlush is just about all critical
767  * section anyway, but let's make sure.
768  */
770  XLogFlush(max_lsn);
772  }
773  }
774 
775  /*
776  * During a Flush, we may already have the desired file open.
777  */
778  if (fdata)
779  {
780  int i;
781 
782  for (i = 0; i < fdata->num_files; i++)
783  {
784  if (fdata->segno[i] == segno)
785  {
786  fd = fdata->fd[i];
787  break;
788  }
789  }
790  }
791 
792  if (fd < 0)
793  {
794  /*
795  * If the file doesn't already exist, we should create it. It is
796  * possible for this to need to happen when writing a page that's not
797  * first in its segment; we assume the OS can cope with that. (Note:
798  * it might seem that it'd be okay to create files only when
799  * SimpleLruZeroPage is called for the first page of a segment.
800  * However, if after a crash and restart the REDO logic elects to
801  * replay the log from a checkpoint before the latest one, then it's
802  * possible that we will get commands to set transaction status of
803  * transactions that have already been truncated from the commit log.
804  * Easiest way to deal with that is to accept references to
805  * nonexistent files here and in SlruPhysicalReadPage.)
806  *
807  * Note: it is possible for more than one backend to be executing this
808  * code simultaneously for different pages of the same file. Hence,
809  * don't use O_EXCL or O_TRUNC or anything like that.
810  */
811  SlruFileName(ctl, path, segno);
812  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
813  if (fd < 0)
814  {
816  slru_errno = errno;
817  return false;
818  }
819 
820  if (fdata)
821  {
822  if (fdata->num_files < MAX_FLUSH_BUFFERS)
823  {
824  fdata->fd[fdata->num_files] = fd;
825  fdata->segno[fdata->num_files] = segno;
826  fdata->num_files++;
827  }
828  else
829  {
830  /*
831  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
832  * fall back to treating it as a standalone write.
833  */
834  fdata = NULL;
835  }
836  }
837  }
838 
839  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
840  {
842  slru_errno = errno;
843  if (!fdata)
844  CloseTransientFile(fd);
845  return false;
846  }
847 
848  errno = 0;
850  if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
851  {
853  /* if write didn't set errno, assume problem is no disk space */
854  if (errno == 0)
855  errno = ENOSPC;
857  slru_errno = errno;
858  if (!fdata)
859  CloseTransientFile(fd);
860  return false;
861  }
863 
864  /*
865  * If not part of Flush, need to fsync now. We assume this happens
866  * infrequently enough that it's not a performance issue.
867  */
868  if (!fdata)
869  {
871  if (ctl->do_fsync && pg_fsync(fd) != 0)
872  {
875  slru_errno = errno;
876  CloseTransientFile(fd);
877  return false;
878  }
880 
881  if (CloseTransientFile(fd) != 0)
882  {
884  slru_errno = errno;
885  return false;
886  }
887  }
888 
889  return true;
890 }
891 
892 /*
893  * Issue the error message after failure of SlruPhysicalReadPage or
894  * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
895  */
896 static void
898 {
899  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
900  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
901  int offset = rpageno * BLCKSZ;
902  char path[MAXPGPATH];
903 
904  SlruFileName(ctl, path, segno);
905  errno = slru_errno;
906  switch (slru_errcause)
907  {
908  case SLRU_OPEN_FAILED:
909  ereport(ERROR,
911  errmsg("could not access status of transaction %u", xid),
912  errdetail("Could not open file \"%s\": %m.", path)));
913  break;
914  case SLRU_SEEK_FAILED:
915  ereport(ERROR,
917  errmsg("could not access status of transaction %u", xid),
918  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
919  path, offset)));
920  break;
921  case SLRU_READ_FAILED:
922  if (errno)
923  ereport(ERROR,
925  errmsg("could not access status of transaction %u", xid),
926  errdetail("Could not read from file \"%s\" at offset %u: %m.",
927  path, offset)));
928  else
929  ereport(ERROR,
930  (errmsg("could not access status of transaction %u", xid),
931  errdetail("Could not read from file \"%s\" at offset %u: read too few bytes.", path, offset)));
932  break;
933  case SLRU_WRITE_FAILED:
934  if (errno)
935  ereport(ERROR,
937  errmsg("could not access status of transaction %u", xid),
938  errdetail("Could not write to file \"%s\" at offset %u: %m.",
939  path, offset)));
940  else
941  ereport(ERROR,
942  (errmsg("could not access status of transaction %u", xid),
943  errdetail("Could not write to file \"%s\" at offset %u: wrote too few bytes.",
944  path, offset)));
945  break;
946  case SLRU_FSYNC_FAILED:
949  errmsg("could not access status of transaction %u", xid),
950  errdetail("Could not fsync file \"%s\": %m.",
951  path)));
952  break;
953  case SLRU_CLOSE_FAILED:
954  ereport(ERROR,
956  errmsg("could not access status of transaction %u", xid),
957  errdetail("Could not close file \"%s\": %m.",
958  path)));
959  break;
960  default:
961  /* can't get here, we trust */
962  elog(ERROR, "unrecognized SimpleLru error cause: %d",
963  (int) slru_errcause);
964  break;
965  }
966 }
967 
968 /*
969  * Select the slot to re-use when we need a free slot.
970  *
971  * The target page number is passed because we need to consider the
972  * possibility that some other process reads in the target page while
973  * we are doing I/O to free a slot. Hence, check or recheck to see if
974  * any slot already holds the target page, and return that slot if so.
975  * Thus, the returned slot is *either* a slot already holding the pageno
976  * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
977  * or CLEAN).
978  *
979  * Control lock must be held at entry, and will be held at exit.
980  */
981 static int
982 SlruSelectLRUPage(SlruCtl ctl, int pageno)
983 {
984  SlruShared shared = ctl->shared;
985 
986  /* Outer loop handles restart after I/O */
987  for (;;)
988  {
989  int slotno;
990  int cur_count;
991  int bestvalidslot = 0; /* keep compiler quiet */
992  int best_valid_delta = -1;
993  int best_valid_page_number = 0; /* keep compiler quiet */
994  int bestinvalidslot = 0; /* keep compiler quiet */
995  int best_invalid_delta = -1;
996  int best_invalid_page_number = 0; /* keep compiler quiet */
997 
998  /* See if page already has a buffer assigned */
999  for (slotno = 0; slotno < shared->num_slots; slotno++)
1000  {
1001  if (shared->page_number[slotno] == pageno &&
1002  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
1003  return slotno;
1004  }
1005 
1006  /*
1007  * If we find any EMPTY slot, just select that one. Else choose a
1008  * victim page to replace. We normally take the least recently used
1009  * valid page, but we will never take the slot containing
1010  * latest_page_number, even if it appears least recently used. We
1011  * will select a slot that is already I/O busy only if there is no
1012  * other choice: a read-busy slot will not be least recently used once
1013  * the read finishes, and waiting for an I/O on a write-busy slot is
1014  * inferior to just picking some other slot. Testing shows the slot
1015  * we pick instead will often be clean, allowing us to begin a read at
1016  * once.
1017  *
1018  * Normally the page_lru_count values will all be different and so
1019  * there will be a well-defined LRU page. But since we allow
1020  * concurrent execution of SlruRecentlyUsed() within
1021  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1022  * acquire the same lru_count values. In that case we break ties by
1023  * choosing the furthest-back page.
1024  *
1025  * Notice that this next line forcibly advances cur_lru_count to a
1026  * value that is certainly beyond any value that will be in the
1027  * page_lru_count array after the loop finishes. This ensures that
1028  * the next execution of SlruRecentlyUsed will mark the page newly
1029  * used, even if it's for a page that has the current counter value.
1030  * That gets us back on the path to having good data when there are
1031  * multiple pages with the same lru_count.
1032  */
1033  cur_count = (shared->cur_lru_count)++;
1034  for (slotno = 0; slotno < shared->num_slots; slotno++)
1035  {
1036  int this_delta;
1037  int this_page_number;
1038 
1039  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1040  return slotno;
1041  this_delta = cur_count - shared->page_lru_count[slotno];
1042  if (this_delta < 0)
1043  {
1044  /*
1045  * Clean up in case shared updates have caused cur_count
1046  * increments to get "lost". We back off the page counts,
1047  * rather than trying to increase cur_count, to avoid any
1048  * question of infinite loops or failure in the presence of
1049  * wrapped-around counts.
1050  */
1051  shared->page_lru_count[slotno] = cur_count;
1052  this_delta = 0;
1053  }
1054  this_page_number = shared->page_number[slotno];
1055  if (this_page_number == shared->latest_page_number)
1056  continue;
1057  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1058  {
1059  if (this_delta > best_valid_delta ||
1060  (this_delta == best_valid_delta &&
1061  ctl->PagePrecedes(this_page_number,
1062  best_valid_page_number)))
1063  {
1064  bestvalidslot = slotno;
1065  best_valid_delta = this_delta;
1066  best_valid_page_number = this_page_number;
1067  }
1068  }
1069  else
1070  {
1071  if (this_delta > best_invalid_delta ||
1072  (this_delta == best_invalid_delta &&
1073  ctl->PagePrecedes(this_page_number,
1074  best_invalid_page_number)))
1075  {
1076  bestinvalidslot = slotno;
1077  best_invalid_delta = this_delta;
1078  best_invalid_page_number = this_page_number;
1079  }
1080  }
1081  }
1082 
1083  /*
1084  * If all pages (except possibly the latest one) are I/O busy, we'll
1085  * have to wait for an I/O to complete and then retry. In that
1086  * unhappy case, we choose to wait for the I/O on the least recently
1087  * used slot, on the assumption that it was likely initiated first of
1088  * all the I/Os in progress and may therefore finish first.
1089  */
1090  if (best_valid_delta < 0)
1091  {
1092  SimpleLruWaitIO(ctl, bestinvalidslot);
1093  continue;
1094  }
1095 
1096  /*
1097  * If the selected page is clean, we're set.
1098  */
1099  if (!shared->page_dirty[bestvalidslot])
1100  return bestvalidslot;
1101 
1102  /*
1103  * Write the page.
1104  */
1105  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1106 
1107  /*
1108  * Now loop back and try again. This is the easiest way of dealing
1109  * with corner cases such as the victim page being re-dirtied while we
1110  * wrote it.
1111  */
1112  }
1113 }
1114 
1115 /*
1116  * Flush dirty pages to disk during checkpoint or database shutdown
1117  */
1118 void
1119 SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
1120 {
1121  SlruShared shared = ctl->shared;
1122  SlruFlushData fdata;
1123  int slotno;
1124  int pageno = 0;
1125  int i;
1126  bool ok;
1127 
1128  /*
1129  * Find and write dirty pages
1130  */
1131  fdata.num_files = 0;
1132 
1134 
1135  for (slotno = 0; slotno < shared->num_slots; slotno++)
1136  {
1137  SlruInternalWritePage(ctl, slotno, &fdata);
1138 
1139  /*
1140  * In some places (e.g. checkpoints), we cannot assert that the slot
1141  * is clean now, since another process might have re-dirtied it
1142  * already. That's okay.
1143  */
1144  Assert(allow_redirtied ||
1145  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1146  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1147  !shared->page_dirty[slotno]));
1148  }
1149 
1150  LWLockRelease(shared->ControlLock);
1151 
1152  /*
1153  * Now fsync and close any files that were open
1154  */
1155  ok = true;
1156  for (i = 0; i < fdata.num_files; i++)
1157  {
1159  if (ctl->do_fsync && pg_fsync(fdata.fd[i]) != 0)
1160  {
1162  slru_errno = errno;
1163  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1164  ok = false;
1165  }
1167 
1168  if (CloseTransientFile(fdata.fd[i]) != 0)
1169  {
1171  slru_errno = errno;
1172  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1173  ok = false;
1174  }
1175  }
1176  if (!ok)
1178 }
1179 
1180 /*
1181  * Remove all segments before the one holding the passed page number
1182  */
1183 void
1184 SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
1185 {
1186  SlruShared shared = ctl->shared;
1187  int slotno;
1188 
1189  /*
1190  * The cutoff point is the start of the segment containing cutoffPage.
1191  */
1192  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1193 
1194  /*
1195  * Scan shared memory and remove any pages preceding the cutoff page, to
1196  * ensure we won't rewrite them later. (Since this is normally called in
1197  * or just after a checkpoint, any dirty pages should have been flushed
1198  * already ... we're just being extra careful here.)
1199  */
1201 
1202 restart:;
1203 
1204  /*
1205  * While we are holding the lock, make an important safety check: the
1206  * planned cutoff point must be <= the current endpoint page. Otherwise we
1207  * have already wrapped around, and proceeding with the truncation would
1208  * risk removing the current segment.
1209  */
1210  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1211  {
1212  LWLockRelease(shared->ControlLock);
1213  ereport(LOG,
1214  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1215  ctl->Dir)));
1216  return;
1217  }
1218 
1219  for (slotno = 0; slotno < shared->num_slots; slotno++)
1220  {
1221  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1222  continue;
1223  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1224  continue;
1225 
1226  /*
1227  * If page is clean, just change state to EMPTY (expected case).
1228  */
1229  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1230  !shared->page_dirty[slotno])
1231  {
1232  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1233  continue;
1234  }
1235 
1236  /*
1237  * Hmm, we have (or may have) I/O operations acting on the page, so
1238  * we've got to wait for them to finish and then start again. This is
1239  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1240  * wouldn't it be OK to just discard it without writing it? For now,
1241  * keep the logic the same as it was.)
1242  */
1243  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1244  SlruInternalWritePage(ctl, slotno, NULL);
1245  else
1246  SimpleLruWaitIO(ctl, slotno);
1247  goto restart;
1248  }
1249 
1250  LWLockRelease(shared->ControlLock);
1251 
1252  /* Now we can remove the old segment(s) */
1253  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1254 }
1255 
1256 /*
1257  * Delete an individual SLRU segment, identified by the filename.
1258  *
1259  * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1260  * they either can't yet contain anything, or have already been cleaned out.
1261  */
1262 static void
1264 {
1265  char path[MAXPGPATH];
1266 
1267  snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1268  ereport(DEBUG2,
1269  (errmsg("removing file \"%s\"", path)));
1270  unlink(path);
1271 }
1272 
1273 /*
1274  * Delete an individual SLRU segment, identified by the segment number.
1275  */
1276 void
1278 {
1279  SlruShared shared = ctl->shared;
1280  int slotno;
1281  char path[MAXPGPATH];
1282  bool did_write;
1283 
1284  /* Clean out any possibly existing references to the segment. */
1286 restart:
1287  did_write = false;
1288  for (slotno = 0; slotno < shared->num_slots; slotno++)
1289  {
1290  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1291 
1292  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1293  continue;
1294 
1295  /* not the segment we're looking for */
1296  if (pagesegno != segno)
1297  continue;
1298 
1299  /* If page is clean, just change state to EMPTY (expected case). */
1300  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1301  !shared->page_dirty[slotno])
1302  {
1303  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1304  continue;
1305  }
1306 
1307  /* Same logic as SimpleLruTruncate() */
1308  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1309  SlruInternalWritePage(ctl, slotno, NULL);
1310  else
1311  SimpleLruWaitIO(ctl, slotno);
1312 
1313  did_write = true;
1314  }
1315 
1316  /*
1317  * Be extra careful and re-check. The IO functions release the control
1318  * lock, so new pages could have been read in.
1319  */
1320  if (did_write)
1321  goto restart;
1322 
1323  snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
1324  ereport(DEBUG2,
1325  (errmsg("removing file \"%s\"", path)));
1326  unlink(path);
1327 
1328  LWLockRelease(shared->ControlLock);
1329 }
1330 
1331 /*
1332  * SlruScanDirectory callback
1333  * This callback reports true if there's any segment prior to the one
1334  * containing the page passed as "data".
1335  */
1336 bool
1337 SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
1338 {
1339  int cutoffPage = *(int *) data;
1340 
1341  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1342 
1343  if (ctl->PagePrecedes(segpage, cutoffPage))
1344  return true; /* found one; don't iterate any more */
1345 
1346  return false; /* keep going */
1347 }
1348 
1349 /*
1350  * SlruScanDirectory callback.
1351  * This callback deletes segments prior to the one passed in as "data".
1352  */
1353 static bool
1354 SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
1355 {
1356  int cutoffPage = *(int *) data;
1357 
1358  if (ctl->PagePrecedes(segpage, cutoffPage))
1359  SlruInternalDeleteSegment(ctl, filename);
1360 
1361  return false; /* keep going */
1362 }
1363 
1364 /*
1365  * SlruScanDirectory callback.
1366  * This callback deletes all segments.
1367  */
1368 bool
1369 SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
1370 {
1371  SlruInternalDeleteSegment(ctl, filename);
1372 
1373  return false; /* keep going */
1374 }
1375 
1376 /*
1377  * Scan the SimpleLru directory and apply a callback to each file found in it.
1378  *
1379  * If the callback returns true, the scan is stopped. The last return value
1380  * from the callback is returned.
1381  *
1382  * The callback receives the following arguments: 1. the SlruCtl struct for the
1383  * slru being truncated; 2. the filename being considered; 3. the page number
1384  * for the first page of that file; 4. a pointer to the opaque data given to us
1385  * by the caller.
1386  *
1387  * Note that the ordering in which the directory is scanned is not guaranteed.
1388  *
1389  * Note that no locking is applied.
1390  */
1391 bool
1393 {
1394  bool retval = false;
1395  DIR *cldir;
1396  struct dirent *clde;
1397  int segno;
1398  int segpage;
1399 
1400  cldir = AllocateDir(ctl->Dir);
1401  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1402  {
1403  size_t len;
1404 
1405  len = strlen(clde->d_name);
1406 
1407  if ((len == 4 || len == 5 || len == 6) &&
1408  strspn(clde->d_name, "0123456789ABCDEF") == len)
1409  {
1410  segno = (int) strtol(clde->d_name, NULL, 16);
1411  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1412 
1413  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1414  ctl->Dir, clde->d_name);
1415  retval = callback(ctl, clde->d_name, segpage, data);
1416  if (retval)
1417  break;
1418  }
1419  }
1420  FreeDir(cldir);
1421 
1422  return retval;
1423 }
LWLock * ControlLock
Definition: slru.h:57
int * page_number
Definition: slru.h:69
Definition: lwlock.h:32
SlruPageStatus
Definition: slru.h:44
uint32 TransactionId
Definition: c.h:514
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:506
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1369
#define write(a, b, c)
Definition: win32.h:14
int latest_page_number
Definition: slru.h:99
char ** page_buffer
Definition: slru.h:66
void SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
Definition: slru.c:1184
bool InRecovery
Definition: xlog.c:200
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
SlruErrorCause
Definition: slru.c:112
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
Definition: slru.c:724
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define MemSet(start, val, len)
Definition: c.h:962
static SlruErrorCause slru_errcause
Definition: slru.c:122
int cur_lru_count
Definition: slru.h:92
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:303
int lsn_groups_per_page
Definition: slru.h:81
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
#define LOG
Definition: elog.h:26
Definition: dirent.h:9
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:144
void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1119
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2805
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:897
#define PG_BINARY
Definition: c.h:1222
struct SlruFlushData * SlruFlush
Definition: slru.c:81
void LWLockRegisterTranche(int tranche_id, const char *tranche_name)
Definition: lwlock.c:603
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1726
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1263
SlruPageStatus * page_status
Definition: slru.h:67
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2292
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH]
Definition: slru.h:103
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:374
#define MAXPGPATH
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
static XLogRecPtr endpos
Definition: pg_receivewal.c:46
#define DEBUG2
Definition: elog.h:24
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1294
#define MAX_FLUSH_BUFFERS
Definition: slru.c:72
bool IsUnderPostmaster
Definition: globals.c:109
LWLockPadded * buffer_locks
Definition: slru.h:104
int errdetail(const char *fmt,...)
Definition: elog.c:955
int errcode_for_file_access(void)
Definition: elog.c:631
#define InvalidTransactionId
Definition: transam.h:31
XLogRecPtr * group_lsn
Definition: slru.h:80
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2503
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
Definition: slru.c:589
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:577
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1342
#define ereport(elevel, rest)
Definition: elog.h:141
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:320
bool do_fsync
Definition: slru.h:121
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:678
int CloseTransientFile(int fd)
Definition: fd.c:2469
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.h:153
SlruSharedData * SlruShared
Definition: slru.h:107
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1337
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1354
#define SlruFileName(ctl, path, seg)
Definition: slru.c:62
int data_sync_elevel(int elevel)
Definition: fd.c:3519
char Dir[64]
Definition: slru.h:134
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:466
#define SLRU_MAX_NAME_LENGTH
Definition: slru.h:36
int num_files
Definition: slru.c:76
LWLock lock
Definition: lwlock.h:79
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:644
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int * page_lru_count
Definition: slru.h:70
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:739
#define StrNCpy(dst, src, len)
Definition: c.h:935
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1392
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2569
size_t Size
Definition: c.h:467
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1318
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1122
#define MAXALIGN(LEN)
Definition: c.h:692
int num_slots
Definition: slru.h:60
const char * name
Definition: encode.c:521
struct SlruFlushData SlruFlushData
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:982
static int slru_errno
Definition: slru.c:123
static char * filename
Definition: pg_dumpall.c:90
int errmsg(const char *fmt,...)
Definition: elog.c:822
bool * page_dirty
Definition: slru.h:68
#define elog(elevel,...)
Definition: elog.h:228
int i
SlruShared shared
Definition: slru.h:115
#define BUFFERALIGN(LEN)
Definition: c.h:694
int lwlock_tranche_id
Definition: slru.h:102
bool(* PagePrecedes)(int, int)
Definition: slru.h:128
void SlruDeleteSegment(SlruCtl ctl, int segno)
Definition: slru.c:1277
int pg_fsync(int fd)
Definition: fd.c:330
char d_name[MAX_PATH]
Definition: dirent.h:14
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:33
#define snprintf
Definition: port.h:192
int SimpleLruZeroPage(SlruCtl ctl, int pageno)
Definition: slru.c:262
#define read(a, b, c)
Definition: win32.h:13
int FreeDir(DIR *dir)
Definition: fd.c:2621
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
Definition: slru.c:164
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:102
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:77