PostgreSQL Source Code  git master
slru.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slru.c
4  * Simple LRU buffering for wrap-around-able permanent metadata
5  *
6  * This module is used to maintain various pieces of transaction status
7  * indexed by TransactionId (such as commit status, parent transaction ID,
8  * commit timestamp), as well as storage for multixacts, serializable
9  * isolation locks and NOTIFY traffic. Extensions can define their own
10  * SLRUs, too.
11  *
12  * Under ordinary circumstances we expect that write traffic will occur
13  * mostly to the latest page (and to the just-prior page, soon after a
14  * page transition). Read traffic will probably touch a larger span of
15  * pages, but a relatively small number of buffers should be sufficient.
16  *
17  * We use a simple least-recently-used scheme to manage a pool of shared
18  * page buffers, split in banks by the lowest bits of the page number, and
19  * the management algorithm only processes the bank to which the desired
20  * page belongs, so a linear search is sufficient; there's no need for a
21  * hashtable or anything fancy. The algorithm is straight LRU except that
22  * we will never swap out the latest page (since we know it's going to be
23  * hit again eventually).
24  *
25  * We use per-bank control LWLocks to protect the shared data structures,
26  * plus per-buffer LWLocks that synchronize I/O for each buffer. The
27  * bank's control lock must be held to examine or modify any of the bank's
28  * shared state. A process that is reading in or writing out a page
29  * buffer does not hold the control lock, only the per-buffer lock for the
30  * buffer it is working on. One exception is latest_page_number, which is
31  * read and written using atomic ops.
32  *
33  * "Holding the bank control lock" means exclusive lock in all cases
34  * except for SimpleLruReadPage_ReadOnly(); see comments for
35  * SlruRecentlyUsed() for the implications of that.
36  *
37  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
38  * before releasing the control lock. The per-buffer lock is released after
39  * completing the I/O, re-acquiring the control lock, and updating the shared
40  * state. (Deadlock is not possible here, because we never try to initiate
41  * I/O when someone else is already doing I/O on the same buffer.)
42  * To wait for I/O to complete, release the control lock, acquire the
43  * per-buffer lock in shared mode, immediately release the per-buffer lock,
44  * reacquire the control lock, and then recheck state (since arbitrary things
45  * could have happened while we didn't have the lock).
46  *
47  * As with the regular buffer manager, it is possible for another process
48  * to re-dirty a page that is currently being written out. This is handled
49  * by re-setting the page's page_dirty flag.
50  *
51  *
52  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
53  * Portions Copyright (c) 1994, Regents of the University of California
54  *
55  * src/backend/access/transam/slru.c
56  *
57  *-------------------------------------------------------------------------
58  */
59 #include "postgres.h"
60 
61 #include <fcntl.h>
62 #include <sys/stat.h>
63 #include <unistd.h>
64 
65 #include "access/slru.h"
66 #include "access/transam.h"
67 #include "access/xlog.h"
68 #include "access/xlogutils.h"
69 #include "miscadmin.h"
70 #include "pgstat.h"
71 #include "storage/fd.h"
72 #include "storage/shmem.h"
73 #include "utils/guc_hooks.h"
74 
75 static inline int
76 SlruFileName(SlruCtl ctl, char *path, int64 segno)
77 {
78  if (ctl->long_segment_names)
79  {
80  /*
81  * We could use 16 characters here but the disadvantage would be that
82  * the SLRU segments will be hard to distinguish from WAL segments.
83  *
84  * For this reason we use 15 characters. It is enough but also means
85  * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
86  */
87  Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
88  return snprintf(path, MAXPGPATH, "%s/%015llX", ctl->Dir,
89  (long long) segno);
90  }
91  else
92  {
93  /*
94  * Despite the fact that %04X format string is used up to 24 bit
95  * integers are allowed. See SlruCorrectSegmentFilenameLength()
96  */
97  Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
98  return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
99  (unsigned int) segno);
100  }
101 }
102 
103 /*
104  * During SimpleLruWriteAll(), we will usually not need to write more than one
105  * or two physical files, but we may need to write several pages per file. We
106  * can consolidate the I/O requests by leaving files open until control returns
107  * to SimpleLruWriteAll(). This data structure remembers which files are open.
108  */
109 #define MAX_WRITEALL_BUFFERS 16
110 
111 typedef struct SlruWriteAllData
112 {
113  int num_files; /* # files actually open */
114  int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */
115  int64 segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */
117 
119 
120 
121 /*
122  * Bank size for the slot array. Pages are assigned a bank according to their
123  * page number, with each bank being this size. We want a power of 2 so that
124  * we can determine the bank number for a page with just bit shifting; we also
125  * want to keep the bank size small so that LRU victim search is fast. 16
126  * buffers per bank seems a good number.
127  */
128 #define SLRU_BANK_BITSHIFT 4
129 #define SLRU_BANK_SIZE (1 << SLRU_BANK_BITSHIFT)
130 
131 /*
132  * Macro to get the bank number to which the slot belongs.
133  */
134 #define SlotGetBankNumber(slotno) ((slotno) >> SLRU_BANK_BITSHIFT)
135 
136 
137 /*
138  * Populate a file tag describing a segment file. We only use the segment
139  * number, since we can derive everything else we need by having separate
140  * sync handler functions for clog, multixact etc.
141  */
/*
 * INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 *
 * Expands to one parenthesized comma expression: zero sizeof(FileTag) bytes
 * at "a", then assign its handler and segno members.  "a" appears three times
 * in the expansion, so pass a plain lvalue without side effects.
 */
142 #define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
143 ( \
144  memset(&(a), 0, sizeof(FileTag)), \
145  (a).handler = (xx_handler), \
146  (a).segno = (xx_segno) \
147 )
148 
149 /* Saved info for SlruReportIOError */
/*
 * NOTE(review): this copy of the file lacks listing lines 152-158 and 160,
 * i.e. the enumerator list, the closing of the enum, and presumably the
 * declaration of the slru_errcause variable that SlruReportIOError switches
 * on (SLRU_OPEN_FAILED is one of the values it tests).  Restore them from the
 * authoritative source before building.
 */
150 typedef enum
151 {
159
/* errno captured at the failure site; SlruReportIOError copies it back to errno */
161 static int slru_errno;
162 
163 
164 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
165 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
166 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
167 static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno);
168 static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno,
169  SlruWriteAll fdata);
170 static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid);
171 static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno);
172 
173 static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
174  int64 segpage, void *data);
175 static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno);
176 static inline void SlruRecentlyUsed(SlruShared shared, int slotno);
177 
178 
179 /*
180  * Initialization of shared memory
181  */
182 
183 Size
184 SimpleLruShmemSize(int nslots, int nlsns)
185 {
186  int nbanks = nslots / SLRU_BANK_SIZE;
187  Size sz;
188 
189  Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
190  Assert(nslots % SLRU_BANK_SIZE == 0);
191 
192  /* we assume nslots isn't so large as to risk overflow */
193  sz = MAXALIGN(sizeof(SlruSharedData));
194  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
195  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
196  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
197  sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
198  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
199  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
200  sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
201  sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
202 
203  if (nlsns > 0)
204  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
205 
206  return BUFFERALIGN(sz) + BLCKSZ * nslots;
207 }
208 
209 /*
210  * Determine a number of SLRU buffers to use.
211  *
212  * We simply divide shared_buffers by the divisor given and cap
213  * that at the maximum given; but always at least SLRU_BANK_SIZE.
214  * Round down to the nearest multiple of SLRU_BANK_SIZE.
215  */
216 int
217 SimpleLruAutotuneBuffers(int divisor, int max)
218 {
219  return Min(max - (max % SLRU_BANK_SIZE),
221  NBuffers / divisor - (NBuffers / divisor) % SLRU_BANK_SIZE));
222 }
223 
224 /*
225  * Initialize, or attach to, a simple LRU cache in shared memory.
226  *
227  * ctl: address of local (unshared) control structure.
228  * name: name of SLRU. (This is user-visible, pick with care!)
229  * nslots: number of page slots to use.
230  * nlsns: number of LSN groups per page (set to zero if not relevant).
231  * subdir: PGDATA-relative subdirectory that will contain the files.
232  * buffer_tranche_id: tranche ID to use for the SLRU's per-buffer LWLocks.
233  * bank_tranche_id: tranche ID to use for the bank LWLocks.
234  * sync_handler: which set of functions to use to handle sync requests
235  */
/*
 * long_segment_names: copied into ctl->long_segment_names; SlruFileName()
 * consults it to choose between the 15-hex-digit and the "%04X" segment file
 * name formats.
 */
236 void
237 SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
238  const char *subdir, int buffer_tranche_id, int bank_tranche_id,
239  SyncRequestHandler sync_handler, bool long_segment_names)
240 {
241  SlruShared shared;
242  bool found;
243  int nbanks = nslots / SLRU_BANK_SIZE;
244 
245  Assert(nslots <= SLRU_MAX_ALLOWED_BUFFERS);
246 
247  shared = (SlruShared) ShmemInitStruct(name,
248  SimpleLruShmemSize(nslots, nlsns),
249  &found);
250 
/*
 * Only the !IsUnderPostmaster caller lays out and initializes the area; any
 * other caller must find it already present with the same slot count (see
 * the else branch).
 */
251  if (!IsUnderPostmaster)
252  {
253  /* Initialize locks and shared memory area */
254  char *ptr;
255  Size offset;
256 
257  Assert(!found);
258 
/* Only the fixed header is cleared here; the arrays are set up below. */
259  memset(shared, 0, sizeof(SlruSharedData));
260 
261  shared->num_slots = nslots;
262  shared->lsn_groups_per_page = nlsns;
263 
/*
 * NOTE(review): listing lines 264 and 266 are missing from this copy.  The
 * visible code never explicitly initializes latest_page_number or
 * slru_stats_idx (both are used elsewhere in this file), so those
 * statements most likely stood here -- confirm against the real source.
 */
265 
267 
/*
 * Carve the arrays out of the chunk, in the same order and with the same
 * MAXALIGN padding that SimpleLruShmemSize() adds up.
 */
268  ptr = (char *) shared;
269  offset = MAXALIGN(sizeof(SlruSharedData));
270  shared->page_buffer = (char **) (ptr + offset);
271  offset += MAXALIGN(nslots * sizeof(char *));
272  shared->page_status = (SlruPageStatus *) (ptr + offset);
273  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
274  shared->page_dirty = (bool *) (ptr + offset);
275  offset += MAXALIGN(nslots * sizeof(bool));
276  shared->page_number = (int64 *) (ptr + offset);
277  offset += MAXALIGN(nslots * sizeof(int64));
278  shared->page_lru_count = (int *) (ptr + offset);
279  offset += MAXALIGN(nslots * sizeof(int));
280 
281  /* Initialize LWLocks */
282  shared->buffer_locks = (LWLockPadded *) (ptr + offset);
283  offset += MAXALIGN(nslots * sizeof(LWLockPadded));
284  shared->bank_locks = (LWLockPadded *) (ptr + offset);
285  offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
286  shared->bank_cur_lru_count = (int *) (ptr + offset);
287  offset += MAXALIGN(nbanks * sizeof(int));
288 
/* group_lsn stays NULL (from the memset above) when nlsns is zero. */
289  if (nlsns > 0)
290  {
291  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
292  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
293  }
294 
/* Page buffers begin BUFFERALIGN(offset) bytes into the chunk, BLCKSZ apart. */
295  ptr += BUFFERALIGN(offset);
296  for (int slotno = 0; slotno < nslots; slotno++)
297  {
298  LWLockInitialize(&shared->buffer_locks[slotno].lock,
299  buffer_tranche_id);
300 
/*
 * page_number[] is not set here; every slot starts out SLRU_PAGE_EMPTY and
 * the lookups visible in this file test the status before the page number.
 */
301  shared->page_buffer[slotno] = ptr;
302  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
303  shared->page_dirty[slotno] = false;
304  shared->page_lru_count[slotno] = 0;
305  ptr += BLCKSZ;
306  }
307 
308  /* Initialize the slot banks. */
309  for (int bankno = 0; bankno < nbanks; bankno++)
310  {
311  LWLockInitialize(&shared->bank_locks[bankno].lock, bank_tranche_id);
312  shared->bank_cur_lru_count[bankno] = 0;
313  }
314 
315  /* Should fit to estimated shmem size */
316  Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
317  }
318  else
319  {
320  Assert(found);
321  Assert(shared->num_slots == nslots);
322  }
323 
324  /*
325  * Initialize the unshared control struct, including directory path. We
326  * assume caller set PagePrecedes.
327  */
328  ctl->shared = shared;
329  ctl->sync_handler = sync_handler;
330  ctl->long_segment_names = long_segment_names;
/*
 * NOTE(review): bank_mask is (number of banks - 1) and is ANDed with the page
 * number to pick a bank.  That reaches every bank only when the bank count is
 * a power of two, whereas check_slru_buffers() merely requires nslots to be a
 * multiple of SLRU_BANK_SIZE (e.g. 3 banks gives mask 2, so bank 1 is never
 * selected).  Worth confirming whether that is intended.
 */
331  ctl->bank_mask = (nslots / SLRU_BANK_SIZE) - 1;
332  strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
333 }
334 
335 /*
336  * Helper function for GUC check_hook to check whether slru buffers are in
337  * multiples of SLRU_BANK_SIZE.
338  */
339 bool
340 check_slru_buffers(const char *name, int *newval)
341 {
342  /* Valid values are multiples of SLRU_BANK_SIZE */
343  if (*newval % SLRU_BANK_SIZE == 0)
344  return true;
345 
346  GUC_check_errdetail("\"%s\" must be a multiple of %d", name,
348  return false;
349 }
350 
351 /*
352  * Initialize (or reinitialize) a page to zeroes.
353  *
354  * The page is not actually written, just set up in shared memory.
355  * The slot number of the new page is returned.
356  *
357  * Bank lock must be held at entry, and will be held at exit.
358  */
/*
 * NOTE(review): listing line 360 (the function name and parameter list) is
 * missing from this copy.  The body uses "ctl" and "pageno", and a comment
 * elsewhere in this file refers to this routine as SimpleLruZeroPage.
 */
359 int
361 {
362  SlruShared shared = ctl->shared;
363  int slotno;
364 
/* NOTE(review): listing line 365 is missing from this copy. */
366 
367  /* Find a suitable buffer slot for the page */
368  slotno = SlruSelectLRUPage(ctl, pageno);
/*
 * The chosen slot must be reusable as-is: empty, holding a clean valid page,
 * or already holding this very page.
 */
369  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
370  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
371  !shared->page_dirty[slotno]) ||
372  shared->page_number[slotno] == pageno);
373 
374  /* Mark the slot as containing this page */
375  shared->page_number[slotno] = pageno;
376  shared->page_status[slotno] = SLRU_PAGE_VALID;
/* Dirty from the start: the zeroed image exists only in memory so far. */
377  shared->page_dirty[slotno] = true;
378  SlruRecentlyUsed(shared, slotno);
379 
380  /* Set the buffer to zeroes */
381  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
382 
383  /* Set the LSNs for this new page to zero */
384  SimpleLruZeroLSNs(ctl, slotno);
385 
386  /*
387  * Assume this page is now the latest active page.
388  *
389  * Note that because both this routine and SlruSelectLRUPage run with
390  * ControlLock held, it is not possible for this to be zeroing a page that
391  * SlruSelectLRUPage is going to evict simultaneously. Therefore, there's
392  * no memory barrier here.
393  */
394  pg_atomic_write_u64(&shared->latest_page_number, pageno);
395 
396  /* update the stats counter of zeroed pages */
/* NOTE(review): listing line 397 (the counter update itself) is missing from this copy. */
398 
399  return slotno;
400 }
401 
402 /*
403  * Zero all the LSNs we store for this slru page.
404  *
405  * This should be called each time we create a new page, and each time we read
406  * in a page from disk into an existing buffer. (Such an old page cannot
407  * have any interesting LSNs, since we'd have flushed them before writing
408  * the page in the first place.)
409  *
410  * This assumes that InvalidXLogRecPtr is bitwise-all-0.
411  */
412 static void
414 {
415  SlruShared shared = ctl->shared;
416 
417  if (shared->lsn_groups_per_page > 0)
418  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
419  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
420 }
421 
422 /*
423  * Wait for any active I/O on a page slot to finish. (This does not
424  * guarantee that new I/O hasn't been started before we return, though.
425  * In fact the slot might not even contain the same page anymore.)
426  *
427  * Bank lock must be held at entry, and will be held at exit.
428  */
429 static void
431 {
432  SlruShared shared = ctl->shared;
433  int bankno = SlotGetBankNumber(slotno);
434 
435  Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
436 
437  /* See notes at top of file */
438  LWLockRelease(&shared->bank_locks[bankno].lock);
439  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
440  LWLockRelease(&shared->buffer_locks[slotno].lock);
441  LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
442 
443  /*
444  * If the slot is still in an io-in-progress state, then either someone
445  * already started a new I/O on the slot, or a previous I/O failed and
446  * neglected to reset the page state. That shouldn't happen, really, but
447  * it seems worth a few extra cycles to check and recover from it. We can
448  * cheaply test for failure by seeing if the buffer lock is still held (we
449  * assume that transaction abort would release the lock).
450  */
451  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
452  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
453  {
454  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
455  {
456  /* indeed, the I/O must have failed */
457  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
458  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
459  else /* write_in_progress */
460  {
461  shared->page_status[slotno] = SLRU_PAGE_VALID;
462  shared->page_dirty[slotno] = true;
463  }
464  LWLockRelease(&shared->buffer_locks[slotno].lock);
465  }
466  }
467 }
468 
469 /*
470  * Find a page in a shared buffer, reading it in if necessary.
471  * The page number must correspond to an already-initialized page.
472  *
473  * If write_ok is true then it is OK to return a page that is in
474  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
475  * that modification of the page is safe. If write_ok is false then we
476  * will not return the page until it is not undergoing active I/O.
477  *
478  * The passed-in xid is used only for error reporting, and may be
479  * InvalidTransactionId if no specific xid is associated with the action.
480  *
481  * Return value is the shared-buffer slot number now holding the page.
482  * The buffer's LRU access info is updated.
483  *
484  * The correct bank lock must be held at entry, and will be held at exit.
485  */
486 int
487 SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
488  TransactionId xid)
489 {
490  SlruShared shared = ctl->shared;
491  LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
492 
/* NOTE(review): listing line 493 is missing from this copy. */
494 
495  /* Outer loop handles restart if we must wait for someone else's I/O */
496  for (;;)
497  {
498  int slotno;
499  bool ok;
500 
501  /* See if page already is in memory; if not, pick victim slot */
502  slotno = SlruSelectLRUPage(ctl, pageno);
503 
504  /* Did we find the page in memory? */
505  if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
506  shared->page_number[slotno] == pageno)
507  {
508  /*
509  * If page is still being read in, we must wait for I/O. Likewise
510  * if the page is being written and the caller said that's not OK.
511  */
512  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
513  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
514  !write_ok))
515  {
516  SimpleLruWaitIO(ctl, slotno);
517  /* Now we must recheck state from the top */
518  continue;
519  }
520  /* Otherwise, it's ready to use */
521  SlruRecentlyUsed(shared, slotno);
522 
523  /* update the stats counter of pages found in the SLRU */
/* NOTE(review): listing line 524 (the counter update itself) is missing from this copy. */
525 
526  return slotno;
527  }
528 
529  /* We found no match; assert we selected a freeable slot */
530  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
531  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
532  !shared->page_dirty[slotno]));
533 
534  /* Mark the slot read-busy */
535  shared->page_number[slotno] = pageno;
536  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
537  shared->page_dirty[slotno] = false;
538 
/*
 * The buffer lock is taken before the bank lock is dropped, so the slot is
 * never visible as READ_IN_PROGRESS without its I/O lock being held.
 */
539  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
540  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
541 
542  /* Release bank lock while doing I/O */
543  LWLockRelease(banklock);
544 
545  /* Do the read */
546  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
547 
548  /* Set the LSNs for this newly read-in page to zero */
549  SimpleLruZeroLSNs(ctl, slotno);
550 
551  /* Re-acquire bank control lock and update page state */
552  LWLockAcquire(banklock, LW_EXCLUSIVE);
553 
554  Assert(shared->page_number[slotno] == pageno &&
555  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
556  !shared->page_dirty[slotno]);
557 
/* On failure the slot is handed back as empty before anything is reported. */
558  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
559 
560  LWLockRelease(&shared->buffer_locks[slotno].lock);
561 
562  /* Now it's okay to ereport if we failed */
/* presumably SlruReportIOError raises an error and does not return -- confirm */
563  if (!ok)
564  SlruReportIOError(ctl, pageno, xid);
565 
566  SlruRecentlyUsed(shared, slotno);
567 
568  /* update the stats counter of pages not found in SLRU */
/* NOTE(review): listing line 569 (the counter update itself) is missing from this copy. */
570 
571  return slotno;
572  }
573 }
574 
575 /*
576  * Find a page in a shared buffer, reading it in if necessary.
577  * The page number must correspond to an already-initialized page.
578  * The caller must intend only read-only access to the page.
579  *
580  * The passed-in xid is used only for error reporting, and may be
581  * InvalidTransactionId if no specific xid is associated with the action.
582  *
583  * Return value is the shared-buffer slot number now holding the page.
584  * The buffer's LRU access info is updated.
585  *
586  * Bank control lock must NOT be held at entry, but will be held at exit.
587  * It is unspecified whether the lock will be shared or exclusive.
588  */
/*
 * NOTE(review): listing line 590 (the function name and parameter list) is
 * missing from this copy.  The body uses "ctl", "pageno" and "xid"; the file
 * header comment refers to this routine as SimpleLruReadPage_ReadOnly().
 */
589 int
591 {
592  SlruShared shared = ctl->shared;
593  LWLock *banklock = SimpleLruGetBankLock(ctl, pageno);
/* Only the slots of the page's own bank are searched below. */
594  int bankno = pageno & ctl->bank_mask;
595  int bankstart = bankno * SLRU_BANK_SIZE;
596  int bankend = bankstart + SLRU_BANK_SIZE;
597 
598  /* Try to find the page while holding only shared lock */
599  LWLockAcquire(banklock, LW_SHARED);
600 
601  /* See if page is already in a buffer */
602  for (int slotno = bankstart; slotno < bankend; slotno++)
603  {
/*
 * A page that is being written out is accepted here (only READ_IN_PROGRESS
 * is excluded), matching the write_ok = true passed on the slow path below.
 */
604  if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
605  shared->page_number[slotno] == pageno &&
606  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
607  {
608  /* See comments for SlruRecentlyUsed (a static inline function in this file) */
609  SlruRecentlyUsed(shared, slotno);
610 
611  /* update the stats counter of pages found in the SLRU */
/* NOTE(review): listing line 612 (the counter update itself) is missing from this copy. */
613 
/* Returns with the bank lock still held in shared mode. */
614  return slotno;
615  }
616  }
617 
618  /* No luck, so switch to normal exclusive lock and do regular read */
/* The lock is briefly not held here; SimpleLruReadPage searches again from scratch. */
619  LWLockRelease(banklock);
620  LWLockAcquire(banklock, LW_EXCLUSIVE);
621 
622  return SimpleLruReadPage(ctl, pageno, true, xid);
623 }
624 
625 /*
626  * Write a page from a shared buffer, if necessary.
627  * Does nothing if the specified slot is not dirty.
628  *
629  * NOTE: only one write attempt is made here. Hence, it is possible that
630  * the page is still dirty at exit (if someone else re-dirtied it during
631  * the write). However, we *do* attempt a fresh write even if the page
632  * is already being written; this is for checkpoints.
633  *
634  * Bank lock must be held at entry, and will be held at exit.
635  */
/*
 * NOTE(review): listing line 637 (the function name and parameter list) is
 * missing from this copy.  The forward declaration near the top of the file
 * reads: SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata).
 */
636 static void
638 {
639  SlruShared shared = ctl->shared;
/* Remember which page we were asked about; the slot may be reused while we wait. */
640  int64 pageno = shared->page_number[slotno];
641  int bankno = SlotGetBankNumber(slotno);
642  bool ok;
643 
644  Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
/* NOTE(review): listing line 645 is missing from this copy. */
646 
647  /* If a write is in progress, wait for it to finish */
648  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
649  shared->page_number[slotno] == pageno)
650  {
651  SimpleLruWaitIO(ctl, slotno);
652  }
653 
654  /*
655  * Do nothing if page is not dirty, or if buffer no longer contains the
656  * same page we were called for.
657  */
658  if (!shared->page_dirty[slotno] ||
659  shared->page_status[slotno] != SLRU_PAGE_VALID ||
660  shared->page_number[slotno] != pageno)
661  return;
662 
663  /*
664  * Mark the slot write-busy, and clear the dirtybit. After this point, a
665  * transaction status update on this page will mark it dirty again.
666  */
667  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
668  shared->page_dirty[slotno] = false;
669 
670  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
671  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
672 
673  /* Release bank lock while doing I/O */
674  LWLockRelease(&shared->bank_locks[bankno].lock);
675 
676  /* Do the write */
677  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
678 
679  /* If we failed, and we're in a flush, better close the files */
/* Every descriptor recorded in fdata is closed, not only the one that failed. */
680  if (!ok && fdata)
681  {
682  for (int i = 0; i < fdata->num_files; i++)
683  CloseTransientFile(fdata->fd[i]);
684  }
685 
686  /* Re-acquire bank lock and update page state */
687  LWLockAcquire(&shared->bank_locks[bankno].lock, LW_EXCLUSIVE);
688 
689  Assert(shared->page_number[slotno] == pageno &&
690  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
691 
692  /* If we failed to write, mark the page dirty again */
693  if (!ok)
694  shared->page_dirty[slotno] = true;
695 
696  shared->page_status[slotno] = SLRU_PAGE_VALID;
697 
698  LWLockRelease(&shared->buffer_locks[slotno].lock);
699 
700  /* Now it's okay to ereport if we failed */
/*
 * NOTE(review): listing line 702, the statement controlled by this "if", is
 * missing from this copy (presumably the SlruReportIOError call -- confirm).
 */
701  if (!ok)
703 
704  /* If part of a checkpoint, count this as a buffer written. */
/*
 * NOTE(review): listing line 706, the statement controlled by this "if", is
 * missing from this copy.
 */
705  if (fdata)
707 }
708 
709 /*
710  * Wrapper of SlruInternalWritePage, for external callers.
711  * fdata is always passed a NULL here.
712  */
/*
 * NOTE(review): listing line 714 (the function name and parameter list) is
 * missing from this copy.  The body uses "ctl" and "slotno".
 */
713 void
715 {
/* The slot must hold a page; the same condition is asserted again by the callee. */
716  Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
717 
/* NULL fdata: a standalone write, not part of a write-all pass. */
718  SlruInternalWritePage(ctl, slotno, NULL);
719 }
720 
721 /*
722  * Return whether the given page exists on disk.
723  *
724  * A false return means that either the file does not exist, or that it's not
725  * large enough to contain the given page.
726  */
/*
 * NOTE(review): listing line 728 (the function name and parameter list) is
 * missing from this copy.  The body uses "ctl" and "pageno".
 */
727 bool
729 {
730  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
731  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
/* Byte offset of the page within its segment file. */
732  int offset = rpageno * BLCKSZ;
733  char path[MAXPGPATH];
734  int fd;
735  bool result;
736  off_t endpos;
737 
738  /* update the stats counter of checked pages */
739  pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
740 
741  SlruFileName(ctl, path, segno);
742 
743  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
744  if (fd < 0)
745  {
746  /* expected: file doesn't exist */
747  if (errno == ENOENT)
748  return false;
749 
750  /* report error normally */
/* NOTE(review): listing line 751 is missing from this copy. */
752  slru_errno = errno;
/*
 * The code below uses fd unconditionally, so it relies on
 * SlruReportIOError not returning -- presumably it raises an error; confirm.
 */
753  SlruReportIOError(ctl, pageno, 0);
754  }
755 
/* Seeking to the end yields the file's current size. */
756  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
757  {
/* NOTE(review): listing line 758 is missing from this copy. */
759  slru_errno = errno;
760  SlruReportIOError(ctl, pageno, 0);
761  }
762 
/* The page exists only if the file reaches the page's last byte. */
763  result = endpos >= (off_t) (offset + BLCKSZ);
764 
765  if (CloseTransientFile(fd) != 0)
766  {
/* NOTE(review): listing line 767 is missing from this copy. */
768  slru_errno = errno;
/* In the visible code a close failure is answered with "false", not reported. */
769  return false;
770  }
771 
772  return result;
773 }
774 
775 /*
776  * Physical read of a (previously existing) page into a buffer slot
777  *
778  * On failure, we cannot just ereport(ERROR) since caller has put state in
779  * shared memory that must be undone. So, we return false and save enough
780  * info in static variables to let SlruReportIOError make the report.
781  *
782  * For now, assume it's not worth keeping a file pointer open across
783  * read/write operations. We could cache one virtual file pointer ...
784  */
785 static bool
786 SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
787 {
788  SlruShared shared = ctl->shared;
789  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
790  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
/* Byte offset of the page within its segment file. */
791  off_t offset = rpageno * BLCKSZ;
792  char path[MAXPGPATH];
793  int fd;
794 
795  SlruFileName(ctl, path, segno);
796 
797  /*
798  * In a crash-and-restart situation, it's possible for us to receive
799  * commands to set the commit status of transactions whose bits are in
800  * already-truncated segments of the commit log (see notes in
801  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
802  * where the file doesn't exist, and return zeroes instead.
803  */
804  fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
805  if (fd < 0)
806  {
/* Anything other than "missing file during recovery" is a real failure. */
807  if (errno != ENOENT || !InRecovery)
808  {
/* NOTE(review): listing line 809 is missing from this copy. */
810  slru_errno = errno;
811  return false;
812  }
813 
814  ereport(LOG,
815  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
816  path)));
817  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
818  return true;
819  }
820 
/*
 * errno is cleared first, so a short read that sets no error code is saved
 * in slru_errno as 0 rather than as a stale value.
 */
821  errno = 0;
822  pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
823  if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
824  {
/*
 * NOTE(review): listing lines 825, 826 and 828 are missing from this copy.
 * As shown, this path neither ends the wait event started above nor closes
 * fd -- confirm against the real source.
 */
827  slru_errno = errno;
829  return false;
830  }
/* NOTE(review): listing line 831 is missing from this copy. */
832 
833  if (CloseTransientFile(fd) != 0)
834  {
/* NOTE(review): listing line 835 is missing from this copy. */
836  slru_errno = errno;
837  return false;
838  }
839 
840  return true;
841 }
842 
843 /*
844  * Physical write of a page from a buffer slot
845  *
846  * On failure, we cannot just ereport(ERROR) since caller has put state in
847  * shared memory that must be undone. So, we return false and save enough
848  * info in static variables to let SlruReportIOError make the report.
849  *
850  * For now, assume it's not worth keeping a file pointer open across
851  * independent read/write operations. We do batch operations during
852  * SimpleLruWriteAll, though.
853  *
854  * fdata is NULL for a standalone write, pointer to open-file info during
855  * SimpleLruWriteAll.
856  */
857 static bool
858 SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
859 {
860  SlruShared shared = ctl->shared;
861  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
862  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
/* Byte offset of the page within its segment file. */
863  off_t offset = rpageno * BLCKSZ;
864  char path[MAXPGPATH];
/* -1 means "no descriptor yet"; it may be filled from fdata or by opening the file. */
865  int fd = -1;
866 
867  /* update the stats counter of written pages */
/* NOTE(review): listing line 868 (the counter update itself) is missing from this copy. */
869 
870  /*
871  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
872  * write out data before associated WAL records. This is the same action
873  * performed during FlushBuffer() in the main buffer manager.
874  */
/* group_lsn is only set up by SimpleLruInit when the SLRU has LSN groups. */
875  if (shared->group_lsn != NULL)
876  {
877  /*
878  * We must determine the largest async-commit LSN for the page. This
879  * is a bit tedious, but since this entire function is a slow path
880  * anyway, it seems better to do this here than to maintain a per-page
881  * LSN variable (which'd need an extra comparison in the
882  * transaction-commit path).
883  */
884  XLogRecPtr max_lsn;
885  int lsnindex;
886 
/* Seed the maximum with the slot's first group, then scan the remaining ones. */
887  lsnindex = slotno * shared->lsn_groups_per_page;
888  max_lsn = shared->group_lsn[lsnindex++];
889  for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
890  {
891  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
892 
893  if (max_lsn < this_lsn)
894  max_lsn = this_lsn;
895  }
896 
897  if (!XLogRecPtrIsInvalid(max_lsn))
898  {
899  /*
900  * As noted above, elog(ERROR) is not acceptable here, so if
901  * XLogFlush were to fail, we must PANIC. This isn't much of a
902  * restriction because XLogFlush is just about all critical
903  * section anyway, but let's make sure.
904  */
/*
 * NOTE(review): listing lines 905 and 907, which bracket the flush, are
 * missing from this copy (presumably the critical-section start and end
 * that the comment above describes -- confirm).
 */
906  XLogFlush(max_lsn);
908  }
909  }
910 
911  /*
912  * During a SimpleLruWriteAll, we may already have the desired file open.
913  */
914  if (fdata)
915  {
916  for (int i = 0; i < fdata->num_files; i++)
917  {
918  if (fdata->segno[i] == segno)
919  {
920  fd = fdata->fd[i];
921  break;
922  }
923  }
924  }
925 
926  if (fd < 0)
927  {
928  /*
929  * If the file doesn't already exist, we should create it. It is
930  * possible for this to need to happen when writing a page that's not
931  * first in its segment; we assume the OS can cope with that. (Note:
932  * it might seem that it'd be okay to create files only when
933  * SimpleLruZeroPage is called for the first page of a segment.
934  * However, if after a crash and restart the REDO logic elects to
935  * replay the log from a checkpoint before the latest one, then it's
936  * possible that we will get commands to set transaction status of
937  * transactions that have already been truncated from the commit log.
938  * Easiest way to deal with that is to accept references to
939  * nonexistent files here and in SlruPhysicalReadPage.)
940  *
941  * Note: it is possible for more than one backend to be executing this
942  * code simultaneously for different pages of the same file. Hence,
943  * don't use O_EXCL or O_TRUNC or anything like that.
944  */
945  SlruFileName(ctl, path, segno);
946  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
947  if (fd < 0)
948  {
/* NOTE(review): listing line 949 is missing from this copy. */
950  slru_errno = errno;
951  return false;
952  }
953 
954  if (fdata)
955  {
/* Remember the newly opened file so later pages of this segment reuse it. */
956  if (fdata->num_files < MAX_WRITEALL_BUFFERS)
957  {
958  fdata->fd[fdata->num_files] = fd;
959  fdata->segno[fdata->num_files] = segno;
960  fdata->num_files++;
961  }
962  else
963  {
964  /*
965  * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
966  * fall back to treating it as a standalone write.
967  */
/* With fdata cleared, the descriptor is closed at the end of this function. */
968  fdata = NULL;
969  }
970  }
971  }
972 
/* Cleared so that a short write which sets no error code can be detected below. */
973  errno = 0;
974  pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
975  if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
976  {
/* NOTE(review): listing line 977 is missing from this copy. */
978  /* if write didn't set errno, assume problem is no disk space */
979  if (errno == 0)
980  errno = ENOSPC;
/* NOTE(review): listing line 981 is missing from this copy. */
982  slru_errno = errno;
/*
 * NOTE(review): listing line 984, the statement controlled by this "if", is
 * missing from this copy; as shown, the "return false" below would become
 * conditional.  Presumably the missing statement closes fd for a standalone
 * write -- confirm.
 */
983  if (!fdata)
985  return false;
986  }
/* NOTE(review): listing line 987 is missing from this copy. */
988 
989  /* Queue up a sync request for the checkpointer. */
990  if (ctl->sync_handler != SYNC_HANDLER_NONE)
991  {
992  FileTag tag;
993 
994  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
995  if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
996  {
997  /* No space to enqueue sync request. Do it synchronously. */
998  pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
999  if (pg_fsync(fd) != 0)
1000  {
/* NOTE(review): listing lines 1001, 1002 and 1004 are missing from this copy. */
1003  slru_errno = errno;
1005  return false;
1006  }
/* NOTE(review): listing line 1007 is missing from this copy. */
1008  }
1009  }
1010 
1011  /* Close file, unless part of flush request. */
1012  if (!fdata)
1013  {
1014  if (CloseTransientFile(fd) != 0)
1015  {
/* NOTE(review): listing line 1016 is missing from this copy. */
1017  slru_errno = errno;
1018  return false;
1019  }
1020  }
1021 
1022  return true;
1023 }
1024 
1025 /*
1026  * Issue the error message after failure of SlruPhysicalReadPage or
1027  * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
1028  */
1029 static void
1031 {
1032  int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1033  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1034  int offset = rpageno * BLCKSZ;
1035  char path[MAXPGPATH];
1036 
1037  SlruFileName(ctl, path, segno);
1038  errno = slru_errno;
1039  switch (slru_errcause)
1040  {
1041  case SLRU_OPEN_FAILED:
1042  ereport(ERROR,
1044  errmsg("could not access status of transaction %u", xid),
1045  errdetail("Could not open file \"%s\": %m.", path)));
1046  break;
1047  case SLRU_SEEK_FAILED:
1048  ereport(ERROR,
1050  errmsg("could not access status of transaction %u", xid),
1051  errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1052  path, offset)));
1053  break;
1054  case SLRU_READ_FAILED:
1055  if (errno)
1056  ereport(ERROR,
1058  errmsg("could not access status of transaction %u", xid),
1059  errdetail("Could not read from file \"%s\" at offset %d: %m.",
1060  path, offset)));
1061  else
1062  ereport(ERROR,
1063  (errmsg("could not access status of transaction %u", xid),
1064  errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1065  break;
1066  case SLRU_WRITE_FAILED:
1067  if (errno)
1068  ereport(ERROR,
1070  errmsg("could not access status of transaction %u", xid),
1071  errdetail("Could not write to file \"%s\" at offset %d: %m.",
1072  path, offset)));
1073  else
1074  ereport(ERROR,
1075  (errmsg("could not access status of transaction %u", xid),
1076  errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1077  path, offset)));
1078  break;
1079  case SLRU_FSYNC_FAILED:
1082  errmsg("could not access status of transaction %u", xid),
1083  errdetail("Could not fsync file \"%s\": %m.",
1084  path)));
1085  break;
1086  case SLRU_CLOSE_FAILED:
1087  ereport(ERROR,
1089  errmsg("could not access status of transaction %u", xid),
1090  errdetail("Could not close file \"%s\": %m.",
1091  path)));
1092  break;
1093  default:
1094  /* can't get here, we trust */
1095  elog(ERROR, "unrecognized SimpleLru error cause: %d",
1096  (int) slru_errcause);
1097  break;
1098  }
1099 }
1100 
1101 /*
1102  * Mark a buffer slot "most recently used".
1103  */
1104 static inline void
1105 SlruRecentlyUsed(SlruShared shared, int slotno)
1106 {
1107  int bankno = SlotGetBankNumber(slotno);
1108  int new_lru_count = shared->bank_cur_lru_count[bankno];
1109 
1110  Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
1111 
1112  /*
1113  * The reason for the if-test is that there are often many consecutive
1114  * accesses to the same page (particularly the latest page). By
1115  * suppressing useless increments of bank_cur_lru_count, we reduce the
1116  * probability that old pages' counts will "wrap around" and make them
1117  * appear recently used.
1118  *
1119  * We allow this code to be executed concurrently by multiple processes
1120  * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1121  * are atomic, this should not cause any completely-bogus values to enter
1122  * the computation. However, it is possible for either bank_cur_lru_count
1123  * or individual page_lru_count entries to be "reset" to lower values than
1124  * they should have, in case a process is delayed while it executes this
1125  * function. With care in SlruSelectLRUPage(), this does little harm, and
1126  * in any case the absolute worst possible consequence is a nonoptimal
1127  * choice of page to evict. The gain from allowing concurrent reads of
1128  * SLRU pages seems worth it.
1129  */
1130  if (new_lru_count != shared->page_lru_count[slotno])
1131  {
1132  shared->bank_cur_lru_count[bankno] = ++new_lru_count;
1133  shared->page_lru_count[slotno] = new_lru_count;
1134  }
1135 }
1136 
1137 /*
1138  * Select the slot to re-use when we need a free slot for the given page.
1139  *
1140  * The target page number is passed not only because we need to know the
1141  * correct bank to use, but also because we need to consider the possibility
1142  * that some other process reads in the target page while we are doing I/O to
1143  * free a slot. Hence, check or recheck to see if any slot already holds the
1144  * target page, and return that slot if so. Thus, the returned slot is
1145  * *either* a slot already holding the pageno (could be any state except
1146  * EMPTY), *or* a freeable slot (state EMPTY or CLEAN).
1147  *
1148  * The correct bank lock must be held at entry, and will be held at exit.
1149  */
1150 static int
1152 {
1153  SlruShared shared = ctl->shared;
1154 
1155  /* Outer loop handles restart after I/O */
1156  for (;;)
1157  {
1158  int cur_count;
1159  int bestvalidslot = 0; /* keep compiler quiet */
1160  int best_valid_delta = -1;
1161  int64 best_valid_page_number = 0; /* keep compiler quiet */
1162  int bestinvalidslot = 0; /* keep compiler quiet */
1163  int best_invalid_delta = -1;
1164  int64 best_invalid_page_number = 0; /* keep compiler quiet */
1165  int bankno = pageno & ctl->bank_mask;
1166  int bankstart = bankno * SLRU_BANK_SIZE;
1167  int bankend = bankstart + SLRU_BANK_SIZE;
1168 
1170 
1171  /* See if page already has a buffer assigned */
1172  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1173  {
1174  if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1175  shared->page_number[slotno] == pageno)
1176  return slotno;
1177  }
1178 
1179  /*
1180  * If we find any EMPTY slot, just select that one. Else choose a
1181  * victim page to replace. We normally take the least recently used
1182  * valid page, but we will never take the slot containing
1183  * latest_page_number, even if it appears least recently used. We
1184  * will select a slot that is already I/O busy only if there is no
1185  * other choice: a read-busy slot will not be least recently used once
1186  * the read finishes, and waiting for an I/O on a write-busy slot is
1187  * inferior to just picking some other slot. Testing shows the slot
1188  * we pick instead will often be clean, allowing us to begin a read at
1189  * once.
1190  *
1191  * Normally the page_lru_count values will all be different and so
1192  * there will be a well-defined LRU page. But since we allow
1193  * concurrent execution of SlruRecentlyUsed() within
1194  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1195  * acquire the same lru_count values. In that case we break ties by
1196  * choosing the furthest-back page.
1197  *
1198  * Notice that this next line forcibly advances cur_lru_count to a
1199  * value that is certainly beyond any value that will be in the
1200  * page_lru_count array after the loop finishes. This ensures that
1201  * the next execution of SlruRecentlyUsed will mark the page newly
1202  * used, even if it's for a page that has the current counter value.
1203  * That gets us back on the path to having good data when there are
1204  * multiple pages with the same lru_count.
1205  */
1206  cur_count = (shared->bank_cur_lru_count[bankno])++;
1207  for (int slotno = bankstart; slotno < bankend; slotno++)
1208  {
1209  int this_delta;
1210  int64 this_page_number;
1211 
1212  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1213  return slotno;
1214 
1215  this_delta = cur_count - shared->page_lru_count[slotno];
1216  if (this_delta < 0)
1217  {
1218  /*
1219  * Clean up in case shared updates have caused cur_count
1220  * increments to get "lost". We back off the page counts,
1221  * rather than trying to increase cur_count, to avoid any
1222  * question of infinite loops or failure in the presence of
1223  * wrapped-around counts.
1224  */
1225  shared->page_lru_count[slotno] = cur_count;
1226  this_delta = 0;
1227  }
1228 
1229  /*
1230  * If this page is the one most recently zeroed, don't consider it
1231  * an eviction candidate. See comments in SimpleLruZeroPage for an
1232  * explanation about the lack of a memory barrier here.
1233  */
1234  this_page_number = shared->page_number[slotno];
1235  if (this_page_number ==
1237  continue;
1238 
1239  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1240  {
1241  if (this_delta > best_valid_delta ||
1242  (this_delta == best_valid_delta &&
1243  ctl->PagePrecedes(this_page_number,
1244  best_valid_page_number)))
1245  {
1246  bestvalidslot = slotno;
1247  best_valid_delta = this_delta;
1248  best_valid_page_number = this_page_number;
1249  }
1250  }
1251  else
1252  {
1253  if (this_delta > best_invalid_delta ||
1254  (this_delta == best_invalid_delta &&
1255  ctl->PagePrecedes(this_page_number,
1256  best_invalid_page_number)))
1257  {
1258  bestinvalidslot = slotno;
1259  best_invalid_delta = this_delta;
1260  best_invalid_page_number = this_page_number;
1261  }
1262  }
1263  }
1264 
1265  /*
1266  * If all pages (except possibly the latest one) are I/O busy, we'll
1267  * have to wait for an I/O to complete and then retry. In that
1268  * unhappy case, we choose to wait for the I/O on the least recently
1269  * used slot, on the assumption that it was likely initiated first of
1270  * all the I/Os in progress and may therefore finish first.
1271  */
1272  if (best_valid_delta < 0)
1273  {
1274  SimpleLruWaitIO(ctl, bestinvalidslot);
1275  continue;
1276  }
1277 
1278  /*
1279  * If the selected page is clean, we're set.
1280  */
1281  if (!shared->page_dirty[bestvalidslot])
1282  return bestvalidslot;
1283 
1284  /*
1285  * Write the page.
1286  */
1287  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1288 
1289  /*
1290  * Now loop back and try again. This is the easiest way of dealing
1291  * with corner cases such as the victim page being re-dirtied while we
1292  * wrote it.
1293  */
1294  }
1295 }
1296 
1297 /*
1298  * Write dirty pages to disk during checkpoint or database shutdown. Flushing
1299  * is deferred until the next call to ProcessSyncRequests(), though we do fsync
1300  * the containing directory here to make sure that newly created directory
1301  * entries are on disk.
1302  */
1303 void
1304 SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
1305 {
1306  SlruShared shared = ctl->shared;
1307  SlruWriteAllData fdata;
1308  int64 pageno = 0;
1309  int prevbank = SlotGetBankNumber(0);
1310  bool ok;
1311 
1312  /* update the stats counter of flushes */
1314 
1315  /*
1316  * Find and write dirty pages
1317  */
1318  fdata.num_files = 0;
1319 
1320  LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1321 
1322  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1323  {
1324  int curbank = SlotGetBankNumber(slotno);
1325 
1326  /*
1327  * If the current bank lock is not same as the previous bank lock then
1328  * release the previous lock and acquire the new lock.
1329  */
1330  if (curbank != prevbank)
1331  {
1332  LWLockRelease(&shared->bank_locks[prevbank].lock);
1333  LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1334  prevbank = curbank;
1335  }
1336 
1337  /* Do nothing if slot is unused */
1338  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1339  continue;
1340 
1341  SlruInternalWritePage(ctl, slotno, &fdata);
1342 
1343  /*
1344  * In some places (e.g. checkpoints), we cannot assert that the slot
1345  * is clean now, since another process might have re-dirtied it
1346  * already. That's okay.
1347  */
1348  Assert(allow_redirtied ||
1349  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1350  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1351  !shared->page_dirty[slotno]));
1352  }
1353 
1354  LWLockRelease(&shared->bank_locks[prevbank].lock);
1355 
1356  /*
1357  * Now close any files that were open
1358  */
1359  ok = true;
1360  for (int i = 0; i < fdata.num_files; i++)
1361  {
1362  if (CloseTransientFile(fdata.fd[i]) != 0)
1363  {
1365  slru_errno = errno;
1366  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1367  ok = false;
1368  }
1369  }
1370  if (!ok)
1372 
1373  /* Ensure that directory entries for new files are on disk. */
1374  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1375  fsync_fname(ctl->Dir, true);
1376 }
1377 
1378 /*
1379  * Remove all segments before the one holding the passed page number
1380  *
1381  * All SLRUs prevent concurrent calls to this function, either with an LWLock
1382  * or by calling it only as part of a checkpoint. Mutual exclusion must begin
1383  * before computing cutoffPage. Mutual exclusion must end after any limit
1384  * update that would permit other backends to write fresh data into the
1385  * segment immediately preceding the one containing cutoffPage. Otherwise,
1386  * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
1387  * after it has accrued freshly-written data.
1388  */
1389 void
1390 SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
1391 {
1392  SlruShared shared = ctl->shared;
1393  int prevbank;
1394 
1395  /* update the stats counter of truncates */
1397 
1398  /*
1399  * Scan shared memory and remove any pages preceding the cutoff page, to
1400  * ensure we won't rewrite them later. (Since this is normally called in
1401  * or just after a checkpoint, any dirty pages should have been flushed
1402  * already ... we're just being extra careful here.)
1403  */
1404 restart:
1405 
1406  /*
1407  * An important safety check: the current endpoint page must not be
1408  * eligible for removal. This check is just a backstop against wraparound
1409  * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1410  * outdated value; therefore we don't add a memory barrier.
1411  */
1412  if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1413  cutoffPage))
1414  {
1415  ereport(LOG,
1416  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1417  ctl->Dir)));
1418  return;
1419  }
1420 
1421  prevbank = SlotGetBankNumber(0);
1422  LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1423  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1424  {
1425  int curbank = SlotGetBankNumber(slotno);
1426 
1427  /*
1428  * If the current bank lock is not same as the previous bank lock then
1429  * release the previous lock and acquire the new lock.
1430  */
1431  if (curbank != prevbank)
1432  {
1433  LWLockRelease(&shared->bank_locks[prevbank].lock);
1434  LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1435  prevbank = curbank;
1436  }
1437 
1438  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1439  continue;
1440  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1441  continue;
1442 
1443  /*
1444  * If page is clean, just change state to EMPTY (expected case).
1445  */
1446  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1447  !shared->page_dirty[slotno])
1448  {
1449  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1450  continue;
1451  }
1452 
1453  /*
1454  * Hmm, we have (or may have) I/O operations acting on the page, so
1455  * we've got to wait for them to finish and then start again. This is
1456  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1457  * wouldn't it be OK to just discard it without writing it?
1458  * SlruMayDeleteSegment() uses a stricter qualification, so we might
1459  * not delete this page in the end; even if we don't delete it, we
1460  * won't have cause to read its data again. For now, keep the logic
1461  * the same as it was.)
1462  */
1463  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1464  SlruInternalWritePage(ctl, slotno, NULL);
1465  else
1466  SimpleLruWaitIO(ctl, slotno);
1467 
1468  LWLockRelease(&shared->bank_locks[prevbank].lock);
1469  goto restart;
1470  }
1471 
1472  LWLockRelease(&shared->bank_locks[prevbank].lock);
1473 
1474  /* Now we can remove the old segment(s) */
1475  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1476 }
1477 
1478 /*
1479  * Delete an individual SLRU segment.
1480  *
1481  * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1482  * they either can't yet contain anything, or have already been cleaned out.
1483  */
1484 static void
1486 {
1487  char path[MAXPGPATH];
1488 
1489  /* Forget any fsync requests queued for this segment. */
1490  if (ctl->sync_handler != SYNC_HANDLER_NONE)
1491  {
1492  FileTag tag;
1493 
1494  INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1496  }
1497 
1498  /* Unlink the file. */
1499  SlruFileName(ctl, path, segno);
1500  ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1501  unlink(path);
1502 }
1503 
1504 /*
1505  * Delete an individual SLRU segment, identified by the segment number.
1506  */
1507 void
1509 {
1510  SlruShared shared = ctl->shared;
1511  int prevbank = SlotGetBankNumber(0);
1512  bool did_write;
1513 
1514  /* Clean out any possibly existing references to the segment. */
1515  LWLockAcquire(&shared->bank_locks[prevbank].lock, LW_EXCLUSIVE);
1516 restart:
1517  did_write = false;
1518  for (int slotno = 0; slotno < shared->num_slots; slotno++)
1519  {
1520  int pagesegno;
1521  int curbank = SlotGetBankNumber(slotno);
1522 
1523  /*
1524  * If the current bank lock is not same as the previous bank lock then
1525  * release the previous lock and acquire the new lock.
1526  */
1527  if (curbank != prevbank)
1528  {
1529  LWLockRelease(&shared->bank_locks[prevbank].lock);
1530  LWLockAcquire(&shared->bank_locks[curbank].lock, LW_EXCLUSIVE);
1531  prevbank = curbank;
1532  }
1533 
1534  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1535  continue;
1536 
1537  pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1538  /* not the segment we're looking for */
1539  if (pagesegno != segno)
1540  continue;
1541 
1542  /* If page is clean, just change state to EMPTY (expected case). */
1543  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1544  !shared->page_dirty[slotno])
1545  {
1546  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1547  continue;
1548  }
1549 
1550  /* Same logic as SimpleLruTruncate() */
1551  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1552  SlruInternalWritePage(ctl, slotno, NULL);
1553  else
1554  SimpleLruWaitIO(ctl, slotno);
1555 
1556  did_write = true;
1557  }
1558 
1559  /*
1560  * Be extra careful and re-check. The IO functions release the control
1561  * lock, so new pages could have been read in.
1562  */
1563  if (did_write)
1564  goto restart;
1565 
1567 
1568  LWLockRelease(&shared->bank_locks[prevbank].lock);
1569 }
1570 
1571 /*
1572  * Determine whether a segment is okay to delete.
1573  *
1574  * segpage is the first page of the segment, and cutoffPage is the oldest (in
1575  * PagePrecedes order) page in the SLRU containing still-useful data. Since
1576  * every core PagePrecedes callback implements "wrap around", check the
1577  * segment's first and last pages:
1578  *
1579  * first<cutoff && last<cutoff: yes
1580  * first<cutoff && last>=cutoff: no; cutoff falls inside this segment
1581  * first>=cutoff && last<cutoff: no; wrap point falls inside this segment
1582  * first>=cutoff && last>=cutoff: no; every page of this segment is too young
1583  */
1584 static bool
1585 SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
1586 {
1587  int64 seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
1588 
1589  Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
1590 
1591  return (ctl->PagePrecedes(segpage, cutoffPage) &&
1592  ctl->PagePrecedes(seg_last_page, cutoffPage));
1593 }
1594 
1595 #ifdef USE_ASSERT_CHECKING
1596 static void
1597 SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
1598 {
1599  TransactionId lhs,
1600  rhs;
1601  int64 newestPage,
1602  oldestPage;
1603  TransactionId newestXact,
1604  oldestXact;
1605 
1606  /*
1607  * Compare an XID pair having undefined order (see RFC 1982), a pair at
1608  * "opposite ends" of the XID space. TransactionIdPrecedes() treats each
1609  * as preceding the other. If RHS is oldestXact, LHS is the first XID we
1610  * must not assign.
1611  */
1612  lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */
1613  rhs = lhs + (1U << 31);
1614  Assert(TransactionIdPrecedes(lhs, rhs));
1615  Assert(TransactionIdPrecedes(rhs, lhs));
1616  Assert(!TransactionIdPrecedes(lhs - 1, rhs));
1617  Assert(TransactionIdPrecedes(rhs, lhs - 1));
1618  Assert(TransactionIdPrecedes(lhs + 1, rhs));
1619  Assert(!TransactionIdPrecedes(rhs, lhs + 1));
1622  Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
1623  Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
1624  Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
1625  Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
1626  Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
1627  Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
1628  Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
1629  || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */
1630  Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
1631  || (1U << 31) % per_page != 0);
1632  Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
1633  Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
1634  Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
1635 
1636  /*
1637  * GetNewTransactionId() has assigned the last XID it can safely use, and
1638  * that XID is in the *LAST* page of the second segment. We must not
1639  * delete that segment.
1640  */
1641  newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1;
1642  newestXact = newestPage * per_page + offset;
1643  Assert(newestXact / per_page == newestPage);
1644  oldestXact = newestXact + 1;
1645  oldestXact -= 1U << 31;
1646  oldestPage = oldestXact / per_page;
1648  (newestPage -
1649  newestPage % SLRU_PAGES_PER_SEGMENT),
1650  oldestPage));
1651 
1652  /*
1653  * GetNewTransactionId() has assigned the last XID it can safely use, and
1654  * that XID is in the *FIRST* page of the second segment. We must not
1655  * delete that segment.
1656  */
1657  newestPage = SLRU_PAGES_PER_SEGMENT;
1658  newestXact = newestPage * per_page + offset;
1659  Assert(newestXact / per_page == newestPage);
1660  oldestXact = newestXact + 1;
1661  oldestXact -= 1U << 31;
1662  oldestPage = oldestXact / per_page;
1664  (newestPage -
1665  newestPage % SLRU_PAGES_PER_SEGMENT),
1666  oldestPage));
1667 }
1668 
1669 /*
1670  * Unit-test a PagePrecedes function.
1671  *
1672  * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It
1673  * assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
1674  * (MultiXactMemberCtl separates flags from XIDs. NotifyCtl has
1675  * variable-length entries, no keys, and no random access. These unit tests
1676  * do not apply to them.)
1677  */
1678 void
1679 SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
1680 {
1681  /* Test first, middle and last entries of a page. */
1682  SlruPagePrecedesTestOffset(ctl, per_page, 0);
1683  SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2);
1684  SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1);
1685 }
1686 #endif
1687 
1688 /*
1689  * SlruScanDirectory callback
1690  * This callback reports true if there's any segment wholly prior to the
1691  * one containing the page passed as "data".
1692  */
1693 bool
1695  void *data)
1696 {
1697  int64 cutoffPage = *(int64 *) data;
1698 
1699  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1700  return true; /* found one; don't iterate any more */
1701 
1702  return false; /* keep going */
1703 }
1704 
1705 /*
1706  * SlruScanDirectory callback.
1707  * This callback deletes segments prior to the one passed in as "data".
1708  */
1709 static bool
1711  void *data)
1712 {
1713  int64 cutoffPage = *(int64 *) data;
1714 
1715  if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1717 
1718  return false; /* keep going */
1719 }
1720 
1721 /*
1722  * SlruScanDirectory callback.
1723  * This callback deletes all segments.
1724  */
1725 bool
1726 SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data)
1727 {
1729 
1730  return false; /* keep going */
1731 }
1732 
1733 /*
1734  * An internal function used by SlruScanDirectory().
1735  *
1736  * Returns true if a file with a name of a given length may be a correct
1737  * SLRU segment.
1738  */
1739 static inline bool
1741 {
1742  if (ctl->long_segment_names)
1743  return (len == 15); /* see SlruFileName() */
1744  else
1745 
1746  /*
1747  * Commit 638cf09e76d allowed 5-character lengths. Later commit
1748  * 73c986adde5 allowed 6-character length.
1749  *
1750  * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1751  * numbers, and the corresponding 15-character file names, which may
1752  * eventually deprecate the support for 4, 5, and 6-character names.
1753  */
1754  return (len == 4 || len == 5 || len == 6);
1755 }
1756 
1757 /*
1758  * Scan the SimpleLru directory and apply a callback to each file found in it.
1759  *
1760  * If the callback returns true, the scan is stopped. The last return value
1761  * from the callback is returned.
1762  *
1763  * The callback receives the following arguments: 1. the SlruCtl struct for the
1764  * slru being truncated; 2. the filename being considered; 3. the page number
1765  * for the first page of that file; 4. a pointer to the opaque data given to us
1766  * by the caller.
1767  *
1768  * Note that the ordering in which the directory is scanned is not guaranteed.
1769  *
1770  * Note that no locking is applied.
1771  */
1772 bool
1774 {
1775  bool retval = false;
1776  DIR *cldir;
1777  struct dirent *clde;
1778  int64 segno;
1779  int64 segpage;
1780 
1781  cldir = AllocateDir(ctl->Dir);
1782  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1783  {
1784  size_t len;
1785 
1786  len = strlen(clde->d_name);
1787 
1789  strspn(clde->d_name, "0123456789ABCDEF") == len)
1790  {
1791  segno = strtoi64(clde->d_name, NULL, 16);
1792  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1793 
1794  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1795  ctl->Dir, clde->d_name);
1796  retval = callback(ctl, clde->d_name, segpage, data);
1797  if (retval)
1798  break;
1799  }
1800  }
1801  FreeDir(cldir);
1802 
1803  return retval;
1804 }
1805 
1806 /*
1807  * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
1808  * that they can provide the correct "SlruCtl" (otherwise we don't know how to
1809  * build the path), but they just forward to this common implementation that
1810  * performs the fsync.
1811  */
1812 int
1813 SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
1814 {
1815  int fd;
1816  int save_errno;
1817  int result;
1818 
1819  SlruFileName(ctl, path, ftag->segno);
1820 
1821  fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1822  if (fd < 0)
1823  return -1;
1824 
1825  pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1826  result = pg_fsync(fd);
1828  save_errno = errno;
1829 
1831 
1832  errno = save_errno;
1833  return result;
1834 }
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:480
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:448
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:462
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define MAXALIGN(LEN)
Definition: c.h:811
#define Max(x, y)
Definition: c.h:998
#define strtoi64(str, endptr, base)
Definition: c.h:1297
#define BUFFERALIGN(LEN)
Definition: c.h:813
#define Assert(condition)
Definition: c.h:858
#define PG_BINARY
Definition: c.h:1273
#define MemSet(start, val, len)
Definition: c.h:1020
uint32 TransactionId
Definition: c.h:652
size_t Size
Definition: c.h:605
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode_for_file_access(void)
Definition: elog.c:880
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2909
int FreeDir(DIR *dir)
Definition: fd.c:2961
int CloseTransientFile(int fd)
Definition: fd.c:2809
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:756
int data_sync_elevel(int elevel)
Definition: fd.c:3936
int pg_fsync(int fd)
Definition: fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2633
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
int NBuffers
Definition: globals.c:139
bool IsUnderPostmaster
Definition: globals.c:117
#define newval
#define GUC_check_errdetail
Definition: guc.h:447
int i
Definition: isn.c:73
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1895
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1939
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:709
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1341
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define MAXPGPATH
const void size_t len
const void * data
static char * filename
Definition: pg_dumpall.c:119
static XLogRecPtr endpos
Definition: pg_receivewal.c:56
void pgstat_count_slru_page_exists(int slru_idx)
Definition: pgstat_slru.c:71
void pgstat_count_slru_page_read(int slru_idx)
Definition: pgstat_slru.c:77
int pgstat_get_slru_index(const char *name)
Definition: pgstat_slru.c:132
void pgstat_count_slru_page_hit(int slru_idx)
Definition: pgstat_slru.c:65
void pgstat_count_slru_page_zeroed(int slru_idx)
Definition: pgstat_slru.c:59
void pgstat_count_slru_truncate(int slru_idx)
Definition: pgstat_slru.c:95
void pgstat_count_slru_page_written(int slru_idx)
Definition: pgstat_slru.c:83
void pgstat_count_slru_flush(int slru_idx)
Definition: pgstat_slru.c:89
#define pg_pwrite
Definition: port.h:226
#define pg_pread
Definition: port.h:225
#define snprintf
Definition: port.h:238
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
static int fd(const char *x, int i)
Definition: preproc-init.c:105
tree ctl
Definition: radixtree.h:1851
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition: slru.c:237
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition: slru.c:76
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition: slru.c:786
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:590
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition: slru.c:142
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:714
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1304
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition: slru.c:1585
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:1030
struct SlruWriteAllData SlruWriteAllData
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:413
#define SLRU_BANK_SIZE
Definition: slru.c:129
int SimpleLruAutotuneBuffers(int divisor, int max)
Definition: slru.c:217
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition: slru.c:858
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition: slru.c:1740
static SlruErrorCause slru_errcause
Definition: slru.c:160
#define MAX_WRITEALL_BUFFERS
Definition: slru.c:109
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:430
static int slru_errno
Definition: slru.c:161
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition: slru.c:728
void SlruDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1508
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition: slru.c:637
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1773
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1726
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:487
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition: slru.c:1813
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:1151
#define SlotGetBankNumber(slotno)
Definition: slru.c:134
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:360
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition: slru.c:1390
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1485
struct SlruWriteAllData * SlruWriteAll
Definition: slru.c:118
SlruErrorCause
Definition: slru.c:151
@ SLRU_WRITE_FAILED
Definition: slru.c:155
@ SLRU_FSYNC_FAILED
Definition: slru.c:156
@ SLRU_SEEK_FAILED
Definition: slru.c:153
@ SLRU_OPEN_FAILED
Definition: slru.c:152
@ SLRU_CLOSE_FAILED
Definition: slru.c:157
@ SLRU_READ_FAILED
Definition: slru.c:154
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:184
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1694
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.c:1710
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition: slru.c:1105
bool check_slru_buffers(const char *name, int *newval)
Definition: slru.c:340
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:179
SlruSharedData * SlruShared
Definition: slru.h:121
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition: slru.h:203
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: slru.h:208
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:39
#define SLRU_MAX_ALLOWED_BUFFERS
Definition: slru.h:24
SlruPageStatus
Definition: slru.h:48
@ SLRU_PAGE_VALID
Definition: slru.h:51
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition: slru.h:52
@ SLRU_PAGE_READ_IN_PROGRESS
Definition: slru.h:50
@ SLRU_PAGE_EMPTY
Definition: slru.h:49
int ckpt_bufs_written
Definition: xlog.h:165
Definition: dirent.c:26
Definition: sync.h:51
uint64 segno
Definition: sync.h:55
Definition: lwlock.h:42
int slru_stats_idx
Definition: slru.h:118
int64 * page_number
Definition: slru.h:73
int num_slots
Definition: slru.h:64
LWLockPadded * bank_locks
Definition: slru.h:80
int * page_lru_count
Definition: slru.h:74
pg_atomic_uint64 latest_page_number
Definition: slru.h:115
XLogRecPtr * group_lsn
Definition: slru.h:107
int * bank_cur_lru_count
Definition: slru.h:97
int lsn_groups_per_page
Definition: slru.h:108
SlruPageStatus * page_status
Definition: slru.h:71
bool * page_dirty
Definition: slru.h:72
LWLockPadded * buffer_locks
Definition: slru.h:77
char ** page_buffer
Definition: slru.h:70
int num_files
Definition: slru.c:113
int fd[MAX_WRITEALL_BUFFERS]
Definition: slru.c:114
int64 segno[MAX_WRITEALL_BUFFERS]
Definition: slru.c:115
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition: sync.c:580
SyncRequestHandler
Definition: sync.h:36
@ SYNC_HANDLER_NONE
Definition: sync.h:42
@ SYNC_FORGET_REQUEST
Definition: sync.h:27
@ SYNC_REQUEST
Definition: sync.h:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
#define InvalidTransactionId
Definition: transam.h:31
LWLock lock
Definition: lwlock.h:70
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:82
static void pgstat_report_wait_end(void)
Definition: wait_event.h:98
const char * name
CheckpointStatsData CheckpointStats
Definition: xlog.c:209
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2791
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
bool InRecovery
Definition: xlogutils.c:50