PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
slru.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slru.c
4  * Simple LRU buffering for transaction status logfiles
5  *
6  * We use a simple least-recently-used scheme to manage a pool of page
7  * buffers. Under ordinary circumstances we expect that write
8  * traffic will occur mostly to the latest page (and to the just-prior
9  * page, soon after a page transition). Read traffic will probably touch
10  * a larger span of pages, but in any case a fairly small number of page
11  * buffers should be sufficient. So, we just search the buffers using plain
12  * linear search; there's no need for a hashtable or anything fancy.
13  * The management algorithm is straight LRU except that we will never swap
14  * out the latest page (since we know it's going to be hit again eventually).
15  *
16  * We use a control LWLock to protect the shared data structures, plus
17  * per-buffer LWLocks that synchronize I/O for each buffer. The control lock
18  * must be held to examine or modify any shared state. A process that is
19  * reading in or writing out a page buffer does not hold the control lock,
20  * only the per-buffer lock for the buffer it is working on.
21  *
22  * "Holding the control lock" means exclusive lock in all cases except for
23  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
24  * the implications of that.
25  *
26  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
27  * before releasing the control lock. The per-buffer lock is released after
28  * completing the I/O, re-acquiring the control lock, and updating the shared
29  * state. (Deadlock is not possible here, because we never try to initiate
30  * I/O when someone else is already doing I/O on the same buffer.)
31  * To wait for I/O to complete, release the control lock, acquire the
32  * per-buffer lock in shared mode, immediately release the per-buffer lock,
33  * reacquire the control lock, and then recheck state (since arbitrary things
34  * could have happened while we didn't have the lock).
35  *
36  * As with the regular buffer manager, it is possible for another process
37  * to re-dirty a page that is currently being written out. This is handled
38  * by re-setting the page's page_dirty flag.
39  *
40  *
41  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
42  * Portions Copyright (c) 1994, Regents of the University of California
43  *
44  * src/backend/access/transam/slru.c
45  *
46  *-------------------------------------------------------------------------
47  */
48 #include "postgres.h"
49 
50 #include <fcntl.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53 
54 #include "access/slru.h"
55 #include "access/transam.h"
56 #include "access/xlog.h"
57 #include "storage/fd.h"
58 #include "storage/shmem.h"
59 #include "miscadmin.h"
60 
61 
/*
 * Format the path of segment file "seg" into "path", a buffer of at least
 * MAXPGPATH bytes: the control struct's Dir, a slash, then the segment
 * number as at least four uppercase hex digits.  Evaluates to snprintf's
 * return value.
 */
#define SlruFileName(ctl, path, seg) \
	snprintf((path), MAXPGPATH, "%s/%04X", (ctl)->Dir, (seg))
64 
65 /*
66  * During SimpleLruFlush(), we will usually not need to write/fsync more
67  * than one or two physical files, but we may need to write several pages
68  * per file. We can consolidate the I/O requests by leaving files open
69  * until control returns to SimpleLruFlush(). This data structure remembers
70  * which files are open.
71  */
#define MAX_FLUSH_BUFFERS	16

/*
 * Open-file cache used while flushing.  fd[] and segno[] are parallel
 * arrays; only their first num_files entries are meaningful.
 */
typedef struct SlruFlushData
{
	int			num_files;		/* # files actually open */
	int			fd[MAX_FLUSH_BUFFERS];	/* their FD's */
	int			segno[MAX_FLUSH_BUFFERS];	/* their log seg#s */
} SlruFlushData;

typedef struct SlruFlushData *SlruFlush;
82 
83 /*
84  * Macro to mark a buffer slot "most recently used". Note multiple evaluation
85  * of arguments!
86  *
87  * The reason for the if-test is that there are often many consecutive
88  * accesses to the same page (particularly the latest page). By suppressing
89  * useless increments of cur_lru_count, we reduce the probability that old
90  * pages' counts will "wrap around" and make them appear recently used.
91  *
92  * We allow this code to be executed concurrently by multiple processes within
93  * SimpleLruReadPage_ReadOnly(). As long as int reads and writes are atomic,
94  * this should not cause any completely-bogus values to enter the computation.
95  * However, it is possible for either cur_lru_count or individual
96  * page_lru_count entries to be "reset" to lower values than they should have,
97  * in case a process is delayed while it executes this macro. With care in
98  * SlruSelectLRUPage(), this does little harm, and in any case the absolute
99  * worst possible consequence is a nonoptimal choice of page to evict. The
100  * gain from allowing concurrent reads of SLRU pages seems worth it.
101  */
#define SlruRecentlyUsed(shared, slotno) \
	do { \
		int		lru_stamp = (shared)->cur_lru_count; \
		/* skip the bump when the slot already carries the current stamp */ \
		if ((shared)->page_lru_count[slotno] != lru_stamp) { \
			lru_stamp++; \
			(shared)->cur_lru_count = lru_stamp; \
			(shared)->page_lru_count[slotno] = lru_stamp; \
		} \
	} while (0)
110 
/*
 * Saved info for SlruReportIOError: which operation failed, and the errno
 * it failed with.  The physical I/O routines fill these in instead of
 * raising an error themselves.
 */
typedef enum
{
	SLRU_OPEN_FAILED,
	SLRU_SEEK_FAILED,
	SLRU_READ_FAILED,
	SLRU_WRITE_FAILED,
	SLRU_FSYNC_FAILED,
	SLRU_CLOSE_FAILED
} SlruErrorCause;

static SlruErrorCause slru_errcause;
static int	slru_errno;
124 
125 
126 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
127 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
128 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
129 static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
130 static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
131  SlruFlush fdata);
132 static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
133 static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
134 
135 static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
136  int segpage, void *data);
137 static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename);
138 
139 /*
140  * Initialization of shared memory
141  */
142 
143 Size
144 SimpleLruShmemSize(int nslots, int nlsns)
145 {
146  Size sz;
147 
148  /* we assume nslots isn't so large as to risk overflow */
149  sz = MAXALIGN(sizeof(SlruSharedData));
150  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
151  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
152  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
153  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
154  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
155  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
156 
157  if (nlsns > 0)
158  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
159 
160  return BUFFERALIGN(sz) + BLCKSZ * nslots;
161 }
162 
163 void
164 SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
165  LWLock *ctllock, const char *subdir, int tranche_id)
166 {
167  SlruShared shared;
168  bool found;
169 
170  shared = (SlruShared) ShmemInitStruct(name,
171  SimpleLruShmemSize(nslots, nlsns),
172  &found);
173 
174  if (!IsUnderPostmaster)
175  {
176  /* Initialize locks and shared memory area */
177  char *ptr;
178  Size offset;
179  int slotno;
180 
181  Assert(!found);
182 
183  memset(shared, 0, sizeof(SlruSharedData));
184 
185  shared->ControlLock = ctllock;
186 
187  shared->num_slots = nslots;
188  shared->lsn_groups_per_page = nlsns;
189 
190  shared->cur_lru_count = 0;
191 
192  /* shared->latest_page_number will be set later */
193 
194  ptr = (char *) shared;
195  offset = MAXALIGN(sizeof(SlruSharedData));
196  shared->page_buffer = (char **) (ptr + offset);
197  offset += MAXALIGN(nslots * sizeof(char *));
198  shared->page_status = (SlruPageStatus *) (ptr + offset);
199  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
200  shared->page_dirty = (bool *) (ptr + offset);
201  offset += MAXALIGN(nslots * sizeof(bool));
202  shared->page_number = (int *) (ptr + offset);
203  offset += MAXALIGN(nslots * sizeof(int));
204  shared->page_lru_count = (int *) (ptr + offset);
205  offset += MAXALIGN(nslots * sizeof(int));
206 
207  if (nlsns > 0)
208  {
209  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
210  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
211  }
212 
213  /* Initialize LWLocks */
214  shared->buffer_locks = (LWLockPadded *) ShmemAlloc(sizeof(LWLockPadded) * nslots);
215 
216  Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
218  shared->lwlock_tranche_id = tranche_id;
219 
220  ptr += BUFFERALIGN(offset);
221  for (slotno = 0; slotno < nslots; slotno++)
222  {
223  LWLockInitialize(&shared->buffer_locks[slotno].lock,
224  shared->lwlock_tranche_id);
225 
226  shared->page_buffer[slotno] = ptr;
227  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
228  shared->page_dirty[slotno] = false;
229  shared->page_lru_count[slotno] = 0;
230  ptr += BLCKSZ;
231  }
232  }
233  else
234  Assert(found);
235 
236  /* Register SLRU tranche in the main tranches array */
238  shared->lwlock_tranche_name);
239 
240  /*
241  * Initialize the unshared control struct, including directory path. We
242  * assume caller set PagePrecedes.
243  */
244  ctl->shared = shared;
245  ctl->do_fsync = true; /* default behavior */
246  StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
247 }
248 
249 /*
250  * Initialize (or reinitialize) a page to zeroes.
251  *
252  * The page is not actually written, just set up in shared memory.
253  * The slot number of the new page is returned.
254  *
255  * Control lock must be held at entry, and will be held at exit.
256  */
257 int
258 SimpleLruZeroPage(SlruCtl ctl, int pageno)
259 {
260  SlruShared shared = ctl->shared;
261  int slotno;
262 
263  /* Find a suitable buffer slot for the page */
264  slotno = SlruSelectLRUPage(ctl, pageno);
265  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
266  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
267  !shared->page_dirty[slotno]) ||
268  shared->page_number[slotno] == pageno);
269 
270  /* Mark the slot as containing this page */
271  shared->page_number[slotno] = pageno;
272  shared->page_status[slotno] = SLRU_PAGE_VALID;
273  shared->page_dirty[slotno] = true;
274  SlruRecentlyUsed(shared, slotno);
275 
276  /* Set the buffer to zeroes */
277  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
278 
279  /* Set the LSNs for this new page to zero */
280  SimpleLruZeroLSNs(ctl, slotno);
281 
282  /* Assume this page is now the latest active page */
283  shared->latest_page_number = pageno;
284 
285  return slotno;
286 }
287 
288 /*
289  * Zero all the LSNs we store for this slru page.
290  *
291  * This should be called each time we create a new page, and each time we read
292  * in a page from disk into an existing buffer. (Such an old page cannot
293  * have any interesting LSNs, since we'd have flushed them before writing
294  * the page in the first place.)
295  *
296  * This assumes that InvalidXLogRecPtr is bitwise-all-0.
297  */
298 static void
299 SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
300 {
301  SlruShared shared = ctl->shared;
302 
303  if (shared->lsn_groups_per_page > 0)
304  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
305  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
306 }
307 
308 /*
309  * Wait for any active I/O on a page slot to finish. (This does not
310  * guarantee that new I/O hasn't been started before we return, though.
311  * In fact the slot might not even contain the same page anymore.)
312  *
313  * Control lock must be held at entry, and will be held at exit.
314  */
315 static void
316 SimpleLruWaitIO(SlruCtl ctl, int slotno)
317 {
318  SlruShared shared = ctl->shared;
319 
320  /* See notes at top of file */
321  LWLockRelease(shared->ControlLock);
322  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
323  LWLockRelease(&shared->buffer_locks[slotno].lock);
325 
326  /*
327  * If the slot is still in an io-in-progress state, then either someone
328  * already started a new I/O on the slot, or a previous I/O failed and
329  * neglected to reset the page state. That shouldn't happen, really, but
330  * it seems worth a few extra cycles to check and recover from it. We can
331  * cheaply test for failure by seeing if the buffer lock is still held (we
332  * assume that transaction abort would release the lock).
333  */
334  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
335  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
336  {
337  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
338  {
339  /* indeed, the I/O must have failed */
340  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
341  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
342  else /* write_in_progress */
343  {
344  shared->page_status[slotno] = SLRU_PAGE_VALID;
345  shared->page_dirty[slotno] = true;
346  }
347  LWLockRelease(&shared->buffer_locks[slotno].lock);
348  }
349  }
350 }
351 
352 /*
353  * Find a page in a shared buffer, reading it in if necessary.
354  * The page number must correspond to an already-initialized page.
355  *
356  * If write_ok is true then it is OK to return a page that is in
357  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
358  * that modification of the page is safe. If write_ok is false then we
359  * will not return the page until it is not undergoing active I/O.
360  *
361  * The passed-in xid is used only for error reporting, and may be
362  * InvalidTransactionId if no specific xid is associated with the action.
363  *
364  * Return value is the shared-buffer slot number now holding the page.
365  * The buffer's LRU access info is updated.
366  *
367  * Control lock must be held at entry, and will be held at exit.
368  */
369 int
370 SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
371  TransactionId xid)
372 {
373  SlruShared shared = ctl->shared;
374 
375  /* Outer loop handles restart if we must wait for someone else's I/O */
376  for (;;)
377  {
378  int slotno;
379  bool ok;
380 
381  /* See if page already is in memory; if not, pick victim slot */
382  slotno = SlruSelectLRUPage(ctl, pageno);
383 
384  /* Did we find the page in memory? */
385  if (shared->page_number[slotno] == pageno &&
386  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
387  {
388  /*
389  * If page is still being read in, we must wait for I/O. Likewise
390  * if the page is being written and the caller said that's not OK.
391  */
392  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
393  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
394  !write_ok))
395  {
396  SimpleLruWaitIO(ctl, slotno);
397  /* Now we must recheck state from the top */
398  continue;
399  }
400  /* Otherwise, it's ready to use */
401  SlruRecentlyUsed(shared, slotno);
402  return slotno;
403  }
404 
405  /* We found no match; assert we selected a freeable slot */
406  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
407  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
408  !shared->page_dirty[slotno]));
409 
410  /* Mark the slot read-busy */
411  shared->page_number[slotno] = pageno;
412  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
413  shared->page_dirty[slotno] = false;
414 
415  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
416  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
417 
418  /* Release control lock while doing I/O */
419  LWLockRelease(shared->ControlLock);
420 
421  /* Do the read */
422  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
423 
424  /* Set the LSNs for this newly read-in page to zero */
425  SimpleLruZeroLSNs(ctl, slotno);
426 
427  /* Re-acquire control lock and update page state */
429 
430  Assert(shared->page_number[slotno] == pageno &&
431  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
432  !shared->page_dirty[slotno]);
433 
434  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
435 
436  LWLockRelease(&shared->buffer_locks[slotno].lock);
437 
438  /* Now it's okay to ereport if we failed */
439  if (!ok)
440  SlruReportIOError(ctl, pageno, xid);
441 
442  SlruRecentlyUsed(shared, slotno);
443  return slotno;
444  }
445 }
446 
447 /*
448  * Find a page in a shared buffer, reading it in if necessary.
449  * The page number must correspond to an already-initialized page.
450  * The caller must intend only read-only access to the page.
451  *
452  * The passed-in xid is used only for error reporting, and may be
453  * InvalidTransactionId if no specific xid is associated with the action.
454  *
455  * Return value is the shared-buffer slot number now holding the page.
456  * The buffer's LRU access info is updated.
457  *
458  * Control lock must NOT be held at entry, but will be held at exit.
459  * It is unspecified whether the lock will be shared or exclusive.
460  */
461 int
463 {
464  SlruShared shared = ctl->shared;
465  int slotno;
466 
467  /* Try to find the page while holding only shared lock */
469 
470  /* See if page is already in a buffer */
471  for (slotno = 0; slotno < shared->num_slots; slotno++)
472  {
473  if (shared->page_number[slotno] == pageno &&
474  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
475  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
476  {
477  /* See comments for SlruRecentlyUsed macro */
478  SlruRecentlyUsed(shared, slotno);
479  return slotno;
480  }
481  }
482 
483  /* No luck, so switch to normal exclusive lock and do regular read */
484  LWLockRelease(shared->ControlLock);
486 
487  return SimpleLruReadPage(ctl, pageno, true, xid);
488 }
489 
490 /*
491  * Write a page from a shared buffer, if necessary.
492  * Does nothing if the specified slot is not dirty.
493  *
494  * NOTE: only one write attempt is made here. Hence, it is possible that
495  * the page is still dirty at exit (if someone else re-dirtied it during
496  * the write). However, we *do* attempt a fresh write even if the page
497  * is already being written; this is for checkpoints.
498  *
499  * Control lock must be held at entry, and will be held at exit.
500  */
501 static void
502 SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
503 {
504  SlruShared shared = ctl->shared;
505  int pageno = shared->page_number[slotno];
506  bool ok;
507 
508  /* If a write is in progress, wait for it to finish */
509  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
510  shared->page_number[slotno] == pageno)
511  {
512  SimpleLruWaitIO(ctl, slotno);
513  }
514 
515  /*
516  * Do nothing if page is not dirty, or if buffer no longer contains the
517  * same page we were called for.
518  */
519  if (!shared->page_dirty[slotno] ||
520  shared->page_status[slotno] != SLRU_PAGE_VALID ||
521  shared->page_number[slotno] != pageno)
522  return;
523 
524  /*
525  * Mark the slot write-busy, and clear the dirtybit. After this point, a
526  * transaction status update on this page will mark it dirty again.
527  */
528  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
529  shared->page_dirty[slotno] = false;
530 
531  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
532  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
533 
534  /* Release control lock while doing I/O */
535  LWLockRelease(shared->ControlLock);
536 
537  /* Do the write */
538  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
539 
540  /* If we failed, and we're in a flush, better close the files */
541  if (!ok && fdata)
542  {
543  int i;
544 
545  for (i = 0; i < fdata->num_files; i++)
546  CloseTransientFile(fdata->fd[i]);
547  }
548 
549  /* Re-acquire control lock and update page state */
551 
552  Assert(shared->page_number[slotno] == pageno &&
553  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
554 
555  /* If we failed to write, mark the page dirty again */
556  if (!ok)
557  shared->page_dirty[slotno] = true;
558 
559  shared->page_status[slotno] = SLRU_PAGE_VALID;
560 
561  LWLockRelease(&shared->buffer_locks[slotno].lock);
562 
563  /* Now it's okay to ereport if we failed */
564  if (!ok)
566 }
567 
568 /*
569  * Wrapper of SlruInternalWritePage, for external callers.
570  * fdata is always passed a NULL here.
571  */
572 void
573 SimpleLruWritePage(SlruCtl ctl, int slotno)
574 {
575  SlruInternalWritePage(ctl, slotno, NULL);
576 }
577 
578 /*
579  * Return whether the given page exists on disk.
580  *
581  * A false return means that either the file does not exist, or that it's not
582  * large enough to contain the given page.
583  */
584 bool
586 {
587  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
588  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
589  int offset = rpageno * BLCKSZ;
590  char path[MAXPGPATH];
591  int fd;
592  bool result;
593  off_t endpos;
594 
595  SlruFileName(ctl, path, segno);
596 
597  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
598  if (fd < 0)
599  {
600  /* expected: file doesn't exist */
601  if (errno == ENOENT)
602  return false;
603 
604  /* report error normally */
606  slru_errno = errno;
607  SlruReportIOError(ctl, pageno, 0);
608  }
609 
610  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
611  {
613  slru_errno = errno;
614  SlruReportIOError(ctl, pageno, 0);
615  }
616 
617  result = endpos >= (off_t) (offset + BLCKSZ);
618 
619  CloseTransientFile(fd);
620  return result;
621 }
622 
623 /*
624  * Physical read of a (previously existing) page into a buffer slot
625  *
626  * On failure, we cannot just ereport(ERROR) since caller has put state in
627  * shared memory that must be undone. So, we return FALSE and save enough
628  * info in static variables to let SlruReportIOError make the report.
629  *
630  * For now, assume it's not worth keeping a file pointer open across
631  * read/write operations. We could cache one virtual file pointer ...
632  */
633 static bool
634 SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
635 {
636  SlruShared shared = ctl->shared;
637  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
638  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
639  int offset = rpageno * BLCKSZ;
640  char path[MAXPGPATH];
641  int fd;
642 
643  SlruFileName(ctl, path, segno);
644 
645  /*
646  * In a crash-and-restart situation, it's possible for us to receive
647  * commands to set the commit status of transactions whose bits are in
648  * already-truncated segments of the commit log (see notes in
649  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
650  * where the file doesn't exist, and return zeroes instead.
651  */
652  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
653  if (fd < 0)
654  {
655  if (errno != ENOENT || !InRecovery)
656  {
658  slru_errno = errno;
659  return false;
660  }
661 
662  ereport(LOG,
663  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
664  path)));
665  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
666  return true;
667  }
668 
669  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
670  {
672  slru_errno = errno;
673  CloseTransientFile(fd);
674  return false;
675  }
676 
677  errno = 0;
678  if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
679  {
681  slru_errno = errno;
682  CloseTransientFile(fd);
683  return false;
684  }
685 
686  if (CloseTransientFile(fd))
687  {
689  slru_errno = errno;
690  return false;
691  }
692 
693  return true;
694 }
695 
696 /*
697  * Physical write of a page from a buffer slot
698  *
699  * On failure, we cannot just ereport(ERROR) since caller has put state in
700  * shared memory that must be undone. So, we return FALSE and save enough
701  * info in static variables to let SlruReportIOError make the report.
702  *
703  * For now, assume it's not worth keeping a file pointer open across
704  * independent read/write operations. We do batch operations during
705  * SimpleLruFlush, though.
706  *
707  * fdata is NULL for a standalone write, pointer to open-file info during
708  * SimpleLruFlush.
709  */
710 static bool
711 SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
712 {
713  SlruShared shared = ctl->shared;
714  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
715  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
716  int offset = rpageno * BLCKSZ;
717  char path[MAXPGPATH];
718  int fd = -1;
719 
720  /*
721  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
722  * write out data before associated WAL records. This is the same action
723  * performed during FlushBuffer() in the main buffer manager.
724  */
725  if (shared->group_lsn != NULL)
726  {
727  /*
728  * We must determine the largest async-commit LSN for the page. This
729  * is a bit tedious, but since this entire function is a slow path
730  * anyway, it seems better to do this here than to maintain a per-page
731  * LSN variable (which'd need an extra comparison in the
732  * transaction-commit path).
733  */
734  XLogRecPtr max_lsn;
735  int lsnindex,
736  lsnoff;
737 
738  lsnindex = slotno * shared->lsn_groups_per_page;
739  max_lsn = shared->group_lsn[lsnindex++];
740  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
741  {
742  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
743 
744  if (max_lsn < this_lsn)
745  max_lsn = this_lsn;
746  }
747 
748  if (!XLogRecPtrIsInvalid(max_lsn))
749  {
750  /*
751  * As noted above, elog(ERROR) is not acceptable here, so if
752  * XLogFlush were to fail, we must PANIC. This isn't much of a
753  * restriction because XLogFlush is just about all critical
754  * section anyway, but let's make sure.
755  */
757  XLogFlush(max_lsn);
759  }
760  }
761 
762  /*
763  * During a Flush, we may already have the desired file open.
764  */
765  if (fdata)
766  {
767  int i;
768 
769  for (i = 0; i < fdata->num_files; i++)
770  {
771  if (fdata->segno[i] == segno)
772  {
773  fd = fdata->fd[i];
774  break;
775  }
776  }
777  }
778 
779  if (fd < 0)
780  {
781  /*
782  * If the file doesn't already exist, we should create it. It is
783  * possible for this to need to happen when writing a page that's not
784  * first in its segment; we assume the OS can cope with that. (Note:
785  * it might seem that it'd be okay to create files only when
786  * SimpleLruZeroPage is called for the first page of a segment.
787  * However, if after a crash and restart the REDO logic elects to
788  * replay the log from a checkpoint before the latest one, then it's
789  * possible that we will get commands to set transaction status of
790  * transactions that have already been truncated from the commit log.
791  * Easiest way to deal with that is to accept references to
792  * nonexistent files here and in SlruPhysicalReadPage.)
793  *
794  * Note: it is possible for more than one backend to be executing this
795  * code simultaneously for different pages of the same file. Hence,
796  * don't use O_EXCL or O_TRUNC or anything like that.
797  */
798  SlruFileName(ctl, path, segno);
799  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY,
800  S_IRUSR | S_IWUSR);
801  if (fd < 0)
802  {
804  slru_errno = errno;
805  return false;
806  }
807 
808  if (fdata)
809  {
810  if (fdata->num_files < MAX_FLUSH_BUFFERS)
811  {
812  fdata->fd[fdata->num_files] = fd;
813  fdata->segno[fdata->num_files] = segno;
814  fdata->num_files++;
815  }
816  else
817  {
818  /*
819  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
820  * fall back to treating it as a standalone write.
821  */
822  fdata = NULL;
823  }
824  }
825  }
826 
827  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
828  {
830  slru_errno = errno;
831  if (!fdata)
832  CloseTransientFile(fd);
833  return false;
834  }
835 
836  errno = 0;
837  if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
838  {
839  /* if write didn't set errno, assume problem is no disk space */
840  if (errno == 0)
841  errno = ENOSPC;
843  slru_errno = errno;
844  if (!fdata)
845  CloseTransientFile(fd);
846  return false;
847  }
848 
849  /*
850  * If not part of Flush, need to fsync now. We assume this happens
851  * infrequently enough that it's not a performance issue.
852  */
853  if (!fdata)
854  {
855  if (ctl->do_fsync && pg_fsync(fd))
856  {
858  slru_errno = errno;
859  CloseTransientFile(fd);
860  return false;
861  }
862 
863  if (CloseTransientFile(fd))
864  {
866  slru_errno = errno;
867  return false;
868  }
869  }
870 
871  return true;
872 }
873 
874 /*
875  * Issue the error message after failure of SlruPhysicalReadPage or
876  * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
877  */
878 static void
880 {
881  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
882  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
883  int offset = rpageno * BLCKSZ;
884  char path[MAXPGPATH];
885 
886  SlruFileName(ctl, path, segno);
887  errno = slru_errno;
888  switch (slru_errcause)
889  {
890  case SLRU_OPEN_FAILED:
891  ereport(ERROR,
893  errmsg("could not access status of transaction %u", xid),
894  errdetail("Could not open file \"%s\": %m.", path)));
895  break;
896  case SLRU_SEEK_FAILED:
897  ereport(ERROR,
899  errmsg("could not access status of transaction %u", xid),
900  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
901  path, offset)));
902  break;
903  case SLRU_READ_FAILED:
904  ereport(ERROR,
906  errmsg("could not access status of transaction %u", xid),
907  errdetail("Could not read from file \"%s\" at offset %u: %m.",
908  path, offset)));
909  break;
910  case SLRU_WRITE_FAILED:
911  ereport(ERROR,
913  errmsg("could not access status of transaction %u", xid),
914  errdetail("Could not write to file \"%s\" at offset %u: %m.",
915  path, offset)));
916  break;
917  case SLRU_FSYNC_FAILED:
918  ereport(ERROR,
920  errmsg("could not access status of transaction %u", xid),
921  errdetail("Could not fsync file \"%s\": %m.",
922  path)));
923  break;
924  case SLRU_CLOSE_FAILED:
925  ereport(ERROR,
927  errmsg("could not access status of transaction %u", xid),
928  errdetail("Could not close file \"%s\": %m.",
929  path)));
930  break;
931  default:
932  /* can't get here, we trust */
933  elog(ERROR, "unrecognized SimpleLru error cause: %d",
934  (int) slru_errcause);
935  break;
936  }
937 }
938 
939 /*
940  * Select the slot to re-use when we need a free slot.
941  *
942  * The target page number is passed because we need to consider the
943  * possibility that some other process reads in the target page while
944  * we are doing I/O to free a slot. Hence, check or recheck to see if
945  * any slot already holds the target page, and return that slot if so.
946  * Thus, the returned slot is *either* a slot already holding the pageno
947  * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
948  * or CLEAN).
949  *
950  * Control lock must be held at entry, and will be held at exit.
951  */
952 static int
953 SlruSelectLRUPage(SlruCtl ctl, int pageno)
954 {
955  SlruShared shared = ctl->shared;
956 
957  /* Outer loop handles restart after I/O */
958  for (;;)
959  {
960  int slotno;
961  int cur_count;
962  int bestvalidslot = 0; /* keep compiler quiet */
963  int best_valid_delta = -1;
964  int best_valid_page_number = 0; /* keep compiler quiet */
965  int bestinvalidslot = 0; /* keep compiler quiet */
966  int best_invalid_delta = -1;
967  int best_invalid_page_number = 0; /* keep compiler quiet */
968 
969  /* See if page already has a buffer assigned */
970  for (slotno = 0; slotno < shared->num_slots; slotno++)
971  {
972  if (shared->page_number[slotno] == pageno &&
973  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
974  return slotno;
975  }
976 
977  /*
978  * If we find any EMPTY slot, just select that one. Else choose a
979  * victim page to replace. We normally take the least recently used
980  * valid page, but we will never take the slot containing
981  * latest_page_number, even if it appears least recently used. We
982  * will select a slot that is already I/O busy only if there is no
983  * other choice: a read-busy slot will not be least recently used once
984  * the read finishes, and waiting for an I/O on a write-busy slot is
985  * inferior to just picking some other slot. Testing shows the slot
986  * we pick instead will often be clean, allowing us to begin a read at
987  * once.
988  *
989  * Normally the page_lru_count values will all be different and so
990  * there will be a well-defined LRU page. But since we allow
991  * concurrent execution of SlruRecentlyUsed() within
992  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
993  * acquire the same lru_count values. In that case we break ties by
994  * choosing the furthest-back page.
995  *
996  * Notice that this next line forcibly advances cur_lru_count to a
997  * value that is certainly beyond any value that will be in the
998  * page_lru_count array after the loop finishes. This ensures that
999  * the next execution of SlruRecentlyUsed will mark the page newly
1000  * used, even if it's for a page that has the current counter value.
1001  * That gets us back on the path to having good data when there are
1002  * multiple pages with the same lru_count.
1003  */
1004  cur_count = (shared->cur_lru_count)++;
1005  for (slotno = 0; slotno < shared->num_slots; slotno++)
1006  {
1007  int this_delta;
1008  int this_page_number;
1009 
1010  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1011  return slotno;
1012  this_delta = cur_count - shared->page_lru_count[slotno];
1013  if (this_delta < 0)
1014  {
1015  /*
1016  * Clean up in case shared updates have caused cur_count
1017  * increments to get "lost". We back off the page counts,
1018  * rather than trying to increase cur_count, to avoid any
1019  * question of infinite loops or failure in the presence of
1020  * wrapped-around counts.
1021  */
1022  shared->page_lru_count[slotno] = cur_count;
1023  this_delta = 0;
1024  }
1025  this_page_number = shared->page_number[slotno];
1026  if (this_page_number == shared->latest_page_number)
1027  continue;
1028  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1029  {
1030  if (this_delta > best_valid_delta ||
1031  (this_delta == best_valid_delta &&
1032  ctl->PagePrecedes(this_page_number,
1033  best_valid_page_number)))
1034  {
1035  bestvalidslot = slotno;
1036  best_valid_delta = this_delta;
1037  best_valid_page_number = this_page_number;
1038  }
1039  }
1040  else
1041  {
1042  if (this_delta > best_invalid_delta ||
1043  (this_delta == best_invalid_delta &&
1044  ctl->PagePrecedes(this_page_number,
1045  best_invalid_page_number)))
1046  {
1047  bestinvalidslot = slotno;
1048  best_invalid_delta = this_delta;
1049  best_invalid_page_number = this_page_number;
1050  }
1051  }
1052  }
1053 
1054  /*
1055  * If all pages (except possibly the latest one) are I/O busy, we'll
1056  * have to wait for an I/O to complete and then retry. In that
1057  * unhappy case, we choose to wait for the I/O on the least recently
1058  * used slot, on the assumption that it was likely initiated first of
1059  * all the I/Os in progress and may therefore finish first.
1060  */
1061  if (best_valid_delta < 0)
1062  {
1063  SimpleLruWaitIO(ctl, bestinvalidslot);
1064  continue;
1065  }
1066 
1067  /*
1068  * If the selected page is clean, we're set.
1069  */
1070  if (!shared->page_dirty[bestvalidslot])
1071  return bestvalidslot;
1072 
1073  /*
1074  * Write the page.
1075  */
1076  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1077 
1078  /*
1079  * Now loop back and try again. This is the easiest way of dealing
1080  * with corner cases such as the victim page being re-dirtied while we
1081  * wrote it.
1082  */
1083  }
1084 }
1085 
1086 /*
1087  * Flush dirty pages to disk during checkpoint or database shutdown
1088  */
1089 void
1090 SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
1091 {
1092  SlruShared shared = ctl->shared;
1093  SlruFlushData fdata;
1094  int slotno;
1095  int pageno = 0;
1096  int i;
1097  bool ok;
1098 
1099  /*
1100  * Find and write dirty pages
1101  */
1102  fdata.num_files = 0;
1103 
1105 
1106  for (slotno = 0; slotno < shared->num_slots; slotno++)
1107  {
1108  SlruInternalWritePage(ctl, slotno, &fdata);
1109 
1110  /*
1111  * In some places (e.g. checkpoints), we cannot assert that the slot
1112  * is clean now, since another process might have re-dirtied it
1113  * already. That's okay.
1114  */
1115  Assert(allow_redirtied ||
1116  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1117  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1118  !shared->page_dirty[slotno]));
1119  }
1120 
1121  LWLockRelease(shared->ControlLock);
1122 
1123  /*
1124  * Now fsync and close any files that were open
1125  */
1126  ok = true;
1127  for (i = 0; i < fdata.num_files; i++)
1128  {
1129  if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
1130  {
1132  slru_errno = errno;
1133  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1134  ok = false;
1135  }
1136 
1137  if (CloseTransientFile(fdata.fd[i]))
1138  {
1140  slru_errno = errno;
1141  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1142  ok = false;
1143  }
1144  }
1145  if (!ok)
1147 }
1148 
1149 /*
1150  * Remove all segments before the one holding the passed page number
1151  */
1152 void
1153 SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
1154 {
1155  SlruShared shared = ctl->shared;
1156  int slotno;
1157 
1158  /*
1159  * The cutoff point is the start of the segment containing cutoffPage.
1160  */
1161  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1162 
1163  /*
1164  * Scan shared memory and remove any pages preceding the cutoff page, to
1165  * ensure we won't rewrite them later. (Since this is normally called in
1166  * or just after a checkpoint, any dirty pages should have been flushed
1167  * already ... we're just being extra careful here.)
1168  */
1170 
1171 restart:;
1172 
1173  /*
1174  * While we are holding the lock, make an important safety check: the
1175  * planned cutoff point must be <= the current endpoint page. Otherwise we
1176  * have already wrapped around, and proceeding with the truncation would
1177  * risk removing the current segment.
1178  */
1179  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1180  {
1181  LWLockRelease(shared->ControlLock);
1182  ereport(LOG,
1183  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1184  ctl->Dir)));
1185  return;
1186  }
1187 
1188  for (slotno = 0; slotno < shared->num_slots; slotno++)
1189  {
1190  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1191  continue;
1192  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1193  continue;
1194 
1195  /*
1196  * If page is clean, just change state to EMPTY (expected case).
1197  */
1198  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1199  !shared->page_dirty[slotno])
1200  {
1201  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1202  continue;
1203  }
1204 
1205  /*
1206  * Hmm, we have (or may have) I/O operations acting on the page, so
1207  * we've got to wait for them to finish and then start again. This is
1208  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1209  * wouldn't it be OK to just discard it without writing it? For now,
1210  * keep the logic the same as it was.)
1211  */
1212  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1213  SlruInternalWritePage(ctl, slotno, NULL);
1214  else
1215  SimpleLruWaitIO(ctl, slotno);
1216  goto restart;
1217  }
1218 
1219  LWLockRelease(shared->ControlLock);
1220 
1221  /* Now we can remove the old segment(s) */
1222  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1223 }
1224 
1225 /*
1226  * Delete an individual SLRU segment, identified by the filename.
1227  *
1228  * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1229  * they either can't yet contain anything, or have already been cleaned out.
1230  */
1231 static void
1233 {
1234  char path[MAXPGPATH];
1235 
1236  snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1237  ereport(DEBUG2,
1238  (errmsg("removing file \"%s\"", path)));
1239  unlink(path);
1240 }
1241 
1242 /*
1243  * Delete an individual SLRU segment, identified by the segment number.
1244  */
1245 void
1247 {
1248  SlruShared shared = ctl->shared;
1249  int slotno;
1250  char path[MAXPGPATH];
1251  bool did_write;
1252 
1253  /* Clean out any possibly existing references to the segment. */
1255 restart:
1256  did_write = false;
1257  for (slotno = 0; slotno < shared->num_slots; slotno++)
1258  {
1259  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1260 
1261  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1262  continue;
1263 
1264  /* not the segment we're looking for */
1265  if (pagesegno != segno)
1266  continue;
1267 
1268  /* If page is clean, just change state to EMPTY (expected case). */
1269  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1270  !shared->page_dirty[slotno])
1271  {
1272  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1273  continue;
1274  }
1275 
1276  /* Same logic as SimpleLruTruncate() */
1277  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1278  SlruInternalWritePage(ctl, slotno, NULL);
1279  else
1280  SimpleLruWaitIO(ctl, slotno);
1281 
1282  did_write = true;
1283  }
1284 
1285  /*
1286  * Be extra careful and re-check. The IO functions release the control
1287  * lock, so new pages could have been read in.
1288  */
1289  if (did_write)
1290  goto restart;
1291 
1292  snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
1293  ereport(DEBUG2,
1294  (errmsg("removing file \"%s\"", path)));
1295  unlink(path);
1296 
1297  LWLockRelease(shared->ControlLock);
1298 }
1299 
1300 /*
1301  * SlruScanDirectory callback
1302  * This callback reports true if there's any segment prior to the one
1303  * containing the page passed as "data".
1304  */
1305 bool
1306 SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
1307 {
1308  int cutoffPage = *(int *) data;
1309 
1310  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1311 
1312  if (ctl->PagePrecedes(segpage, cutoffPage))
1313  return true; /* found one; don't iterate any more */
1314 
1315  return false; /* keep going */
1316 }
1317 
1318 /*
1319  * SlruScanDirectory callback.
1320  * This callback deletes segments prior to the one passed in as "data".
1321  */
1322 static bool
1323 SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
1324 {
1325  int cutoffPage = *(int *) data;
1326 
1327  if (ctl->PagePrecedes(segpage, cutoffPage))
1328  SlruInternalDeleteSegment(ctl, filename);
1329 
1330  return false; /* keep going */
1331 }
1332 
1333 /*
1334  * SlruScanDirectory callback.
1335  * This callback deletes all segments.
1336  */
1337 bool
1338 SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
1339 {
1340  SlruInternalDeleteSegment(ctl, filename);
1341 
1342  return false; /* keep going */
1343 }
1344 
1345 /*
1346  * Scan the SimpleLRU directory and apply a callback to each file found in it.
1347  *
1348  * If the callback returns true, the scan is stopped. The last return value
1349  * from the callback is returned.
1350  *
1351  * The callback receives the following arguments: 1. the SlruCtl struct for the
1352  * slru being truncated; 2. the filename being considered; 3. the page number
1353  * for the first page of that file; 4. a pointer to the opaque data given to us
1354  * by the caller.
1355  *
1356  * Note that the ordering in which the directory is scanned is not guaranteed.
1357  *
1358  * Note that no locking is applied.
1359  */
1360 bool
1362 {
1363  bool retval = false;
1364  DIR *cldir;
1365  struct dirent *clde;
1366  int segno;
1367  int segpage;
1368 
1369  cldir = AllocateDir(ctl->Dir);
1370  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1371  {
1372  size_t len;
1373 
1374  len = strlen(clde->d_name);
1375 
1376  if ((len == 4 || len == 5 || len == 6) &&
1377  strspn(clde->d_name, "0123456789ABCDEF") == len)
1378  {
1379  segno = (int) strtol(clde->d_name, NULL, 16);
1380  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1381 
1382  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1383  ctl->Dir, clde->d_name);
1384  retval = callback(ctl, clde->d_name, segpage, data);
1385  if (retval)
1386  break;
1387  }
1388  }
1389  FreeDir(cldir);
1390 
1391  return retval;
1392 }
LWLock * ControlLock
Definition: slru.h:61
int * page_number
Definition: slru.h:73
Definition: lwlock.h:32
SlruPageStatus
Definition: slru.h:48
uint32 TransactionId
Definition: c.h:394
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:502
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1338
#define write(a, b, c)
Definition: win32.h:19
int latest_page_number
Definition: slru.h:103
char ** page_buffer
Definition: slru.h:70
void SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
Definition: slru.c:1153
bool InRecovery
Definition: xlog.c:191
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
SlruErrorCause
Definition: slru.c:112
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
Definition: slru.c:711
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
#define MemSet(start, val, len)
Definition: c.h:853
static SlruErrorCause slru_errcause
Definition: slru.c:122
int cur_lru_count
Definition: slru.h:96
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:299
int lsn_groups_per_page
Definition: slru.h:85
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:78
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void * ShmemAlloc(Size size)
Definition: shmem.c:157
#define LOG
Definition: elog.h:26
Definition: dirent.h:9
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:144
void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1090
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2745
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:879
#define PG_BINARY
Definition: c.h:1038
struct SlruFlushData * SlruFlush
Definition: slru.c:81
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1714
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1232
static XLogRecPtr endpos
SlruPageStatus * page_status
Definition: slru.h:71
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH]
Definition: slru.h:107
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:370
#define MAXPGPATH
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:49
#define DEBUG2
Definition: elog.h:24
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1282
#define MAX_FLUSH_BUFFERS
Definition: slru.c:72
bool IsUnderPostmaster
Definition: globals.c:100
LWLockPadded * buffer_locks
Definition: slru.h:108
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2093
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
#define InvalidTransactionId
Definition: transam.h:31
XLogRecPtr * group_lsn
Definition: slru.h:84
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2284
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
Definition: slru.c:585
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:573
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:316
bool do_fsync
Definition: slru.h:125
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:666
int CloseTransientFile(int fd)
Definition: fd.c:2254
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
SlruSharedData * SlruShared
Definition: slru.h:111
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1306
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1323
#define SlruFileName(ctl, path, seg)
Definition: slru.c:62
char Dir[64]
Definition: slru.h:138
bool(* PagePrecedes)(int, int)
Definition: slru.h:132
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:462
#define SLRU_MAX_NAME_LENGTH
Definition: slru.h:40
int num_files
Definition: slru.c:76
LWLock lock
Definition: lwlock.h:79
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:634
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int * page_lru_count
Definition: slru.h:74
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:671
#define StrNCpy(dst, src, len)
Definition: c.h:826
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1361
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2350
size_t Size
Definition: c.h:353
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1110
#define MAXALIGN(LEN)
Definition: c.h:584
void LWLockRegisterTranche(int tranche_id, char *tranche_name)
Definition: lwlock.c:591
int num_slots
Definition: slru.h:64
const char * name
Definition: encode.c:521
struct SlruFlushData SlruFlushData
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:953
static int slru_errno
Definition: slru.c:123
static char * filename
Definition: pg_dumpall.c:84
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:72
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.h:157
int i
SlruShared shared
Definition: slru.h:119
#define BUFFERALIGN(LEN)
Definition: c.h:586
int lwlock_tranche_id
Definition: slru.h:106
void SlruDeleteSegment(SlruCtl ctl, int segno)
Definition: slru.c:1246
int pg_fsync(int fd)
Definition: fd.c:333
char d_name[MAX_PATH]
Definition: dirent.h:14
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:37
int SimpleLruZeroPage(SlruCtl ctl, int pageno)
Definition: slru.c:258
#define read(a, b, c)
Definition: win32.h:18
int FreeDir(DIR *dir)
Definition: fd.c:2393
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
Definition: slru.c:164
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:102
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:77