PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
slru.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * slru.c
4  * Simple LRU buffering for transaction status logfiles
5  *
6  * We use a simple least-recently-used scheme to manage a pool of page
7  * buffers. Under ordinary circumstances we expect that write
8  * traffic will occur mostly to the latest page (and to the just-prior
9  * page, soon after a page transition). Read traffic will probably touch
10  * a larger span of pages, but in any case a fairly small number of page
11  * buffers should be sufficient. So, we just search the buffers using plain
12  * linear search; there's no need for a hashtable or anything fancy.
13  * The management algorithm is straight LRU except that we will never swap
14  * out the latest page (since we know it's going to be hit again eventually).
15  *
16  * We use a control LWLock to protect the shared data structures, plus
17  * per-buffer LWLocks that synchronize I/O for each buffer. The control lock
18  * must be held to examine or modify any shared state. A process that is
19  * reading in or writing out a page buffer does not hold the control lock,
20  * only the per-buffer lock for the buffer it is working on.
21  *
22  * "Holding the control lock" means exclusive lock in all cases except for
23  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
24  * the implications of that.
25  *
26  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
27  * before releasing the control lock. The per-buffer lock is released after
28  * completing the I/O, re-acquiring the control lock, and updating the shared
29  * state. (Deadlock is not possible here, because we never try to initiate
30  * I/O when someone else is already doing I/O on the same buffer.)
31  * To wait for I/O to complete, release the control lock, acquire the
32  * per-buffer lock in shared mode, immediately release the per-buffer lock,
33  * reacquire the control lock, and then recheck state (since arbitrary things
34  * could have happened while we didn't have the lock).
35  *
36  * As with the regular buffer manager, it is possible for another process
37  * to re-dirty a page that is currently being written out. This is handled
38  * by re-setting the page's page_dirty flag.
39  *
40  *
41  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
42  * Portions Copyright (c) 1994, Regents of the University of California
43  *
44  * src/backend/access/transam/slru.c
45  *
46  *-------------------------------------------------------------------------
47  */
48 #include "postgres.h"
49 
50 #include <fcntl.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53 
54 #include "access/slru.h"
55 #include "access/transam.h"
56 #include "access/xlog.h"
57 #include "pgstat.h"
58 #include "storage/fd.h"
59 #include "storage/shmem.h"
60 #include "miscadmin.h"
61 
62 
/*
 * Format the path of segment file "seg" into "path": the SLRU's directory
 * (ctl->Dir), a slash, and the segment number as at least four upper-case
 * hex digits.  "path" must have room for MAXPGPATH bytes.
 */
#define SlruFileName(ctl, path, seg) \
    snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
65 
/*
 * During SimpleLruFlush(), we will usually not need to write/fsync more
 * than one or two physical files, but we may need to write several pages
 * per file.  We can consolidate the I/O requests by leaving files open
 * until control returns to SimpleLruFlush().  This data structure remembers
 * which files are open.
 *
 * Entries 0 .. num_files-1 of fd[] and segno[] are in use; fd[i] is the
 * descriptor opened for segment segno[i].
 */
#define MAX_FLUSH_BUFFERS   16

typedef struct SlruFlushData
{
    int         num_files;                  /* # files actually open */
    int         fd[MAX_FLUSH_BUFFERS];      /* their FD's */
    int         segno[MAX_FLUSH_BUFFERS];   /* their log seg#s */
} SlruFlushData;

typedef struct SlruFlushData *SlruFlush;
83 
/*
 * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
 * of arguments!
 *
 * The reason for the if-test is that there are often many consecutive
 * accesses to the same page (particularly the latest page).  By suppressing
 * useless increments of cur_lru_count, we reduce the probability that old
 * pages' counts will "wrap around" and make them appear recently used.
 *
 * We allow this code to be executed concurrently by multiple processes within
 * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
 * this should not cause any completely-bogus values to enter the computation.
 * However, it is possible for either cur_lru_count or individual
 * page_lru_count entries to be "reset" to lower values than they should have,
 * in case a process is delayed while it executes this macro.  With care in
 * SlruSelectLRUPage(), this does little harm, and in any case the absolute
 * worst possible consequence is a nonoptimal choice of page to evict.  The
 * gain from allowing concurrent reads of SLRU pages seems worth it.
 */
#define SlruRecentlyUsed(shared, slotno) \
    do { \
        int     new_lru_count = (shared)->cur_lru_count; \
        if (new_lru_count != (shared)->page_lru_count[(slotno)]) { \
            (shared)->cur_lru_count = ++new_lru_count; \
            (shared)->page_lru_count[(slotno)] = new_lru_count; \
        } \
    } while (0)
111 
/*
 * Saved info for SlruReportIOError.
 *
 * The physical I/O routines cannot raise an error themselves, so they record
 * which step failed (slru_errcause) and the errno it left (slru_errno) here;
 * SlruReportIOError() turns the pair into a message later.
 */
typedef enum
{
    SLRU_OPEN_FAILED,
    SLRU_SEEK_FAILED,
    SLRU_READ_FAILED,
    SLRU_WRITE_FAILED,
    SLRU_FSYNC_FAILED,
    SLRU_CLOSE_FAILED
} SlruErrorCause;

static SlruErrorCause slru_errcause;
static int  slru_errno;
125 
126 
127 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
128 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
129 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
130 static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
131 static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
132  SlruFlush fdata);
133 static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
134 static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
135 
136 static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
137  int segpage, void *data);
138 static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename);
139 
140 /*
141  * Initialization of shared memory
142  */
143 
144 Size
145 SimpleLruShmemSize(int nslots, int nlsns)
146 {
147  Size sz;
148 
149  /* we assume nslots isn't so large as to risk overflow */
150  sz = MAXALIGN(sizeof(SlruSharedData));
151  sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
152  sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
153  sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
154  sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
155  sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
156  sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
157 
158  if (nlsns > 0)
159  sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
160 
161  return BUFFERALIGN(sz) + BLCKSZ * nslots;
162 }
163 
164 void
165 SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
166  LWLock *ctllock, const char *subdir, int tranche_id)
167 {
168  SlruShared shared;
169  bool found;
170 
171  shared = (SlruShared) ShmemInitStruct(name,
172  SimpleLruShmemSize(nslots, nlsns),
173  &found);
174 
175  if (!IsUnderPostmaster)
176  {
177  /* Initialize locks and shared memory area */
178  char *ptr;
179  Size offset;
180  int slotno;
181 
182  Assert(!found);
183 
184  memset(shared, 0, sizeof(SlruSharedData));
185 
186  shared->ControlLock = ctllock;
187 
188  shared->num_slots = nslots;
189  shared->lsn_groups_per_page = nlsns;
190 
191  shared->cur_lru_count = 0;
192 
193  /* shared->latest_page_number will be set later */
194 
195  ptr = (char *) shared;
196  offset = MAXALIGN(sizeof(SlruSharedData));
197  shared->page_buffer = (char **) (ptr + offset);
198  offset += MAXALIGN(nslots * sizeof(char *));
199  shared->page_status = (SlruPageStatus *) (ptr + offset);
200  offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
201  shared->page_dirty = (bool *) (ptr + offset);
202  offset += MAXALIGN(nslots * sizeof(bool));
203  shared->page_number = (int *) (ptr + offset);
204  offset += MAXALIGN(nslots * sizeof(int));
205  shared->page_lru_count = (int *) (ptr + offset);
206  offset += MAXALIGN(nslots * sizeof(int));
207 
208  if (nlsns > 0)
209  {
210  shared->group_lsn = (XLogRecPtr *) (ptr + offset);
211  offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
212  }
213 
214  /* Initialize LWLocks */
215  shared->buffer_locks = (LWLockPadded *) ShmemAlloc(sizeof(LWLockPadded) * nslots);
216 
217  Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
219  shared->lwlock_tranche_id = tranche_id;
220 
221  ptr += BUFFERALIGN(offset);
222  for (slotno = 0; slotno < nslots; slotno++)
223  {
224  LWLockInitialize(&shared->buffer_locks[slotno].lock,
225  shared->lwlock_tranche_id);
226 
227  shared->page_buffer[slotno] = ptr;
228  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
229  shared->page_dirty[slotno] = false;
230  shared->page_lru_count[slotno] = 0;
231  ptr += BLCKSZ;
232  }
233  }
234  else
235  Assert(found);
236 
237  /* Register SLRU tranche in the main tranches array */
239  shared->lwlock_tranche_name);
240 
241  /*
242  * Initialize the unshared control struct, including directory path. We
243  * assume caller set PagePrecedes.
244  */
245  ctl->shared = shared;
246  ctl->do_fsync = true; /* default behavior */
247  StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
248 }
249 
250 /*
251  * Initialize (or reinitialize) a page to zeroes.
252  *
253  * The page is not actually written, just set up in shared memory.
254  * The slot number of the new page is returned.
255  *
256  * Control lock must be held at entry, and will be held at exit.
257  */
258 int
259 SimpleLruZeroPage(SlruCtl ctl, int pageno)
260 {
261  SlruShared shared = ctl->shared;
262  int slotno;
263 
264  /* Find a suitable buffer slot for the page */
265  slotno = SlruSelectLRUPage(ctl, pageno);
266  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
267  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
268  !shared->page_dirty[slotno]) ||
269  shared->page_number[slotno] == pageno);
270 
271  /* Mark the slot as containing this page */
272  shared->page_number[slotno] = pageno;
273  shared->page_status[slotno] = SLRU_PAGE_VALID;
274  shared->page_dirty[slotno] = true;
275  SlruRecentlyUsed(shared, slotno);
276 
277  /* Set the buffer to zeroes */
278  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
279 
280  /* Set the LSNs for this new page to zero */
281  SimpleLruZeroLSNs(ctl, slotno);
282 
283  /* Assume this page is now the latest active page */
284  shared->latest_page_number = pageno;
285 
286  return slotno;
287 }
288 
289 /*
290  * Zero all the LSNs we store for this slru page.
291  *
292  * This should be called each time we create a new page, and each time we read
293  * in a page from disk into an existing buffer. (Such an old page cannot
294  * have any interesting LSNs, since we'd have flushed them before writing
295  * the page in the first place.)
296  *
297  * This assumes that InvalidXLogRecPtr is bitwise-all-0.
298  */
299 static void
300 SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
301 {
302  SlruShared shared = ctl->shared;
303 
304  if (shared->lsn_groups_per_page > 0)
305  MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
306  shared->lsn_groups_per_page * sizeof(XLogRecPtr));
307 }
308 
309 /*
310  * Wait for any active I/O on a page slot to finish. (This does not
311  * guarantee that new I/O hasn't been started before we return, though.
312  * In fact the slot might not even contain the same page anymore.)
313  *
314  * Control lock must be held at entry, and will be held at exit.
315  */
316 static void
317 SimpleLruWaitIO(SlruCtl ctl, int slotno)
318 {
319  SlruShared shared = ctl->shared;
320 
321  /* See notes at top of file */
322  LWLockRelease(shared->ControlLock);
323  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
324  LWLockRelease(&shared->buffer_locks[slotno].lock);
326 
327  /*
328  * If the slot is still in an io-in-progress state, then either someone
329  * already started a new I/O on the slot, or a previous I/O failed and
330  * neglected to reset the page state. That shouldn't happen, really, but
331  * it seems worth a few extra cycles to check and recover from it. We can
332  * cheaply test for failure by seeing if the buffer lock is still held (we
333  * assume that transaction abort would release the lock).
334  */
335  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
336  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
337  {
338  if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
339  {
340  /* indeed, the I/O must have failed */
341  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
342  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
343  else /* write_in_progress */
344  {
345  shared->page_status[slotno] = SLRU_PAGE_VALID;
346  shared->page_dirty[slotno] = true;
347  }
348  LWLockRelease(&shared->buffer_locks[slotno].lock);
349  }
350  }
351 }
352 
353 /*
354  * Find a page in a shared buffer, reading it in if necessary.
355  * The page number must correspond to an already-initialized page.
356  *
357  * If write_ok is true then it is OK to return a page that is in
358  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
359  * that modification of the page is safe. If write_ok is false then we
360  * will not return the page until it is not undergoing active I/O.
361  *
362  * The passed-in xid is used only for error reporting, and may be
363  * InvalidTransactionId if no specific xid is associated with the action.
364  *
365  * Return value is the shared-buffer slot number now holding the page.
366  * The buffer's LRU access info is updated.
367  *
368  * Control lock must be held at entry, and will be held at exit.
369  */
370 int
371 SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
372  TransactionId xid)
373 {
374  SlruShared shared = ctl->shared;
375 
376  /* Outer loop handles restart if we must wait for someone else's I/O */
377  for (;;)
378  {
379  int slotno;
380  bool ok;
381 
382  /* See if page already is in memory; if not, pick victim slot */
383  slotno = SlruSelectLRUPage(ctl, pageno);
384 
385  /* Did we find the page in memory? */
386  if (shared->page_number[slotno] == pageno &&
387  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
388  {
389  /*
390  * If page is still being read in, we must wait for I/O. Likewise
391  * if the page is being written and the caller said that's not OK.
392  */
393  if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
394  (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
395  !write_ok))
396  {
397  SimpleLruWaitIO(ctl, slotno);
398  /* Now we must recheck state from the top */
399  continue;
400  }
401  /* Otherwise, it's ready to use */
402  SlruRecentlyUsed(shared, slotno);
403  return slotno;
404  }
405 
406  /* We found no match; assert we selected a freeable slot */
407  Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
408  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
409  !shared->page_dirty[slotno]));
410 
411  /* Mark the slot read-busy */
412  shared->page_number[slotno] = pageno;
413  shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
414  shared->page_dirty[slotno] = false;
415 
416  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
417  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
418 
419  /* Release control lock while doing I/O */
420  LWLockRelease(shared->ControlLock);
421 
422  /* Do the read */
423  ok = SlruPhysicalReadPage(ctl, pageno, slotno);
424 
425  /* Set the LSNs for this newly read-in page to zero */
426  SimpleLruZeroLSNs(ctl, slotno);
427 
428  /* Re-acquire control lock and update page state */
430 
431  Assert(shared->page_number[slotno] == pageno &&
432  shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
433  !shared->page_dirty[slotno]);
434 
435  shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
436 
437  LWLockRelease(&shared->buffer_locks[slotno].lock);
438 
439  /* Now it's okay to ereport if we failed */
440  if (!ok)
441  SlruReportIOError(ctl, pageno, xid);
442 
443  SlruRecentlyUsed(shared, slotno);
444  return slotno;
445  }
446 }
447 
448 /*
449  * Find a page in a shared buffer, reading it in if necessary.
450  * The page number must correspond to an already-initialized page.
451  * The caller must intend only read-only access to the page.
452  *
453  * The passed-in xid is used only for error reporting, and may be
454  * InvalidTransactionId if no specific xid is associated with the action.
455  *
456  * Return value is the shared-buffer slot number now holding the page.
457  * The buffer's LRU access info is updated.
458  *
459  * Control lock must NOT be held at entry, but will be held at exit.
460  * It is unspecified whether the lock will be shared or exclusive.
461  */
462 int
464 {
465  SlruShared shared = ctl->shared;
466  int slotno;
467 
468  /* Try to find the page while holding only shared lock */
470 
471  /* See if page is already in a buffer */
472  for (slotno = 0; slotno < shared->num_slots; slotno++)
473  {
474  if (shared->page_number[slotno] == pageno &&
475  shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
476  shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
477  {
478  /* See comments for SlruRecentlyUsed macro */
479  SlruRecentlyUsed(shared, slotno);
480  return slotno;
481  }
482  }
483 
484  /* No luck, so switch to normal exclusive lock and do regular read */
485  LWLockRelease(shared->ControlLock);
487 
488  return SimpleLruReadPage(ctl, pageno, true, xid);
489 }
490 
491 /*
492  * Write a page from a shared buffer, if necessary.
493  * Does nothing if the specified slot is not dirty.
494  *
495  * NOTE: only one write attempt is made here. Hence, it is possible that
496  * the page is still dirty at exit (if someone else re-dirtied it during
497  * the write). However, we *do* attempt a fresh write even if the page
498  * is already being written; this is for checkpoints.
499  *
500  * Control lock must be held at entry, and will be held at exit.
501  */
502 static void
503 SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
504 {
505  SlruShared shared = ctl->shared;
506  int pageno = shared->page_number[slotno];
507  bool ok;
508 
509  /* If a write is in progress, wait for it to finish */
510  while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
511  shared->page_number[slotno] == pageno)
512  {
513  SimpleLruWaitIO(ctl, slotno);
514  }
515 
516  /*
517  * Do nothing if page is not dirty, or if buffer no longer contains the
518  * same page we were called for.
519  */
520  if (!shared->page_dirty[slotno] ||
521  shared->page_status[slotno] != SLRU_PAGE_VALID ||
522  shared->page_number[slotno] != pageno)
523  return;
524 
525  /*
526  * Mark the slot write-busy, and clear the dirtybit. After this point, a
527  * transaction status update on this page will mark it dirty again.
528  */
529  shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
530  shared->page_dirty[slotno] = false;
531 
532  /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
533  LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
534 
535  /* Release control lock while doing I/O */
536  LWLockRelease(shared->ControlLock);
537 
538  /* Do the write */
539  ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
540 
541  /* If we failed, and we're in a flush, better close the files */
542  if (!ok && fdata)
543  {
544  int i;
545 
546  for (i = 0; i < fdata->num_files; i++)
547  CloseTransientFile(fdata->fd[i]);
548  }
549 
550  /* Re-acquire control lock and update page state */
552 
553  Assert(shared->page_number[slotno] == pageno &&
554  shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
555 
556  /* If we failed to write, mark the page dirty again */
557  if (!ok)
558  shared->page_dirty[slotno] = true;
559 
560  shared->page_status[slotno] = SLRU_PAGE_VALID;
561 
562  LWLockRelease(&shared->buffer_locks[slotno].lock);
563 
564  /* Now it's okay to ereport if we failed */
565  if (!ok)
567 }
568 
569 /*
570  * Wrapper of SlruInternalWritePage, for external callers.
571  * fdata is always passed a NULL here.
572  */
573 void
574 SimpleLruWritePage(SlruCtl ctl, int slotno)
575 {
576  SlruInternalWritePage(ctl, slotno, NULL);
577 }
578 
579 /*
580  * Return whether the given page exists on disk.
581  *
582  * A false return means that either the file does not exist, or that it's not
583  * large enough to contain the given page.
584  */
585 bool
587 {
588  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
589  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
590  int offset = rpageno * BLCKSZ;
591  char path[MAXPGPATH];
592  int fd;
593  bool result;
594  off_t endpos;
595 
596  SlruFileName(ctl, path, segno);
597 
598  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
599  if (fd < 0)
600  {
601  /* expected: file doesn't exist */
602  if (errno == ENOENT)
603  return false;
604 
605  /* report error normally */
607  slru_errno = errno;
608  SlruReportIOError(ctl, pageno, 0);
609  }
610 
611  if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
612  {
614  slru_errno = errno;
615  SlruReportIOError(ctl, pageno, 0);
616  }
617 
618  result = endpos >= (off_t) (offset + BLCKSZ);
619 
620  CloseTransientFile(fd);
621  return result;
622 }
623 
624 /*
625  * Physical read of a (previously existing) page into a buffer slot
626  *
627  * On failure, we cannot just ereport(ERROR) since caller has put state in
628  * shared memory that must be undone. So, we return FALSE and save enough
629  * info in static variables to let SlruReportIOError make the report.
630  *
631  * For now, assume it's not worth keeping a file pointer open across
632  * read/write operations. We could cache one virtual file pointer ...
633  */
634 static bool
635 SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
636 {
637  SlruShared shared = ctl->shared;
638  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
639  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
640  int offset = rpageno * BLCKSZ;
641  char path[MAXPGPATH];
642  int fd;
643 
644  SlruFileName(ctl, path, segno);
645 
646  /*
647  * In a crash-and-restart situation, it's possible for us to receive
648  * commands to set the commit status of transactions whose bits are in
649  * already-truncated segments of the commit log (see notes in
650  * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
651  * where the file doesn't exist, and return zeroes instead.
652  */
653  fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
654  if (fd < 0)
655  {
656  if (errno != ENOENT || !InRecovery)
657  {
659  slru_errno = errno;
660  return false;
661  }
662 
663  ereport(LOG,
664  (errmsg("file \"%s\" doesn't exist, reading as zeroes",
665  path)));
666  MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
667  return true;
668  }
669 
670  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
671  {
673  slru_errno = errno;
674  CloseTransientFile(fd);
675  return false;
676  }
677 
678  errno = 0;
680  if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
681  {
684  slru_errno = errno;
685  CloseTransientFile(fd);
686  return false;
687  }
689 
690  if (CloseTransientFile(fd))
691  {
693  slru_errno = errno;
694  return false;
695  }
696 
697  return true;
698 }
699 
700 /*
701  * Physical write of a page from a buffer slot
702  *
703  * On failure, we cannot just ereport(ERROR) since caller has put state in
704  * shared memory that must be undone. So, we return FALSE and save enough
705  * info in static variables to let SlruReportIOError make the report.
706  *
707  * For now, assume it's not worth keeping a file pointer open across
708  * independent read/write operations. We do batch operations during
709  * SimpleLruFlush, though.
710  *
711  * fdata is NULL for a standalone write, pointer to open-file info during
712  * SimpleLruFlush.
713  */
714 static bool
715 SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
716 {
717  SlruShared shared = ctl->shared;
718  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
719  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
720  int offset = rpageno * BLCKSZ;
721  char path[MAXPGPATH];
722  int fd = -1;
723 
724  /*
725  * Honor the write-WAL-before-data rule, if appropriate, so that we do not
726  * write out data before associated WAL records. This is the same action
727  * performed during FlushBuffer() in the main buffer manager.
728  */
729  if (shared->group_lsn != NULL)
730  {
731  /*
732  * We must determine the largest async-commit LSN for the page. This
733  * is a bit tedious, but since this entire function is a slow path
734  * anyway, it seems better to do this here than to maintain a per-page
735  * LSN variable (which'd need an extra comparison in the
736  * transaction-commit path).
737  */
738  XLogRecPtr max_lsn;
739  int lsnindex,
740  lsnoff;
741 
742  lsnindex = slotno * shared->lsn_groups_per_page;
743  max_lsn = shared->group_lsn[lsnindex++];
744  for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
745  {
746  XLogRecPtr this_lsn = shared->group_lsn[lsnindex++];
747 
748  if (max_lsn < this_lsn)
749  max_lsn = this_lsn;
750  }
751 
752  if (!XLogRecPtrIsInvalid(max_lsn))
753  {
754  /*
755  * As noted above, elog(ERROR) is not acceptable here, so if
756  * XLogFlush were to fail, we must PANIC. This isn't much of a
757  * restriction because XLogFlush is just about all critical
758  * section anyway, but let's make sure.
759  */
761  XLogFlush(max_lsn);
763  }
764  }
765 
766  /*
767  * During a Flush, we may already have the desired file open.
768  */
769  if (fdata)
770  {
771  int i;
772 
773  for (i = 0; i < fdata->num_files; i++)
774  {
775  if (fdata->segno[i] == segno)
776  {
777  fd = fdata->fd[i];
778  break;
779  }
780  }
781  }
782 
783  if (fd < 0)
784  {
785  /*
786  * If the file doesn't already exist, we should create it. It is
787  * possible for this to need to happen when writing a page that's not
788  * first in its segment; we assume the OS can cope with that. (Note:
789  * it might seem that it'd be okay to create files only when
790  * SimpleLruZeroPage is called for the first page of a segment.
791  * However, if after a crash and restart the REDO logic elects to
792  * replay the log from a checkpoint before the latest one, then it's
793  * possible that we will get commands to set transaction status of
794  * transactions that have already been truncated from the commit log.
795  * Easiest way to deal with that is to accept references to
796  * nonexistent files here and in SlruPhysicalReadPage.)
797  *
798  * Note: it is possible for more than one backend to be executing this
799  * code simultaneously for different pages of the same file. Hence,
800  * don't use O_EXCL or O_TRUNC or anything like that.
801  */
802  SlruFileName(ctl, path, segno);
803  fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY,
804  S_IRUSR | S_IWUSR);
805  if (fd < 0)
806  {
808  slru_errno = errno;
809  return false;
810  }
811 
812  if (fdata)
813  {
814  if (fdata->num_files < MAX_FLUSH_BUFFERS)
815  {
816  fdata->fd[fdata->num_files] = fd;
817  fdata->segno[fdata->num_files] = segno;
818  fdata->num_files++;
819  }
820  else
821  {
822  /*
823  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
824  * fall back to treating it as a standalone write.
825  */
826  fdata = NULL;
827  }
828  }
829  }
830 
831  if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
832  {
834  slru_errno = errno;
835  if (!fdata)
836  CloseTransientFile(fd);
837  return false;
838  }
839 
840  errno = 0;
842  if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
843  {
845  /* if write didn't set errno, assume problem is no disk space */
846  if (errno == 0)
847  errno = ENOSPC;
849  slru_errno = errno;
850  if (!fdata)
851  CloseTransientFile(fd);
852  return false;
853  }
855 
856  /*
857  * If not part of Flush, need to fsync now. We assume this happens
858  * infrequently enough that it's not a performance issue.
859  */
860  if (!fdata)
861  {
863  if (ctl->do_fsync && pg_fsync(fd))
864  {
867  slru_errno = errno;
868  CloseTransientFile(fd);
869  return false;
870  }
872 
873  if (CloseTransientFile(fd))
874  {
876  slru_errno = errno;
877  return false;
878  }
879  }
880 
881  return true;
882 }
883 
884 /*
885  * Issue the error message after failure of SlruPhysicalReadPage or
886  * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
887  */
888 static void
890 {
891  int segno = pageno / SLRU_PAGES_PER_SEGMENT;
892  int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
893  int offset = rpageno * BLCKSZ;
894  char path[MAXPGPATH];
895 
896  SlruFileName(ctl, path, segno);
897  errno = slru_errno;
898  switch (slru_errcause)
899  {
900  case SLRU_OPEN_FAILED:
901  ereport(ERROR,
903  errmsg("could not access status of transaction %u", xid),
904  errdetail("Could not open file \"%s\": %m.", path)));
905  break;
906  case SLRU_SEEK_FAILED:
907  ereport(ERROR,
909  errmsg("could not access status of transaction %u", xid),
910  errdetail("Could not seek in file \"%s\" to offset %u: %m.",
911  path, offset)));
912  break;
913  case SLRU_READ_FAILED:
914  ereport(ERROR,
916  errmsg("could not access status of transaction %u", xid),
917  errdetail("Could not read from file \"%s\" at offset %u: %m.",
918  path, offset)));
919  break;
920  case SLRU_WRITE_FAILED:
921  ereport(ERROR,
923  errmsg("could not access status of transaction %u", xid),
924  errdetail("Could not write to file \"%s\" at offset %u: %m.",
925  path, offset)));
926  break;
927  case SLRU_FSYNC_FAILED:
928  ereport(ERROR,
930  errmsg("could not access status of transaction %u", xid),
931  errdetail("Could not fsync file \"%s\": %m.",
932  path)));
933  break;
934  case SLRU_CLOSE_FAILED:
935  ereport(ERROR,
937  errmsg("could not access status of transaction %u", xid),
938  errdetail("Could not close file \"%s\": %m.",
939  path)));
940  break;
941  default:
942  /* can't get here, we trust */
943  elog(ERROR, "unrecognized SimpleLru error cause: %d",
944  (int) slru_errcause);
945  break;
946  }
947 }
948 
949 /*
950  * Select the slot to re-use when we need a free slot.
951  *
952  * The target page number is passed because we need to consider the
953  * possibility that some other process reads in the target page while
954  * we are doing I/O to free a slot. Hence, check or recheck to see if
955  * any slot already holds the target page, and return that slot if so.
956  * Thus, the returned slot is *either* a slot already holding the pageno
957  * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
958  * or CLEAN).
959  *
960  * Control lock must be held at entry, and will be held at exit.
961  */
962 static int
963 SlruSelectLRUPage(SlruCtl ctl, int pageno)
964 {
965  SlruShared shared = ctl->shared;
966 
967  /* Outer loop handles restart after I/O */
968  for (;;)
969  {
970  int slotno;
971  int cur_count;
972  int bestvalidslot = 0; /* keep compiler quiet */
973  int best_valid_delta = -1;
974  int best_valid_page_number = 0; /* keep compiler quiet */
975  int bestinvalidslot = 0; /* keep compiler quiet */
976  int best_invalid_delta = -1;
977  int best_invalid_page_number = 0; /* keep compiler quiet */
978 
979  /* See if page already has a buffer assigned */
980  for (slotno = 0; slotno < shared->num_slots; slotno++)
981  {
982  if (shared->page_number[slotno] == pageno &&
983  shared->page_status[slotno] != SLRU_PAGE_EMPTY)
984  return slotno;
985  }
986 
987  /*
988  * If we find any EMPTY slot, just select that one. Else choose a
989  * victim page to replace. We normally take the least recently used
990  * valid page, but we will never take the slot containing
991  * latest_page_number, even if it appears least recently used. We
992  * will select a slot that is already I/O busy only if there is no
993  * other choice: a read-busy slot will not be least recently used once
994  * the read finishes, and waiting for an I/O on a write-busy slot is
995  * inferior to just picking some other slot. Testing shows the slot
996  * we pick instead will often be clean, allowing us to begin a read at
997  * once.
998  *
999  * Normally the page_lru_count values will all be different and so
1000  * there will be a well-defined LRU page. But since we allow
1001  * concurrent execution of SlruRecentlyUsed() within
1002  * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1003  * acquire the same lru_count values. In that case we break ties by
1004  * choosing the furthest-back page.
1005  *
1006  * Notice that this next line forcibly advances cur_lru_count to a
1007  * value that is certainly beyond any value that will be in the
1008  * page_lru_count array after the loop finishes. This ensures that
1009  * the next execution of SlruRecentlyUsed will mark the page newly
1010  * used, even if it's for a page that has the current counter value.
1011  * That gets us back on the path to having good data when there are
1012  * multiple pages with the same lru_count.
1013  */
1014  cur_count = (shared->cur_lru_count)++;
1015  for (slotno = 0; slotno < shared->num_slots; slotno++)
1016  {
1017  int this_delta;
1018  int this_page_number;
1019 
1020  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1021  return slotno;
1022  this_delta = cur_count - shared->page_lru_count[slotno];
1023  if (this_delta < 0)
1024  {
1025  /*
1026  * Clean up in case shared updates have caused cur_count
1027  * increments to get "lost". We back off the page counts,
1028  * rather than trying to increase cur_count, to avoid any
1029  * question of infinite loops or failure in the presence of
1030  * wrapped-around counts.
1031  */
1032  shared->page_lru_count[slotno] = cur_count;
1033  this_delta = 0;
1034  }
1035  this_page_number = shared->page_number[slotno];
1036  if (this_page_number == shared->latest_page_number)
1037  continue;
1038  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1039  {
1040  if (this_delta > best_valid_delta ||
1041  (this_delta == best_valid_delta &&
1042  ctl->PagePrecedes(this_page_number,
1043  best_valid_page_number)))
1044  {
1045  bestvalidslot = slotno;
1046  best_valid_delta = this_delta;
1047  best_valid_page_number = this_page_number;
1048  }
1049  }
1050  else
1051  {
1052  if (this_delta > best_invalid_delta ||
1053  (this_delta == best_invalid_delta &&
1054  ctl->PagePrecedes(this_page_number,
1055  best_invalid_page_number)))
1056  {
1057  bestinvalidslot = slotno;
1058  best_invalid_delta = this_delta;
1059  best_invalid_page_number = this_page_number;
1060  }
1061  }
1062  }
1063 
1064  /*
1065  * If all pages (except possibly the latest one) are I/O busy, we'll
1066  * have to wait for an I/O to complete and then retry. In that
1067  * unhappy case, we choose to wait for the I/O on the least recently
1068  * used slot, on the assumption that it was likely initiated first of
1069  * all the I/Os in progress and may therefore finish first.
1070  */
1071  if (best_valid_delta < 0)
1072  {
1073  SimpleLruWaitIO(ctl, bestinvalidslot);
1074  continue;
1075  }
1076 
1077  /*
1078  * If the selected page is clean, we're set.
1079  */
1080  if (!shared->page_dirty[bestvalidslot])
1081  return bestvalidslot;
1082 
1083  /*
1084  * Write the page.
1085  */
1086  SlruInternalWritePage(ctl, bestvalidslot, NULL);
1087 
1088  /*
1089  * Now loop back and try again. This is the easiest way of dealing
1090  * with corner cases such as the victim page being re-dirtied while we
1091  * wrote it.
1092  */
1093  }
1094 }
1095 
1096 /*
1097  * Flush dirty pages to disk during checkpoint or database shutdown
1098  */
1099 void
1100 SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
1101 {
1102  SlruShared shared = ctl->shared;
1103  SlruFlushData fdata;
1104  int slotno;
1105  int pageno = 0;
1106  int i;
1107  bool ok;
1108 
1109  /*
1110  * Find and write dirty pages
1111  */
1112  fdata.num_files = 0;
1113 
1115 
1116  for (slotno = 0; slotno < shared->num_slots; slotno++)
1117  {
1118  SlruInternalWritePage(ctl, slotno, &fdata);
1119 
1120  /*
1121  * In some places (e.g. checkpoints), we cannot assert that the slot
1122  * is clean now, since another process might have re-dirtied it
1123  * already. That's okay.
1124  */
1125  Assert(allow_redirtied ||
1126  shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1127  (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1128  !shared->page_dirty[slotno]));
1129  }
1130 
1131  LWLockRelease(shared->ControlLock);
1132 
1133  /*
1134  * Now fsync and close any files that were open
1135  */
1136  ok = true;
1137  for (i = 0; i < fdata.num_files; i++)
1138  {
1140  if (ctl->do_fsync && pg_fsync(fdata.fd[i]))
1141  {
1143  slru_errno = errno;
1144  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1145  ok = false;
1146  }
1148 
1149  if (CloseTransientFile(fdata.fd[i]))
1150  {
1152  slru_errno = errno;
1153  pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1154  ok = false;
1155  }
1156  }
1157  if (!ok)
1159 }
1160 
1161 /*
1162  * Remove all segments before the one holding the passed page number
1163  */
1164 void
1165 SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
1166 {
1167  SlruShared shared = ctl->shared;
1168  int slotno;
1169 
1170  /*
1171  * The cutoff point is the start of the segment containing cutoffPage.
1172  */
1173  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1174 
1175  /*
1176  * Scan shared memory and remove any pages preceding the cutoff page, to
1177  * ensure we won't rewrite them later. (Since this is normally called in
1178  * or just after a checkpoint, any dirty pages should have been flushed
1179  * already ... we're just being extra careful here.)
1180  */
1182 
1183 restart:;
1184 
1185  /*
1186  * While we are holding the lock, make an important safety check: the
1187  * planned cutoff point must be <= the current endpoint page. Otherwise we
1188  * have already wrapped around, and proceeding with the truncation would
1189  * risk removing the current segment.
1190  */
1191  if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
1192  {
1193  LWLockRelease(shared->ControlLock);
1194  ereport(LOG,
1195  (errmsg("could not truncate directory \"%s\": apparent wraparound",
1196  ctl->Dir)));
1197  return;
1198  }
1199 
1200  for (slotno = 0; slotno < shared->num_slots; slotno++)
1201  {
1202  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1203  continue;
1204  if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1205  continue;
1206 
1207  /*
1208  * If page is clean, just change state to EMPTY (expected case).
1209  */
1210  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1211  !shared->page_dirty[slotno])
1212  {
1213  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1214  continue;
1215  }
1216 
1217  /*
1218  * Hmm, we have (or may have) I/O operations acting on the page, so
1219  * we've got to wait for them to finish and then start again. This is
1220  * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1221  * wouldn't it be OK to just discard it without writing it? For now,
1222  * keep the logic the same as it was.)
1223  */
1224  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1225  SlruInternalWritePage(ctl, slotno, NULL);
1226  else
1227  SimpleLruWaitIO(ctl, slotno);
1228  goto restart;
1229  }
1230 
1231  LWLockRelease(shared->ControlLock);
1232 
1233  /* Now we can remove the old segment(s) */
1234  (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
1235 }
1236 
1237 /*
1238  * Delete an individual SLRU segment, identified by the filename.
1239  *
1240  * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1241  * they either can't yet contain anything, or have already been cleaned out.
1242  */
1243 static void
1245 {
1246  char path[MAXPGPATH];
1247 
1248  snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, filename);
1249  ereport(DEBUG2,
1250  (errmsg("removing file \"%s\"", path)));
1251  unlink(path);
1252 }
1253 
1254 /*
1255  * Delete an individual SLRU segment, identified by the segment number.
1256  */
1257 void
1259 {
1260  SlruShared shared = ctl->shared;
1261  int slotno;
1262  char path[MAXPGPATH];
1263  bool did_write;
1264 
1265  /* Clean out any possibly existing references to the segment. */
1267 restart:
1268  did_write = false;
1269  for (slotno = 0; slotno < shared->num_slots; slotno++)
1270  {
1271  int pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
1272 
1273  if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1274  continue;
1275 
1276  /* not the segment we're looking for */
1277  if (pagesegno != segno)
1278  continue;
1279 
1280  /* If page is clean, just change state to EMPTY (expected case). */
1281  if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1282  !shared->page_dirty[slotno])
1283  {
1284  shared->page_status[slotno] = SLRU_PAGE_EMPTY;
1285  continue;
1286  }
1287 
1288  /* Same logic as SimpleLruTruncate() */
1289  if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1290  SlruInternalWritePage(ctl, slotno, NULL);
1291  else
1292  SimpleLruWaitIO(ctl, slotno);
1293 
1294  did_write = true;
1295  }
1296 
1297  /*
1298  * Be extra careful and re-check. The IO functions release the control
1299  * lock, so new pages could have been read in.
1300  */
1301  if (did_write)
1302  goto restart;
1303 
1304  snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
1305  ereport(DEBUG2,
1306  (errmsg("removing file \"%s\"", path)));
1307  unlink(path);
1308 
1309  LWLockRelease(shared->ControlLock);
1310 }
1311 
1312 /*
1313  * SlruScanDirectory callback
1314  * This callback reports true if there's any segment prior to the one
1315  * containing the page passed as "data".
1316  */
1317 bool
1318 SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
1319 {
1320  int cutoffPage = *(int *) data;
1321 
1322  cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT;
1323 
1324  if (ctl->PagePrecedes(segpage, cutoffPage))
1325  return true; /* found one; don't iterate any more */
1326 
1327  return false; /* keep going */
1328 }
1329 
1330 /*
1331  * SlruScanDirectory callback.
1332  * This callback deletes segments prior to the one passed in as "data".
1333  */
1334 static bool
1335 SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
1336 {
1337  int cutoffPage = *(int *) data;
1338 
1339  if (ctl->PagePrecedes(segpage, cutoffPage))
1340  SlruInternalDeleteSegment(ctl, filename);
1341 
1342  return false; /* keep going */
1343 }
1344 
1345 /*
1346  * SlruScanDirectory callback.
1347  * This callback deletes all segments.
1348  */
1349 bool
1350 SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
1351 {
1352  SlruInternalDeleteSegment(ctl, filename);
1353 
1354  return false; /* keep going */
1355 }
1356 
1357 /*
1358  * Scan the SimpleLRU directory and apply a callback to each file found in it.
1359  *
1360  * If the callback returns true, the scan is stopped. The last return value
1361  * from the callback is returned.
1362  *
1363  * The callback receives the following arguments: 1. the SlruCtl struct for the
1364  * slru being truncated; 2. the filename being considered; 3. the page number
1365  * for the first page of that file; 4. a pointer to the opaque data given to us
1366  * by the caller.
1367  *
1368  * Note that the ordering in which the directory is scanned is not guaranteed.
1369  *
1370  * Note that no locking is applied.
1371  */
1372 bool
1374 {
1375  bool retval = false;
1376  DIR *cldir;
1377  struct dirent *clde;
1378  int segno;
1379  int segpage;
1380 
1381  cldir = AllocateDir(ctl->Dir);
1382  while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1383  {
1384  size_t len;
1385 
1386  len = strlen(clde->d_name);
1387 
1388  if ((len == 4 || len == 5 || len == 6) &&
1389  strspn(clde->d_name, "0123456789ABCDEF") == len)
1390  {
1391  segno = (int) strtol(clde->d_name, NULL, 16);
1392  segpage = segno * SLRU_PAGES_PER_SEGMENT;
1393 
1394  elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1395  ctl->Dir, clde->d_name);
1396  retval = callback(ctl, clde->d_name, segpage, data);
1397  if (retval)
1398  break;
1399  }
1400  }
1401  FreeDir(cldir);
1402 
1403  return retval;
1404 }
LWLock * ControlLock
Definition: slru.h:61
int * page_number
Definition: slru.h:73
Definition: lwlock.h:32
SlruPageStatus
Definition: slru.h:48
uint32 TransactionId
Definition: c.h:397
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
Definition: slru.c:503
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1350
#define write(a, b, c)
Definition: win32.h:14
int latest_page_number
Definition: slru.h:103
char ** page_buffer
Definition: slru.h:70
void SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
Definition: slru.c:1165
bool InRecovery
Definition: xlog.c:192
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
SlruErrorCause
Definition: slru.c:113
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
Definition: slru.c:715
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
#define MemSet(start, val, len)
Definition: c.h:857
static SlruErrorCause slru_errcause
Definition: slru.c:123
int cur_lru_count
Definition: slru.h:96
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition: slru.c:300
int lsn_groups_per_page
Definition: slru.h:85
return result
Definition: formatting.c:1632
int segno[MAX_FLUSH_BUFFERS]
Definition: slru.c:79
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
void * ShmemAlloc(Size size)
Definition: shmem.c:157
#define LOG
Definition: elog.h:26
Definition: dirent.h:9
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:145
void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1100
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2758
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:889
#define PG_BINARY
Definition: c.h:1038
struct SlruFlushData * SlruFlush
Definition: slru.c:82
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1715
static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename)
Definition: slru.c:1244
static XLogRecPtr endpos
SlruPageStatus * page_status
Definition: slru.h:71
Definition: dirent.c:25
#define ERROR
Definition: elog.h:43
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH]
Definition: slru.h:107
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:372
int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid)
Definition: slru.c:371
#define MAXPGPATH
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define DEBUG2
Definition: elog.h:24
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1283
#define MAX_FLUSH_BUFFERS
Definition: slru.c:73
bool IsUnderPostmaster
Definition: globals.c:100
LWLockPadded * buffer_locks
Definition: slru.h:108
int OpenTransientFile(FileName fileName, int fileFlags, int fileMode)
Definition: fd.c:2144
int errdetail(const char *fmt,...)
Definition: elog.c:873
int errcode_for_file_access(void)
Definition: elog.c:598
#define InvalidTransactionId
Definition: transam.h:31
XLogRecPtr * group_lsn
Definition: slru.h:84
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2335
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
Definition: slru.c:586
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:574
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1232
int unlink(const char *filename)
#define ereport(elevel, rest)
Definition: elog.h:122
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition: slru.c:317
bool do_fsync
Definition: slru.h:125
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:667
int CloseTransientFile(int fd)
Definition: fd.c:2305
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
SlruSharedData * SlruShared
Definition: slru.h:111
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1318
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.c:1335
#define SlruFileName(ctl, path, seg)
Definition: slru.c:63
char Dir[64]
Definition: slru.h:138
bool(* PagePrecedes)(int, int)
Definition: slru.h:132
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
Definition: slru.c:463
#define SLRU_MAX_NAME_LENGTH
Definition: slru.h:40
int num_files
Definition: slru.c:77
LWLock lock
Definition: lwlock.h:79
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
Definition: slru.c:635
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int * page_lru_count
Definition: slru.h:74
#define NULL
Definition: c.h:229
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
#define StrNCpy(dst, src, len)
Definition: c.h:830
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1373
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2401
size_t Size
Definition: c.h:356
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1208
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1111
#define MAXALIGN(LEN)
Definition: c.h:588
void LWLockRegisterTranche(int tranche_id, char *tranche_name)
Definition: lwlock.c:592
int num_slots
Definition: slru.h:64
const char * name
Definition: encode.c:521
struct SlruFlushData SlruFlushData
static int SlruSelectLRUPage(SlruCtl ctl, int pageno)
Definition: slru.c:963
static int slru_errno
Definition: slru.c:124
static char * filename
Definition: pg_dumpall.c:89
int errmsg(const char *fmt,...)
Definition: elog.c:797
bool * page_dirty
Definition: slru.h:72
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int segpage, void *data)
Definition: slru.h:157
int i
SlruShared shared
Definition: slru.h:119
#define BUFFERALIGN(LEN)
Definition: c.h:590
int lwlock_tranche_id
Definition: slru.h:106
void SlruDeleteSegment(SlruCtl ctl, int segno)
Definition: slru.c:1258
int pg_fsync(int fd)
Definition: fd.c:333
char d_name[MAX_PATH]
Definition: dirent.h:14
#define elog
Definition: elog.h:219
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:37
int SimpleLruZeroPage(SlruCtl ctl, int pageno)
Definition: slru.c:259
#define read(a, b, c)
Definition: win32.h:13
int FreeDir(DIR *dir)
Definition: fd.c:2444
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id)
Definition: slru.c:165
#define SlruRecentlyUsed(shared, slotno)
Definition: slru.c:103
int fd[MAX_FLUSH_BUFFERS]
Definition: slru.c:78