PostgreSQL Source Code git master
Loading...
Searching...
No Matches
slru.c File Reference
#include "postgres.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/slru.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
#include "utils/guc.h"
Include dependency graph for slru.c:

Go to the source code of this file.

Data Structures

struct  SlruWriteAllData
 

Macros

#define MAX_WRITEALL_BUFFERS   16
 
#define SLRU_BANK_BITSHIFT   4
 
#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)
 
#define SlotGetBankNumber(slotno)   ((slotno) >> SLRU_BANK_BITSHIFT)
 
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
 

Typedefs

typedef struct SlruWriteAllData SlruWriteAllData
 
typedef struct SlruWriteAllData * SlruWriteAll
 

Enumerations

enum  SlruErrorCause {
  SLRU_OPEN_FAILED , SLRU_SEEK_FAILED , SLRU_READ_FAILED , SLRU_WRITE_FAILED ,
  SLRU_FSYNC_FAILED , SLRU_CLOSE_FAILED
}
 

Functions

static int SlruFileName (SlruCtl ctl, char *path, int64 segno)
 
static void SimpleLruZeroLSNs (SlruCtl ctl, int slotno)
 
static void SimpleLruWaitIO (SlruCtl ctl, int slotno)
 
static void SlruInternalWritePage (SlruCtl ctl, int slotno, SlruWriteAll fdata)
 
static bool SlruPhysicalReadPage (SlruCtl ctl, int64 pageno, int slotno)
 
static bool SlruPhysicalWritePage (SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
 
static void SlruReportIOError (SlruCtl ctl, int64 pageno, TransactionId xid)
 
static int SlruSelectLRUPage (SlruCtl ctl, int64 pageno)
 
static bool SlruScanDirCbDeleteCutoff (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static void SlruInternalDeleteSegment (SlruCtl ctl, int64 segno)
 
static void SlruRecentlyUsed (SlruShared shared, int slotno)
 
Size SimpleLruShmemSize (int nslots, int nlsns)
 
int SimpleLruAutotuneBuffers (int divisor, int max)
 
void SimpleLruInit (SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
 
bool check_slru_buffers (const char *name, int *newval)
 
int SimpleLruZeroPage (SlruCtl ctl, int64 pageno)
 
void SimpleLruZeroAndWritePage (SlruCtl ctl, int64 pageno)
 
int SimpleLruReadPage (SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
 
int SimpleLruReadPage_ReadOnly (SlruCtl ctl, int64 pageno, TransactionId xid)
 
void SimpleLruWritePage (SlruCtl ctl, int slotno)
 
bool SimpleLruDoesPhysicalPageExist (SlruCtl ctl, int64 pageno)
 
void SimpleLruWriteAll (SlruCtl ctl, bool allow_redirtied)
 
void SimpleLruTruncate (SlruCtl ctl, int64 cutoffPage)
 
void SlruDeleteSegment (SlruCtl ctl, int64 segno)
 
static bool SlruMayDeleteSegment (SlruCtl ctl, int64 segpage, int64 cutoffPage)
 
bool SlruScanDirCbReportPresence (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
bool SlruScanDirCbDeleteAll (SlruCtl ctl, char *filename, int64 segpage, void *data)
 
static bool SlruCorrectSegmentFilenameLength (SlruCtl ctl, size_t len)
 
bool SlruScanDirectory (SlruCtl ctl, SlruScanCallback callback, void *data)
 
int SlruSyncFileTag (SlruCtl ctl, const FileTag *ftag, char *path)
 

Variables

static SlruErrorCause slru_errcause
 
static int slru_errno
 

Macro Definition Documentation

◆ INIT_SLRUFILETAG

#define INIT_SLRUFILETAG (   a,
  xx_handler,
  xx_segno 
)
Value:
( \
memset(&(a), 0, sizeof(FileTag)), \
(a).handler = (xx_handler), \
(a).segno = (xx_segno) \
)
int a
Definition isn.c:73
static int fb(int x)
Definition sync.h:51

Definition at line 156 of file slru.c.

164{
172
174static int slru_errno;
175
176
177static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
178static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
180static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno);
181static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno,
183static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid);
184static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno);
185
187 int64 segpage, void *data);
188static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno);
189static inline void SlruRecentlyUsed(SlruShared shared, int slotno);
190
191
192/*
193 * Initialization of shared memory
194 */
195
196Size
197SimpleLruShmemSize(int nslots, int nlsns)
198{
199 int nbanks = nslots / SLRU_BANK_SIZE;
200 Size sz;
201
203 Assert(nslots % SLRU_BANK_SIZE == 0);
204
205 /* we assume nslots isn't so large as to risk overflow */
206 sz = MAXALIGN(sizeof(SlruSharedData));
207 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
208 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
209 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
210 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
211 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
212 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
213 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
214 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
215
216 if (nlsns > 0)
217 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
218
219 return BUFFERALIGN(sz) + BLCKSZ * nslots;
220}
221
222/*
223 * Determine a number of SLRU buffers to use.
224 *
225 * We simply divide shared_buffers by the divisor given and cap
226 * that at the maximum given; but always at least SLRU_BANK_SIZE.
227 * Round down to the nearest multiple of SLRU_BANK_SIZE.
228 */
229int
231{
232 return Min(max - (max % SLRU_BANK_SIZE),
235}
236
237/*
238 * Initialize, or attach to, a simple LRU cache in shared memory.
239 *
240 * ctl: address of local (unshared) control structure.
241 * name: name of SLRU. (This is user-visible, pick with care!)
242 * nslots: number of page slots to use.
243 * nlsns: number of LSN groups per page (set to zero if not relevant).
244 * subdir: PGDATA-relative subdirectory that will contain the files.
245 * buffer_tranche_id: tranche ID to use for the SLRU's per-buffer LWLocks.
246 * bank_tranche_id: tranche ID to use for the bank LWLocks.
247 * sync_handler: which set of functions to use to handle sync requests
248 * long_segment_names: true to use long segment names, false for short ones.
249 */
250void
251SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
252 const char *subdir, int buffer_tranche_id, int bank_tranche_id,
253 SyncRequestHandler sync_handler, bool long_segment_names)
254{
255 SlruShared shared;
256 bool found;
257 int nbanks = nslots / SLRU_BANK_SIZE;
258
260
262 SimpleLruShmemSize(nslots, nlsns),
263 &found);
264
266 {
267 /* Initialize locks and shared memory area */
268 char *ptr;
269 Size offset;
270
271 Assert(!found);
272
273 memset(shared, 0, sizeof(SlruSharedData));
274
275 shared->num_slots = nslots;
276 shared->lsn_groups_per_page = nlsns;
277
279
281
282 ptr = (char *) shared;
283 offset = MAXALIGN(sizeof(SlruSharedData));
284 shared->page_buffer = (char **) (ptr + offset);
285 offset += MAXALIGN(nslots * sizeof(char *));
286 shared->page_status = (SlruPageStatus *) (ptr + offset);
287 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
288 shared->page_dirty = (bool *) (ptr + offset);
289 offset += MAXALIGN(nslots * sizeof(bool));
290 shared->page_number = (int64 *) (ptr + offset);
291 offset += MAXALIGN(nslots * sizeof(int64));
292 shared->page_lru_count = (int *) (ptr + offset);
293 offset += MAXALIGN(nslots * sizeof(int));
294
295 /* Initialize LWLocks */
296 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
297 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
298 shared->bank_locks = (LWLockPadded *) (ptr + offset);
299 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
300 shared->bank_cur_lru_count = (int *) (ptr + offset);
301 offset += MAXALIGN(nbanks * sizeof(int));
302
303 if (nlsns > 0)
304 {
305 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
306 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
307 }
308
309 ptr += BUFFERALIGN(offset);
310 for (int slotno = 0; slotno < nslots; slotno++)
311 {
314
315 shared->page_buffer[slotno] = ptr;
317 shared->page_dirty[slotno] = false;
318 shared->page_lru_count[slotno] = 0;
319 ptr += BLCKSZ;
320 }
321
322 /* Initialize the slot banks. */
323 for (int bankno = 0; bankno < nbanks; bankno++)
324 {
326 shared->bank_cur_lru_count[bankno] = 0;
327 }
328
329 /* Should fit to estimated shmem size */
330 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
331 }
332 else
333 {
334 Assert(found);
335 Assert(shared->num_slots == nslots);
336 }
337
338 /*
339 * Initialize the unshared control struct, including directory path. We
340 * assume caller set PagePrecedes.
341 */
342 ctl->shared = shared;
343 ctl->sync_handler = sync_handler;
344 ctl->long_segment_names = long_segment_names;
345 ctl->nbanks = nbanks;
346 strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
347}
348
349/*
350 * Helper function for GUC check_hook to check whether slru buffers are in
351 * multiples of SLRU_BANK_SIZE.
352 */
353bool
354check_slru_buffers(const char *name, int *newval)
355{
356 /* Valid values are multiples of SLRU_BANK_SIZE */
357 if (*newval % SLRU_BANK_SIZE == 0)
358 return true;
359
360 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
362 return false;
363}
364
365/*
366 * Initialize (or reinitialize) a page to zeroes.
367 *
368 * The page is not actually written, just set up in shared memory.
369 * The slot number of the new page is returned.
370 *
371 * Bank lock must be held at entry, and will be held at exit.
372 */
373int
375{
376 SlruShared shared = ctl->shared;
377 int slotno;
378
380
381 /* Find a suitable buffer slot for the page */
382 slotno = SlruSelectLRUPage(ctl, pageno);
384 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
385 !shared->page_dirty[slotno]) ||
386 shared->page_number[slotno] == pageno);
387
388 /* Mark the slot as containing this page */
389 shared->page_number[slotno] = pageno;
391 shared->page_dirty[slotno] = true;
392 SlruRecentlyUsed(shared, slotno);
393
394 /* Set the buffer to zeroes */
395 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
396
397 /* Set the LSNs for this new page to zero */
399
400 /*
401 * Assume this page is now the latest active page.
402 *
403 * Note that because both this routine and SlruSelectLRUPage run with a
404 * SLRU bank lock held, it is not possible for this to be zeroing a page
405 * that SlruSelectLRUPage is going to evict simultaneously. Therefore,
406 * there's no memory barrier here.
407 */
408 pg_atomic_write_u64(&shared->latest_page_number, pageno);
409
410 /* update the stats counter of zeroed pages */
412
413 return slotno;
414}
415
416/*
417 * Zero all the LSNs we store for this slru page.
418 *
419 * This should be called each time we create a new page, and each time we read
420 * in a page from disk into an existing buffer. (Such an old page cannot
421 * have any interesting LSNs, since we'd have flushed them before writing
422 * the page in the first place.)
423 *
424 * This assumes that InvalidXLogRecPtr is bitwise-all-0.
425 */
426static void
428{
429 SlruShared shared = ctl->shared;
430
431 if (shared->lsn_groups_per_page > 0)
432 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
433 shared->lsn_groups_per_page * sizeof(XLogRecPtr));
434}
435
436/*
437 * This is a convenience wrapper for the common case of zeroing a page and
438 * immediately flushing it to disk.
439 *
440 * SLRU bank lock is acquired and released here.
441 */
442void
444{
445 int slotno;
446 LWLock *lock;
447
448 lock = SimpleLruGetBankLock(ctl, pageno);
450
451 /* Create and zero the page */
452 slotno = SimpleLruZeroPage(ctl, pageno);
453
454 /* Make sure it's written out */
456 Assert(!ctl->shared->page_dirty[slotno]);
457
458 LWLockRelease(lock);
459}
460
461/*
462 * Wait for any active I/O on a page slot to finish. (This does not
463 * guarantee that new I/O hasn't been started before we return, though.
464 * In fact the slot might not even contain the same page anymore.)
465 *
466 * Bank lock must be held at entry, and will be held at exit.
467 */
468static void
470{
471 SlruShared shared = ctl->shared;
473
475
476 /* See notes at top of file */
481
482 /*
483 * If the slot is still in an io-in-progress state, then either someone
484 * already started a new I/O on the slot, or a previous I/O failed and
485 * neglected to reset the page state. That shouldn't happen, really, but
486 * it seems worth a few extra cycles to check and recover from it. We can
487 * cheaply test for failure by seeing if the buffer lock is still held (we
488 * assume that transaction abort would release the lock).
489 */
492 {
494 {
495 /* indeed, the I/O must have failed */
498 else /* write_in_progress */
499 {
501 shared->page_dirty[slotno] = true;
502 }
504 }
505 }
506}
507
508/*
509 * Find a page in a shared buffer, reading it in if necessary.
510 * The page number must correspond to an already-initialized page.
511 *
512 * If write_ok is true then it is OK to return a page that is in
513 * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
514 * that modification of the page is safe. If write_ok is false then we
515 * will not return the page until it is not undergoing active I/O.
516 *
517 * The passed-in xid is used only for error reporting, and may be
518 * InvalidTransactionId if no specific xid is associated with the action.
519 *
520 * Return value is the shared-buffer slot number now holding the page.
521 * The buffer's LRU access info is updated.
522 *
523 * The correct bank lock must be held at entry, and will be held at exit.
524 */
525int
527 TransactionId xid)
528{
529 SlruShared shared = ctl->shared;
531
533
534 /* Outer loop handles restart if we must wait for someone else's I/O */
535 for (;;)
536 {
537 int slotno;
538 bool ok;
539
540 /* See if page already is in memory; if not, pick victim slot */
541 slotno = SlruSelectLRUPage(ctl, pageno);
542
543 /* Did we find the page in memory? */
544 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
545 shared->page_number[slotno] == pageno)
546 {
547 /*
548 * If page is still being read in, we must wait for I/O. Likewise
549 * if the page is being written and the caller said that's not OK.
550 */
553 !write_ok))
554 {
556 /* Now we must recheck state from the top */
557 continue;
558 }
559 /* Otherwise, it's ready to use */
560 SlruRecentlyUsed(shared, slotno);
561
562 /* update the stats counter of pages found in the SLRU */
564
565 return slotno;
566 }
567
568 /* We found no match; assert we selected a freeable slot */
570 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
571 !shared->page_dirty[slotno]));
572
573 /* Mark the slot read-busy */
574 shared->page_number[slotno] = pageno;
576 shared->page_dirty[slotno] = false;
577
578 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
580
581 /* Release bank lock while doing I/O */
583
584 /* Do the read */
585 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
586
587 /* Set the LSNs for this newly read-in page to zero */
589
590 /* Re-acquire bank control lock and update page state */
592
593 Assert(shared->page_number[slotno] == pageno &&
595 !shared->page_dirty[slotno]);
596
598
600
601 /* Now it's okay to ereport if we failed */
602 if (!ok)
603 SlruReportIOError(ctl, pageno, xid);
604
605 SlruRecentlyUsed(shared, slotno);
606
607 /* update the stats counter of pages not found in SLRU */
609
610 return slotno;
611 }
612}
613
614/*
615 * Find a page in a shared buffer, reading it in if necessary.
616 * The page number must correspond to an already-initialized page.
617 * The caller must intend only read-only access to the page.
618 *
619 * The passed-in xid is used only for error reporting, and may be
620 * InvalidTransactionId if no specific xid is associated with the action.
621 *
622 * Return value is the shared-buffer slot number now holding the page.
623 * The buffer's LRU access info is updated.
624 *
625 * Bank control lock must NOT be held at entry, but will be held at exit.
626 * It is unspecified whether the lock will be shared or exclusive.
627 */
628int
630{
631 SlruShared shared = ctl->shared;
633 int bankno = pageno % ctl->nbanks;
636
637 /* Try to find the page while holding only shared lock */
639
640 /* See if page is already in a buffer */
641 for (int slotno = bankstart; slotno < bankend; slotno++)
642 {
643 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
644 shared->page_number[slotno] == pageno &&
646 {
647 /* See comments for SlruRecentlyUsed() */
648 SlruRecentlyUsed(shared, slotno);
649
650 /* update the stats counter of pages found in the SLRU */
652
653 return slotno;
654 }
655 }
656
657 /* No luck, so switch to normal exclusive lock and do regular read */
660
661 return SimpleLruReadPage(ctl, pageno, true, xid);
662}
663
664/*
665 * Write a page from a shared buffer, if necessary.
666 * Does nothing if the specified slot is not dirty.
667 *
668 * NOTE: only one write attempt is made here. Hence, it is possible that
669 * the page is still dirty at exit (if someone else re-dirtied it during
670 * the write). However, we *do* attempt a fresh write even if the page
671 * is already being written; this is for checkpoints.
672 *
673 * Bank lock must be held at entry, and will be held at exit.
674 */
675static void
677{
678 SlruShared shared = ctl->shared;
679 int64 pageno = shared->page_number[slotno];
681 bool ok;
682
685
686 /* If a write is in progress, wait for it to finish */
688 shared->page_number[slotno] == pageno)
689 {
691 }
692
693 /*
694 * Do nothing if page is not dirty, or if buffer no longer contains the
695 * same page we were called for.
696 */
697 if (!shared->page_dirty[slotno] ||
698 shared->page_status[slotno] != SLRU_PAGE_VALID ||
699 shared->page_number[slotno] != pageno)
700 return;
701
702 /*
703 * Mark the slot write-busy, and clear the dirtybit. After this point, a
704 * transaction status update on this page will mark it dirty again.
705 */
707 shared->page_dirty[slotno] = false;
708
709 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
711
712 /* Release bank lock while doing I/O */
714
715 /* Do the write */
717
718 /* If we failed, and we're in a flush, better close the files */
719 if (!ok && fdata)
720 {
721 for (int i = 0; i < fdata->num_files; i++)
723 }
724
725 /* Re-acquire bank lock and update page state */
727
728 Assert(shared->page_number[slotno] == pageno &&
730
731 /* If we failed to write, mark the page dirty again */
732 if (!ok)
733 shared->page_dirty[slotno] = true;
734
736
738
739 /* Now it's okay to ereport if we failed */
740 if (!ok)
742
743 /* If part of a checkpoint, count this as a SLRU buffer written. */
744 if (fdata)
745 {
748 }
749}
750
751/*
752 * Wrapper of SlruInternalWritePage, for external callers.
753 * fdata is always passed as NULL here.
754 */
755void
757{
758 Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
759
761}
762
763/*
764 * Return whether the given page exists on disk.
765 *
766 * A false return means that either the file does not exist, or that it's not
767 * large enough to contain the given page.
768 */
769bool
771{
772 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
773 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
774 int offset = rpageno * BLCKSZ;
775 char path[MAXPGPATH];
776 int fd;
777 bool result;
779
780 /* update the stats counter of checked pages */
781 pgstat_count_slru_blocks_exists(ctl->shared->slru_stats_idx);
782
783 SlruFileName(ctl, path, segno);
784
786 if (fd < 0)
787 {
788 /* expected: file doesn't exist */
789 if (errno == ENOENT)
790 return false;
791
792 /* report error normally */
795 SlruReportIOError(ctl, pageno, 0);
796 }
797
798 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
799 {
802 SlruReportIOError(ctl, pageno, 0);
803 }
804
805 result = endpos >= (off_t) (offset + BLCKSZ);
806
807 if (CloseTransientFile(fd) != 0)
808 {
811 return false;
812 }
813
814 return result;
815}
816
817/*
818 * Physical read of a (previously existing) page into a buffer slot
819 *
820 * On failure, we cannot just ereport(ERROR) since caller has put state in
821 * shared memory that must be undone. So, we return false and save enough
822 * info in static variables to let SlruReportIOError make the report.
823 *
824 * For now, assume it's not worth keeping a file pointer open across
825 * read/write operations. We could cache one virtual file pointer ...
826 */
827static bool
829{
830 SlruShared shared = ctl->shared;
831 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
832 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
833 off_t offset = rpageno * BLCKSZ;
834 char path[MAXPGPATH];
835 int fd;
836
837 SlruFileName(ctl, path, segno);
838
839 /*
840 * In a crash-and-restart situation, it's possible for us to receive
841 * commands to set the commit status of transactions whose bits are in
842 * already-truncated segments of the commit log (see notes in
843 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
844 * where the file doesn't exist, and return zeroes instead.
845 */
847 if (fd < 0)
848 {
849 if (errno != ENOENT || !InRecovery)
850 {
853 return false;
854 }
855
856 ereport(LOG,
857 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
858 path)));
859 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
860 return true;
861 }
862
863 errno = 0;
865 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
866 {
871 return false;
872 }
874
875 if (CloseTransientFile(fd) != 0)
876 {
879 return false;
880 }
881
882 return true;
883}
884
885/*
886 * Physical write of a page from a buffer slot
887 *
888 * On failure, we cannot just ereport(ERROR) since caller has put state in
889 * shared memory that must be undone. So, we return false and save enough
890 * info in static variables to let SlruReportIOError make the report.
891 *
892 * For now, assume it's not worth keeping a file pointer open across
893 * independent read/write operations. We do batch operations during
894 * SimpleLruWriteAll, though.
895 *
896 * fdata is NULL for a standalone write, pointer to open-file info during
897 * SimpleLruWriteAll.
898 */
899static bool
901{
902 SlruShared shared = ctl->shared;
903 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
904 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
905 off_t offset = rpageno * BLCKSZ;
906 char path[MAXPGPATH];
907 int fd = -1;
908
909 /* update the stats counter of written pages */
911
912 /*
913 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
914 * write out data before associated WAL records. This is the same action
915 * performed during FlushBuffer() in the main buffer manager.
916 */
917 if (shared->group_lsn != NULL)
918 {
919 /*
920 * We must determine the largest async-commit LSN for the page. This
921 * is a bit tedious, but since this entire function is a slow path
922 * anyway, it seems better to do this here than to maintain a per-page
923 * LSN variable (which'd need an extra comparison in the
924 * transaction-commit path).
925 */
927 int lsnindex;
928
930 max_lsn = shared->group_lsn[lsnindex++];
931 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
932 {
934
935 if (max_lsn < this_lsn)
937 }
938
940 {
941 /*
942 * As noted above, elog(ERROR) is not acceptable here, so if
943 * XLogFlush were to fail, we must PANIC. This isn't much of a
944 * restriction because XLogFlush is just about all critical
945 * section anyway, but let's make sure.
946 */
950 }
951 }
952
953 /*
954 * During a SimpleLruWriteAll, we may already have the desired file open.
955 */
956 if (fdata)
957 {
958 for (int i = 0; i < fdata->num_files; i++)
959 {
960 if (fdata->segno[i] == segno)
961 {
962 fd = fdata->fd[i];
963 break;
964 }
965 }
966 }
967
968 if (fd < 0)
969 {
970 /*
971 * If the file doesn't already exist, we should create it. It is
972 * possible for this to need to happen when writing a page that's not
973 * first in its segment; we assume the OS can cope with that. (Note:
974 * it might seem that it'd be okay to create files only when
975 * SimpleLruZeroPage is called for the first page of a segment.
976 * However, if after a crash and restart the REDO logic elects to
977 * replay the log from a checkpoint before the latest one, then it's
978 * possible that we will get commands to set transaction status of
979 * transactions that have already been truncated from the commit log.
980 * Easiest way to deal with that is to accept references to
981 * nonexistent files here and in SlruPhysicalReadPage.)
982 *
983 * Note: it is possible for more than one backend to be executing this
984 * code simultaneously for different pages of the same file. Hence,
985 * don't use O_EXCL or O_TRUNC or anything like that.
986 */
987 SlruFileName(ctl, path, segno);
989 if (fd < 0)
990 {
993 return false;
994 }
995
996 if (fdata)
997 {
998 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
999 {
1000 fdata->fd[fdata->num_files] = fd;
1001 fdata->segno[fdata->num_files] = segno;
1002 fdata->num_files++;
1003 }
1004 else
1005 {
1006 /*
1007 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
1008 * fall back to treating it as a standalone write.
1009 */
1010 fdata = NULL;
1011 }
1012 }
1013 }
1014
1015 errno = 0;
1017 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
1018 {
1020 /* if write didn't set errno, assume problem is no disk space */
1021 if (errno == 0)
1022 errno = ENOSPC;
1024 slru_errno = errno;
1025 if (!fdata)
1027 return false;
1028 }
1030
1031 /* Queue up a sync request for the checkpointer. */
1032 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1033 {
1034 FileTag tag;
1035
1036 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1037 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1038 {
1039 /* No space to enqueue sync request. Do it synchronously. */
1041 if (pg_fsync(fd) != 0)
1042 {
1045 slru_errno = errno;
1047 return false;
1048 }
1050 }
1051 }
1052
1053 /* Close file, unless part of flush request. */
1054 if (!fdata)
1055 {
1056 if (CloseTransientFile(fd) != 0)
1057 {
1059 slru_errno = errno;
1060 return false;
1061 }
1062 }
1063
1064 return true;
1065}
1066
1067/*
1068 * Issue the error message after failure of SlruPhysicalReadPage or
1069 * SlruPhysicalWritePage. Call this after cleaning up shared-memory state.
1070 */
1071static void
1073{
1074 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1075 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1076 int offset = rpageno * BLCKSZ;
1077 char path[MAXPGPATH];
1078
1079 SlruFileName(ctl, path, segno);
1080 errno = slru_errno;
1081 switch (slru_errcause)
1082 {
1083 case SLRU_OPEN_FAILED:
1084 ereport(ERROR,
1086 errmsg("could not access status of transaction %u", xid),
1087 errdetail("Could not open file \"%s\": %m.", path)));
1088 break;
1089 case SLRU_SEEK_FAILED:
1090 ereport(ERROR,
1092 errmsg("could not access status of transaction %u", xid),
1093 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1094 path, offset)));
1095 break;
1096 case SLRU_READ_FAILED:
1097 if (errno)
1098 ereport(ERROR,
1100 errmsg("could not access status of transaction %u", xid),
1101 errdetail("Could not read from file \"%s\" at offset %d: %m.",
1102 path, offset)));
1103 else
1104 ereport(ERROR,
1105 (errmsg("could not access status of transaction %u", xid),
1106 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1107 break;
1108 case SLRU_WRITE_FAILED:
1109 if (errno)
1110 ereport(ERROR,
1112 errmsg("could not access status of transaction %u", xid),
1113 errdetail("Could not write to file \"%s\" at offset %d: %m.",
1114 path, offset)));
1115 else
1116 ereport(ERROR,
1117 (errmsg("could not access status of transaction %u", xid),
1118 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1119 path, offset)));
1120 break;
1121 case SLRU_FSYNC_FAILED:
1124 errmsg("could not access status of transaction %u", xid),
1125 errdetail("Could not fsync file \"%s\": %m.",
1126 path)));
1127 break;
1128 case SLRU_CLOSE_FAILED:
1129 ereport(ERROR,
1131 errmsg("could not access status of transaction %u", xid),
1132 errdetail("Could not close file \"%s\": %m.",
1133 path)));
1134 break;
1135 default:
1136 /* can't get here, we trust */
1137 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1138 (int) slru_errcause);
1139 break;
1140 }
1141}
1142
1143/*
1144 * Mark a buffer slot "most recently used".
1145 */
1146static inline void
1148{
1151
1153
1154 /*
1155 * The reason for the if-test is that there are often many consecutive
1156 * accesses to the same page (particularly the latest page). By
1157 * suppressing useless increments of bank_cur_lru_count, we reduce the
1158 * probability that old pages' counts will "wrap around" and make them
1159 * appear recently used.
1160 *
1161 * We allow this code to be executed concurrently by multiple processes
1162 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1163 * are atomic, this should not cause any completely-bogus values to enter
1164 * the computation. However, it is possible for either bank_cur_lru_count
1165 * or individual page_lru_count entries to be "reset" to lower values than
1166 * they should have, in case a process is delayed while it executes this
1167 * function. With care in SlruSelectLRUPage(), this does little harm, and
1168 * in any case the absolute worst possible consequence is a nonoptimal
1169 * choice of page to evict. The gain from allowing concurrent reads of
1170 * SLRU pages seems worth it.
1171 */
1172 if (new_lru_count != shared->page_lru_count[slotno])
1173 {
1176 }
1177}
1178
1179/*
1180 * Select the slot to re-use when we need a free slot for the given page.
1181 *
1182 * The target page number is passed not only because we need to know the
1183 * correct bank to use, but also because we need to consider the possibility
1184 * that some other process reads in the target page while we are doing I/O to
1185 * free a slot. Hence, check or recheck to see if any slot already holds the
1186 * target page, and return that slot if so. Thus, the returned slot is
1187 * *either* a slot already holding the pageno (could be any state except
1188 * EMPTY), *or* a freeable slot (state EMPTY, or VALID and not dirty).
1189 *
1190 * The correct bank lock must be held at entry, and will be held at exit.
1191 */
1192static int
1194{
1195 SlruShared shared = ctl->shared;
1196
1197 /* Outer loop handles restart after I/O */
1198 for (;;)
1199 {
1200 int cur_count;
1201 int bestvalidslot = 0; /* keep compiler quiet */
1202 int best_valid_delta = -1;
1203 int64 best_valid_page_number = 0; /* keep compiler quiet */
1204 int bestinvalidslot = 0; /* keep compiler quiet */
1205 int best_invalid_delta = -1;
1206 int64 best_invalid_page_number = 0; /* keep compiler quiet */
1207 int bankno = pageno % ctl->nbanks;
1210
1212
1213 /* See if page already has a buffer assigned */
1214 for (int slotno = bankstart; slotno < bankend; slotno++)
1215 {
1216 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1217 shared->page_number[slotno] == pageno)
1218 return slotno;
1219 }
1220
1221 /*
1222 * If we find any EMPTY slot, just select that one. Else choose a
1223 * victim page to replace. We normally take the least recently used
1224 * valid page, but we will never take the slot containing
1225 * latest_page_number, even if it appears least recently used. We
1226 * will select a slot that is already I/O busy only if there is no
1227 * other choice: a read-busy slot will not be least recently used once
1228 * the read finishes, and waiting for an I/O on a write-busy slot is
1229 * inferior to just picking some other slot. Testing shows the slot
1230 * we pick instead will often be clean, allowing us to begin a read at
1231 * once.
1232 *
1233 * Normally the page_lru_count values will all be different and so
1234 * there will be a well-defined LRU page. But since we allow
1235 * concurrent execution of SlruRecentlyUsed() within
1236 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1237 * acquire the same lru_count values. In that case we break ties by
1238 * choosing the furthest-back page.
1239 *
1240 * Notice that this next line forcibly advances cur_lru_count to a
1241 * value that is certainly beyond any value that will be in the
1242 * page_lru_count array after the loop finishes. This ensures that
1243 * the next execution of SlruRecentlyUsed will mark the page newly
1244 * used, even if it's for a page that has the current counter value.
1245 * That gets us back on the path to having good data when there are
1246 * multiple pages with the same lru_count.
1247 */
1248 cur_count = (shared->bank_cur_lru_count[bankno])++;
1249 for (int slotno = bankstart; slotno < bankend; slotno++)
1250 {
1251 int this_delta;
1253
1254 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1255 return slotno;
1256
1258 if (this_delta < 0)
1259 {
1260 /*
1261 * Clean up in case shared updates have caused cur_count
1262 * increments to get "lost". We back off the page counts,
1263 * rather than trying to increase cur_count, to avoid any
1264 * question of infinite loops or failure in the presence of
1265 * wrapped-around counts.
1266 */
1267 shared->page_lru_count[slotno] = cur_count;
1268 this_delta = 0;
1269 }
1270
1271 /*
1272 * If this page is the one most recently zeroed, don't consider it
1273 * an eviction candidate. See comments in SimpleLruZeroPage for an
1274 * explanation about the lack of a memory barrier here.
1275 */
1277 if (this_page_number ==
1279 continue;
1280
1281 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1282 {
1285 ctl->PagePrecedes(this_page_number,
1287 {
1291 }
1292 }
1293 else
1294 {
1297 ctl->PagePrecedes(this_page_number,
1299 {
1303 }
1304 }
1305 }
1306
1307 /*
1308 * If all pages (except possibly the latest one) are I/O busy, we'll
1309 * have to wait for an I/O to complete and then retry. In that
1310 * unhappy case, we choose to wait for the I/O on the least recently
1311 * used slot, on the assumption that it was likely initiated first of
1312 * all the I/Os in progress and may therefore finish first.
1313 */
1314 if (best_valid_delta < 0)
1315 {
1317 continue;
1318 }
1319
1320 /*
1321 * If the selected page is clean, we're set.
1322 */
1323 if (!shared->page_dirty[bestvalidslot])
1324 return bestvalidslot;
1325
1326 /*
1327 * Write the page.
1328 */
1330
1331 /*
1332 * Now loop back and try again. This is the easiest way of dealing
1333 * with corner cases such as the victim page being re-dirtied while we
1334 * wrote it.
1335 */
1336 }
1337}
1338
1339/*
1340 * Write dirty pages to disk during checkpoint or database shutdown. Flushing
1341 * is deferred until the next call to ProcessSyncRequests(), though we do fsync
1342 * the containing directory here to make sure that newly created directory
1343 * entries are on disk.
1344 */
1345void
1347{
1348 SlruShared shared = ctl->shared;
1350 int64 pageno = 0;
1351 int prevbank = SlotGetBankNumber(0);
1352 bool ok;
1353
1354 /* update the stats counter of flushes */
1356
1357 /*
1358 * Find and write dirty pages
1359 */
1360 fdata.num_files = 0;
1361
1363
1364 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1365 {
1367
1368 /*
1369 * If the current bank lock is not the same as the previous bank lock then
1370 * release the previous lock and acquire the new lock.
1371 */
1372 if (curbank != prevbank)
1373 {
1376 prevbank = curbank;
1377 }
1378
1379 /* Do nothing if slot is unused */
1380 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1381 continue;
1382
1384
1385 /*
1386 * In some places (e.g. checkpoints), we cannot assert that the slot
1387 * is clean now, since another process might have re-dirtied it
1388 * already. That's okay.
1389 */
1391 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1392 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1393 !shared->page_dirty[slotno]));
1394 }
1395
1397
1398 /*
1399 * Now close any files that were open
1400 */
1401 ok = true;
1402 for (int i = 0; i < fdata.num_files; i++)
1403 {
1404 if (CloseTransientFile(fdata.fd[i]) != 0)
1405 {
1407 slru_errno = errno;
1408 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1409 ok = false;
1410 }
1411 }
1412 if (!ok)
1414
1415 /* Ensure that directory entries for new files are on disk. */
1416 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1417 fsync_fname(ctl->Dir, true);
1418}
1419
1420/*
1421 * Remove all segments before the one holding the passed page number
1422 *
1423 * All SLRUs prevent concurrent calls to this function, either with an LWLock
1424 * or by calling it only as part of a checkpoint. Mutual exclusion must begin
1425 * before computing cutoffPage. Mutual exclusion must end after any limit
1426 * update that would permit other backends to write fresh data into the
1427 * segment immediately preceding the one containing cutoffPage. Otherwise,
1428 * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
1429 * after it has accrued freshly-written data.
1430 */
1431void
1433{
1434 SlruShared shared = ctl->shared;
1435 int prevbank;
1436
1437 /* update the stats counter of truncates */
1439
1440 /*
1441 * Scan shared memory and remove any pages preceding the cutoff page, to
1442 * ensure we won't rewrite them later. (Since this is normally called in
1443 * or just after a checkpoint, any dirty pages should have been flushed
1444 * already ... we're just being extra careful here.)
1445 */
1446restart:
1447
1448 /*
1449 * An important safety check: the current endpoint page must not be
1450 * eligible for removal. This check is just a backstop against wraparound
1451 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1452 * outdated value; therefore we don't add a memory barrier.
1453 */
1454 if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1455 cutoffPage))
1456 {
1457 ereport(LOG,
1458 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1459 ctl->Dir)));
1460 return;
1461 }
1462
1465 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1466 {
1468
1469 /*
1470 * If the current bank lock is not the same as the previous bank lock then
1471 * release the previous lock and acquire the new lock.
1472 */
1473 if (curbank != prevbank)
1474 {
1477 prevbank = curbank;
1478 }
1479
1480 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1481 continue;
1482 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1483 continue;
1484
1485 /*
1486 * If page is clean, just change state to EMPTY (expected case).
1487 */
1488 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1489 !shared->page_dirty[slotno])
1490 {
1492 continue;
1493 }
1494
1495 /*
1496 * Hmm, we have (or may have) I/O operations acting on the page, so
1497 * we've got to wait for them to finish and then start again. This is
1498 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1499 * wouldn't it be OK to just discard it without writing it?
1500 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1501 * not delete this page in the end; even if we don't delete it, we
1502 * won't have cause to read its data again. For now, keep the logic
1503 * the same as it was.)
1504 */
1505 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1507 else
1509
1511 goto restart;
1512 }
1513
1515
1516 /* Now we can remove the old segment(s) */
1518}
1519
1520/*
1521 * Delete an individual SLRU segment.
1522 *
1523 * NB: This does not touch the SLRU buffers themselves, callers have to ensure
1524 * they either can't yet contain anything, or have already been cleaned out.
1525 */
1526static void
1528{
1529 char path[MAXPGPATH];
1530
1531 /* Forget any fsync requests queued for this segment. */
1532 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1533 {
1534 FileTag tag;
1535
1536 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1538 }
1539
1540 /* Unlink the file. */
1541 SlruFileName(ctl, path, segno);
1542 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1543 unlink(path);
1544}
1545
1546/*
1547 * Delete an individual SLRU segment, identified by the segment number.
1548 */
1549void
1551{
1552 SlruShared shared = ctl->shared;
1553 int prevbank = SlotGetBankNumber(0);
1554 bool did_write;
1555
1556 /* Clean out any possibly existing references to the segment. */
1558restart:
1559 did_write = false;
1560 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1561 {
1564
1565 /*
1566 * If the current bank lock is not the same as the previous bank lock then
1567 * release the previous lock and acquire the new lock.
1568 */
1569 if (curbank != prevbank)
1570 {
1573 prevbank = curbank;
1574 }
1575
1576 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1577 continue;
1578
1580 /* not the segment we're looking for */
1581 if (pagesegno != segno)
1582 continue;
1583
1584 /* If page is clean, just change state to EMPTY (expected case). */
1585 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1586 !shared->page_dirty[slotno])
1587 {
1589 continue;
1590 }
1591
1592 /* Same logic as SimpleLruTruncate() */
1593 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1595 else
1597
1598 did_write = true;
1599 }
1600
1601 /*
1602 * Be extra careful and re-check. The IO functions release the control
1603 * lock, so new pages could have been read in.
1604 */
1605 if (did_write)
1606 goto restart;
1607
1609
1611}
1612
1613/*
1614 * Determine whether a segment is okay to delete.
1615 *
1616 * segpage is the first page of the segment, and cutoffPage is the oldest (in
1617 * PagePrecedes order) page in the SLRU containing still-useful data. Since
1618 * every core PagePrecedes callback implements "wrap around", check the
1619 * segment's first and last pages:
1620 *
1621 * first<cutoff && last<cutoff: yes
1622 * first<cutoff && last>=cutoff: no; cutoff falls inside this segment
1623 * first>=cutoff && last<cutoff: no; wrap point falls inside this segment
1624 * first>=cutoff && last>=cutoff: no; every page of this segment is too young
1625 */
1626static bool
1628{
1630
1632
1633 return (ctl->PagePrecedes(segpage, cutoffPage) &&
1634 ctl->PagePrecedes(seg_last_page, cutoffPage));
1635}
1636
1637#ifdef USE_ASSERT_CHECKING
1638static void
1640{
1642 rhs;
1644 oldestPage;
1646 oldestXact;
1647
1648 /*
1649 * Compare an XID pair having undefined order (see RFC 1982), a pair at
1650 * "opposite ends" of the XID space. TransactionIdPrecedes() treats each
1651 * as preceding the other. If RHS is oldestXact, LHS is the first XID we
1652 * must not assign.
1653 */
1654 lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */
1655 rhs = lhs + (1U << 31);
1664 Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
1665 Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
1666 Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
1667 Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
1668 Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
1669 Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
1670 Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
1671 || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */
1672 Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
1673 || (1U << 31) % per_page != 0);
1674 Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
1675 Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
1676 Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
1677
1678 /*
1679 * GetNewTransactionId() has assigned the last XID it can safely use, and
1680 * that XID is in the *LAST* page of the second segment. We must not
1681 * delete that segment.
1682 */
1684 newestXact = newestPage * per_page + offset;
1686 oldestXact = newestXact + 1;
1687 oldestXact -= 1U << 31;
1688 oldestPage = oldestXact / per_page;
1690 (newestPage -
1692 oldestPage));
1693
1694 /*
1695 * GetNewTransactionId() has assigned the last XID it can safely use, and
1696 * that XID is in the *FIRST* page of the second segment. We must not
1697 * delete that segment.
1698 */
1700 newestXact = newestPage * per_page + offset;
1702 oldestXact = newestXact + 1;
1703 oldestXact -= 1U << 31;
1704 oldestPage = oldestXact / per_page;
1706 (newestPage -
1708 oldestPage));
1709}
1710
1711/*
1712 * Unit-test a PagePrecedes function.
1713 *
1714 * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It
1715 * assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
1716 * (MultiXactMemberCtl separates flags from XIDs. NotifyCtl has
1717 * variable-length entries, no keys, and no random access. These unit tests
1718 * do not apply to them.)
1719 */
1720void
1722{
1723 /* Test first, middle and last entries of a page. */
1727}
1728#endif
1729
1730/*
1731 * SlruScanDirectory callback
1732 * This callback reports true if there's any segment wholly prior to the
1733 * one containing the page passed as "data".
1734 */
1735bool
1737 void *data)
1738{
1739 int64 cutoffPage = *(int64 *) data;
1740
1742 return true; /* found one; don't iterate any more */
1743
1744 return false; /* keep going */
1745}
1746
1747/*
1748 * SlruScanDirectory callback.
1749 * This callback deletes segments prior to the one passed in as "data".
1750 */
1751static bool
1753 void *data)
1754{
1755 int64 cutoffPage = *(int64 *) data;
1756
1759
1760 return false; /* keep going */
1761}
1762
1763/*
1764 * SlruScanDirectory callback.
1765 * This callback deletes all segments.
1766 */
1767bool
1769{
1771
1772 return false; /* keep going */
1773}
1774
1775/*
1776 * An internal function used by SlruScanDirectory().
1777 *
1778 * Returns true if a file with a name of a given length may be a correct
1779 * SLRU segment.
1780 */
1781static inline bool
1783{
1784 if (ctl->long_segment_names)
1785 return (len == 15); /* see SlruFileName() */
1786 else
1787
1788 /*
1789 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1790 * 73c986adde5 allowed 6-character length.
1791 *
1792 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1793 * numbers, and the corresponding 15-character file names, which may
1794 * eventually deprecate the support for 4, 5, and 6-character names.
1795 */
1796 return (len == 4 || len == 5 || len == 6);
1797}
1798
1799/*
1800 * Scan the SimpleLru directory and apply a callback to each file found in it.
1801 *
1802 * If the callback returns true, the scan is stopped. The last return value
1803 * from the callback is returned.
1804 *
1805 * The callback receives the following arguments: 1. the SlruCtl struct for the
1806 * slru being truncated; 2. the filename being considered; 3. the page number
1807 * for the first page of that file; 4. a pointer to the opaque data given to us
1808 * by the caller.
1809 *
1810 * Note that the ordering in which the directory is scanned is not guaranteed.
1811 *
1812 * Note that no locking is applied.
1813 */
1814bool
1816{
1817 bool retval = false;
1818 DIR *cldir;
1819 struct dirent *clde;
1820 int64 segno;
1821 int64 segpage;
1822
1823 cldir = AllocateDir(ctl->Dir);
1824 while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1825 {
1826 size_t len;
1827
1828 len = strlen(clde->d_name);
1829
1831 strspn(clde->d_name, "0123456789ABCDEF") == len)
1832 {
1833 segno = strtoi64(clde->d_name, NULL, 16);
1835
1836 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1837 ctl->Dir, clde->d_name);
1838 retval = callback(ctl, clde->d_name, segpage, data);
1839 if (retval)
1840 break;
1841 }
1842 }
1843 FreeDir(cldir);
1844
1845 return retval;
1846}
1847
1848/*
1849 * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
1850 * that they can provide the correct "SlruCtl" (otherwise we don't know how to
1851 * build the path), but they just forward to this common implementation that
1852 * performs the fsync.
1853 */
1854int
1855SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
1856{
1857 int fd;
1858 int save_errno;
1859 int result;
1860
1861 SlruFileName(ctl, path, ftag->segno);
1862
1864 if (fd < 0)
1865 return -1;
1866
1868 result = pg_fsync(fd);
1870 save_errno = errno;
1871
1873
1874 errno = save_errno;
1875 return result;
1876}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition atomics.h:467
#define Min(x, y)
Definition c.h:997
#define MAXALIGN(LEN)
Definition c.h:826
#define Max(x, y)
Definition c.h:991
#define BUFFERALIGN(LEN)
Definition c.h:828
#define Assert(condition)
Definition c.h:873
int64_t int64
Definition c.h:543
#define PG_BINARY
Definition c.h:1281
uint32_t uint32
Definition c.h:546
#define MemSet(start, val, len)
Definition c.h:1013
uint32 TransactionId
Definition c.h:666
size_t Size
Definition c.h:619
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errcode_for_file_access(void)
Definition elog.c:886
int errdetail(const char *fmt,...)
Definition elog.c:1216
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define LOG
Definition elog.h:31
#define DEBUG2
Definition elog.h:29
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
int FreeDir(DIR *dir)
Definition fd.c:3005
int CloseTransientFile(int fd)
Definition fd.c:2851
void fsync_fname(const char *fname, bool isdir)
Definition fd.c:753
int data_sync_elevel(int elevel)
Definition fd.c:3982
DIR * AllocateDir(const char *dirname)
Definition fd.c:2887
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2953
int pg_fsync(int fd)
Definition fd.c:386
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2674
int NBuffers
Definition globals.c:142
bool IsUnderPostmaster
Definition globals.c:120
#define newval
#define GUC_check_errdetail
Definition guc.h:505
int i
Definition isn.c:77
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1911
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1955
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:698
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1347
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
#define MAXPGPATH
#define SLRU_PAGES_PER_SEGMENT
const void size_t len
const void * data
static char * filename
Definition pg_dumpall.c:120
static XLogRecPtr endpos
void pgstat_count_slru_blocks_zeroed(int slru_idx)
void pgstat_count_slru_blocks_hit(int slru_idx)
void pgstat_count_slru_truncate(int slru_idx)
void pgstat_count_slru_blocks_read(int slru_idx)
void pgstat_count_slru_blocks_written(int slru_idx)
void pgstat_count_slru_flush(int slru_idx)
void pgstat_count_slru_blocks_exists(int slru_idx)
PgStat_CheckpointerStats PendingCheckpointerStats
int pgstat_get_slru_index(const char *name)
#define pg_pwrite
Definition port.h:248
#define pg_pread
Definition port.h:247
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static int fd(const char *x, int i)
tree ctl
Definition radixtree.h:1838
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:389
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition slru.c:252
static int SlruFileName(SlruCtl ctl, char *path, int64 segno)
Definition slru.c:91
static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno)
Definition slru.c:829
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition slru.c:630
#define INIT_SLRUFILETAG(a, xx_handler, xx_segno)
Definition slru.c:156
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition slru.c:757
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition slru.c:1347
static bool SlruMayDeleteSegment(SlruCtl ctl, int64 segpage, int64 cutoffPage)
Definition slru.c:1628
static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition slru.c:1073
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
Definition slru.c:428
#define SLRU_BANK_SIZE
Definition slru.c:143
int SimpleLruAutotuneBuffers(int divisor, int max)
Definition slru.c:231
static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruWriteAll fdata)
Definition slru.c:901
static bool SlruCorrectSegmentFilenameLength(SlruCtl ctl, size_t len)
Definition slru.c:1783
static SlruErrorCause slru_errcause
Definition slru.c:174
#define MAX_WRITEALL_BUFFERS
Definition slru.c:123
static void SimpleLruWaitIO(SlruCtl ctl, int slotno)
Definition slru.c:470
static int slru_errno
Definition slru.c:175
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition slru.c:771
void SlruDeleteSegment(SlruCtl ctl, int64 segno)
Definition slru.c:1551
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
Definition slru.c:677
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition slru.c:1816
bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1769
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition slru.c:527
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition slru.c:1856
static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
Definition slru.c:1194
#define SlotGetBankNumber(slotno)
Definition slru.c:148
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition slru.c:375
void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
Definition slru.c:444
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition slru.c:1433
static void SlruInternalDeleteSegment(SlruCtl ctl, int64 segno)
Definition slru.c:1528
SlruErrorCause
Definition slru.c:165
@ SLRU_WRITE_FAILED
Definition slru.c:169
@ SLRU_FSYNC_FAILED
Definition slru.c:170
@ SLRU_SEEK_FAILED
Definition slru.c:167
@ SLRU_OPEN_FAILED
Definition slru.c:166
@ SLRU_CLOSE_FAILED
Definition slru.c:171
@ SLRU_READ_FAILED
Definition slru.c:168
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition slru.c:198
bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1737
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition slru.c:1753
static void SlruRecentlyUsed(SlruShared shared, int slotno)
Definition slru.c:1148
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:355
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition slru.h:160
SlruSharedData * SlruShared
Definition slru.h:106
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:185
bool(* SlruScanCallback)(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition slru.h:190
#define SLRU_MAX_ALLOWED_BUFFERS
Definition slru.h:24
SlruPageStatus
Definition slru.h:33
@ SLRU_PAGE_VALID
Definition slru.h:36
@ SLRU_PAGE_WRITE_IN_PROGRESS
Definition slru.h:37
@ SLRU_PAGE_READ_IN_PROGRESS
Definition slru.h:35
@ SLRU_PAGE_EMPTY
Definition slru.h:34
int ckpt_slru_written
Definition xlog.h:179
Definition dirent.c:26
uint64 segno
Definition sync.h:55
PgStat_Counter slru_written
Definition pgstat.h:267
int slru_stats_idx
Definition slru.h:103
int64 * page_number
Definition slru.h:58
int num_slots
Definition slru.h:49
LWLockPadded * bank_locks
Definition slru.h:65
int * page_lru_count
Definition slru.h:59
pg_atomic_uint64 latest_page_number
Definition slru.h:100
XLogRecPtr * group_lsn
Definition slru.h:92
int * bank_cur_lru_count
Definition slru.h:82
int lsn_groups_per_page
Definition slru.h:93
SlruPageStatus * page_status
Definition slru.h:56
bool * page_dirty
Definition slru.h:57
LWLockPadded * buffer_locks
Definition slru.h:62
char ** page_buffer
Definition slru.h:55
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition sync.c:580
SyncRequestHandler
Definition sync.h:36
@ SYNC_HANDLER_NONE
Definition sync.h:42
@ SYNC_FORGET_REQUEST
Definition sync.h:27
@ SYNC_REQUEST
Definition sync.h:25
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
LWLock lock
Definition lwlock.h:70
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
const char * name
CheckpointStatsData CheckpointStats
Definition xlog.c:212
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2783
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
uint64 XLogRecPtr
Definition xlogdefs.h:21
bool InRecovery
Definition xlogutils.c:50

◆ MAX_WRITEALL_BUFFERS

#define MAX_WRITEALL_BUFFERS   16

Definition at line 123 of file slru.c.

◆ SlotGetBankNumber

#define SlotGetBankNumber (   slotno)    ((slotno) >> SLRU_BANK_BITSHIFT)

Definition at line 148 of file slru.c.

◆ SLRU_BANK_BITSHIFT

#define SLRU_BANK_BITSHIFT   4

Definition at line 142 of file slru.c.

◆ SLRU_BANK_SIZE

#define SLRU_BANK_SIZE   (1 << SLRU_BANK_BITSHIFT)

Definition at line 143 of file slru.c.

Typedef Documentation

◆ SlruWriteAll

Definition at line 132 of file slru.c.

◆ SlruWriteAllData

Enumeration Type Documentation

◆ SlruErrorCause

Enumerator
SLRU_OPEN_FAILED 
SLRU_SEEK_FAILED 
SLRU_READ_FAILED 
SLRU_WRITE_FAILED 
SLRU_FSYNC_FAILED 
SLRU_CLOSE_FAILED 

Definition at line 164 of file slru.c.

Function Documentation

◆ check_slru_buffers()

bool check_slru_buffers ( const char * name,
int * newval 
)

Definition at line 355 of file slru.c.

356{
357 /* Valid values are multiples of SLRU_BANK_SIZE */
358 if (*newval % SLRU_BANK_SIZE == 0)
359 return true;
360
361 GUC_check_errdetail("\"%s\" must be a multiple of %d.", name,
363 return false;
364}

References GUC_check_errdetail, name, newval, and SLRU_BANK_SIZE.

Referenced by check_commit_ts_buffers(), check_multixact_member_buffers(), check_multixact_offset_buffers(), check_notify_buffers(), check_serial_buffers(), check_subtrans_buffers(), and check_transaction_buffers().

◆ SimpleLruAutotuneBuffers()

int SimpleLruAutotuneBuffers ( int  divisor,
int  max 
)

Definition at line 231 of file slru.c.

232{
233 return Min(max - (max % SLRU_BANK_SIZE),
236}

References fb(), Max, Min, NBuffers, and SLRU_BANK_SIZE.

Referenced by CLOGShmemBuffers(), CommitTsShmemBuffers(), and SUBTRANSShmemBuffers().

◆ SimpleLruDoesPhysicalPageExist()

bool SimpleLruDoesPhysicalPageExist ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 771 of file slru.c.

772{
773 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
774 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
775 int offset = rpageno * BLCKSZ;
776 char path[MAXPGPATH];
777 int fd;
778 bool result;
780
781 /* update the stats counter of checked pages */
782 pgstat_count_slru_blocks_exists(ctl->shared->slru_stats_idx);
783
784 SlruFileName(ctl, path, segno);
785
787 if (fd < 0)
788 {
789 /* expected: file doesn't exist */
790 if (errno == ENOENT)
791 return false;
792
793 /* report error normally */
796 SlruReportIOError(ctl, pageno, 0);
797 }
798
799 if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
800 {
803 SlruReportIOError(ctl, pageno, 0);
804 }
805
806 result = endpos >= (off_t) (offset + BLCKSZ);
807
808 if (CloseTransientFile(fd) != 0)
809 {
812 return false;
813 }
814
815 return result;
816}

References CloseTransientFile(), ctl, endpos, fb(), fd(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pgstat_count_slru_blocks_exists(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_SEEK_FAILED, SlruFileName(), and SlruReportIOError().

Referenced by ActivateCommitTs(), find_multixact_start(), and test_slru_page_exists().

◆ SimpleLruInit()

void SimpleLruInit ( SlruCtl  ctl,
const char * name,
int  nslots,
int  nlsns,
const char * subdir,
int  buffer_tranche_id,
int  bank_tranche_id,
SyncRequestHandler  sync_handler,
bool  long_segment_names 
)

Definition at line 252 of file slru.c.

255{
256 SlruShared shared;
257 bool found;
258 int nbanks = nslots / SLRU_BANK_SIZE;
259
261
263 SimpleLruShmemSize(nslots, nlsns),
264 &found);
265
267 {
268 /* Initialize locks and shared memory area */
269 char *ptr;
270 Size offset;
271
272 Assert(!found);
273
274 memset(shared, 0, sizeof(SlruSharedData));
275
276 shared->num_slots = nslots;
277 shared->lsn_groups_per_page = nlsns;
278
280
282
283 ptr = (char *) shared;
284 offset = MAXALIGN(sizeof(SlruSharedData));
285 shared->page_buffer = (char **) (ptr + offset);
286 offset += MAXALIGN(nslots * sizeof(char *));
287 shared->page_status = (SlruPageStatus *) (ptr + offset);
288 offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
289 shared->page_dirty = (bool *) (ptr + offset);
290 offset += MAXALIGN(nslots * sizeof(bool));
291 shared->page_number = (int64 *) (ptr + offset);
292 offset += MAXALIGN(nslots * sizeof(int64));
293 shared->page_lru_count = (int *) (ptr + offset);
294 offset += MAXALIGN(nslots * sizeof(int));
295
296 /* Initialize LWLocks */
297 shared->buffer_locks = (LWLockPadded *) (ptr + offset);
298 offset += MAXALIGN(nslots * sizeof(LWLockPadded));
299 shared->bank_locks = (LWLockPadded *) (ptr + offset);
300 offset += MAXALIGN(nbanks * sizeof(LWLockPadded));
301 shared->bank_cur_lru_count = (int *) (ptr + offset);
302 offset += MAXALIGN(nbanks * sizeof(int));
303
304 if (nlsns > 0)
305 {
306 shared->group_lsn = (XLogRecPtr *) (ptr + offset);
307 offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
308 }
309
310 ptr += BUFFERALIGN(offset);
311 for (int slotno = 0; slotno < nslots; slotno++)
312 {
315
316 shared->page_buffer[slotno] = ptr;
318 shared->page_dirty[slotno] = false;
319 shared->page_lru_count[slotno] = 0;
320 ptr += BLCKSZ;
321 }
322
323 /* Initialize the slot banks. */
324 for (int bankno = 0; bankno < nbanks; bankno++)
325 {
327 shared->bank_cur_lru_count[bankno] = 0;
328 }
329
330 /* Should fit within the estimated shmem size */
331 Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
332 }
333 else
334 {
335 Assert(found);
336 Assert(shared->num_slots == nslots);
337 }
338
339 /*
340 * Initialize the unshared control struct, including directory path. We
341 * assume caller set PagePrecedes.
342 */
343 ctl->shared = shared;
344 ctl->sync_handler = sync_handler;
345 ctl->long_segment_names = long_segment_names;
346 ctl->nbanks = nbanks;
347 strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
348}

References Assert, SlruSharedData::bank_cur_lru_count, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, BUFFERALIGN, ctl, fb(), SlruSharedData::group_lsn, IsUnderPostmaster, SlruSharedData::latest_page_number, LWLockPadded::lock, SlruSharedData::lsn_groups_per_page, LWLockInitialize(), MAXALIGN, name, SlruSharedData::num_slots, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_init_u64(), pgstat_get_slru_index(), ShmemInitStruct(), SimpleLruShmemSize(), SLRU_BANK_SIZE, SLRU_MAX_ALLOWED_BUFFERS, SLRU_PAGE_EMPTY, SlruSharedData::slru_stats_idx, and strlcpy().

Referenced by AsyncShmemInit(), CLOGShmemInit(), CommitTsShmemInit(), MultiXactShmemInit(), SerialInit(), SUBTRANSShmemInit(), and test_slru_shmem_startup().

◆ SimpleLruReadPage()

int SimpleLruReadPage ( SlruCtl  ctl,
int64  pageno,
bool  write_ok,
TransactionId  xid 
)

Definition at line 527 of file slru.c.

529{
530 SlruShared shared = ctl->shared;
532
534
535 /* Outer loop handles restart if we must wait for someone else's I/O */
536 for (;;)
537 {
538 int slotno;
539 bool ok;
540
541 /* See if page already is in memory; if not, pick victim slot */
542 slotno = SlruSelectLRUPage(ctl, pageno);
543
544 /* Did we find the page in memory? */
545 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
546 shared->page_number[slotno] == pageno)
547 {
548 /*
549 * If page is still being read in, we must wait for I/O. Likewise
550 * if the page is being written and the caller said that's not OK.
551 */
554 !write_ok))
555 {
557 /* Now we must recheck state from the top */
558 continue;
559 }
560 /* Otherwise, it's ready to use */
561 SlruRecentlyUsed(shared, slotno);
562
563 /* update the stats counter of pages found in the SLRU */
565
566 return slotno;
567 }
568
569 /* We found no match; assert we selected a freeable slot */
571 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
572 !shared->page_dirty[slotno]));
573
574 /* Mark the slot read-busy */
575 shared->page_number[slotno] = pageno;
577 shared->page_dirty[slotno] = false;
578
579 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
581
582 /* Release bank lock while doing I/O */
584
585 /* Do the read */
586 ok = SlruPhysicalReadPage(ctl, pageno, slotno);
587
588 /* Set the LSNs for this newly read-in page to zero */
590
591 /* Re-acquire bank control lock and update page state */
593
594 Assert(shared->page_number[slotno] == pageno &&
596 !shared->page_dirty[slotno]);
597
599
601
602 /* Now it's okay to ereport if we failed */
603 if (!ok)
604 SlruReportIOError(ctl, pageno, xid);
605
606 SlruRecentlyUsed(shared, slotno);
607
608 /* update the stats counter of pages not found in SLRU */
610
611 return slotno;
612 }
613}

References Assert, SlruSharedData::buffer_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_blocks_hit(), pgstat_count_slru_blocks_read(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, SlruSharedData::slru_stats_idx, SlruPhysicalReadPage(), SlruRecentlyUsed(), SlruReportIOError(), and SlruSelectLRUPage().

Referenced by AsyncNotifyFreezeXids(), asyncQueueAddEntries(), GetMultiXactIdMembers(), RecordNewMultiXact(), SerialAdd(), SetXidCommitTsInPage(), SimpleLruReadPage_ReadOnly(), SubTransSetParent(), test_slru_page_read(), TransactionIdSetPageStatusInternal(), TrimCLOG(), and TrimMultiXact().

◆ SimpleLruReadPage_ReadOnly()

int SimpleLruReadPage_ReadOnly ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)

Definition at line 630 of file slru.c.

631{
632 SlruShared shared = ctl->shared;
634 int bankno = pageno % ctl->nbanks;
637
638 /* Try to find the page while holding only shared lock */
640
641 /* See if page is already in a buffer */
642 for (int slotno = bankstart; slotno < bankend; slotno++)
643 {
644 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
645 shared->page_number[slotno] == pageno &&
647 {
648 /* See comments for SlruRecentlyUsed() */
649 SlruRecentlyUsed(shared, slotno);
650
651 /* update the stats counter of pages found in the SLRU */
653
654 return slotno;
655 }
656 }
657
658 /* No luck, so switch to normal exclusive lock and do regular read */
661
662 return SimpleLruReadPage(ctl, pageno, true, xid);
663}

References ctl, fb(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), SlruSharedData::page_number, SlruSharedData::page_status, pgstat_count_slru_blocks_hit(), SimpleLruGetBankLock(), SimpleLruReadPage(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SlruSharedData::slru_stats_idx, and SlruRecentlyUsed().

Referenced by asyncQueueProcessPageEntries(), find_multixact_start(), SerialGetMinConflictCommitSeqNo(), SubTransGetParent(), test_slru_page_readonly(), TransactionIdGetCommitTsData(), and TransactionIdGetStatus().

◆ SimpleLruShmemSize()

Size SimpleLruShmemSize ( int  nslots,
int  nlsns 
)

Definition at line 198 of file slru.c.

199{
200 int nbanks = nslots / SLRU_BANK_SIZE;
201 Size sz;
202
204 Assert(nslots % SLRU_BANK_SIZE == 0);
205
206 /* we assume nslots isn't so large as to risk overflow */
207 sz = MAXALIGN(sizeof(SlruSharedData));
208 sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
209 sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
210 sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
211 sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */
212 sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
213 sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */
214 sz += MAXALIGN(nbanks * sizeof(LWLockPadded)); /* bank_locks[] */
215 sz += MAXALIGN(nbanks * sizeof(int)); /* bank_cur_lru_count[] */
216
217 if (nlsns > 0)
218 sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
219
220 return BUFFERALIGN(sz) + BLCKSZ * nslots;
221}

References Assert, BUFFERALIGN, fb(), MAXALIGN, SLRU_BANK_SIZE, and SLRU_MAX_ALLOWED_BUFFERS.

Referenced by AsyncShmemSize(), CLOGShmemSize(), CommitTsShmemSize(), MultiXactShmemSize(), PredicateLockShmemSize(), SimpleLruInit(), SUBTRANSShmemSize(), and test_slru_shmem_request().

◆ SimpleLruTruncate()

void SimpleLruTruncate ( SlruCtl  ctl,
int64  cutoffPage 
)

Definition at line 1433 of file slru.c.

1434{
1435 SlruShared shared = ctl->shared;
1436 int prevbank;
1437
1438 /* update the stats counter of truncates */
1440
1441 /*
1442 * Scan shared memory and remove any pages preceding the cutoff page, to
1443 * ensure we won't rewrite them later. (Since this is normally called in
1444 * or just after a checkpoint, any dirty pages should have been flushed
1445 * already ... we're just being extra careful here.)
1446 */
1447restart:
1448
1449 /*
1450 * An important safety check: the current endpoint page must not be
1451 * eligible for removal. This check is just a backstop against wraparound
1452 * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
1453 * outdated value; therefore we don't add a memory barrier.
1454 */
1455 if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
1456 cutoffPage))
1457 {
1458 ereport(LOG,
1459 (errmsg("could not truncate directory \"%s\": apparent wraparound",
1460 ctl->Dir)));
1461 return;
1462 }
1463
1466 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1467 {
1469
1470 /*
1471 * If the current bank lock is not same as the previous bank lock then
1472 * release the previous lock and acquire the new lock.
1473 */
1474 if (curbank != prevbank)
1475 {
1478 prevbank = curbank;
1479 }
1480
1481 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1482 continue;
1483 if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
1484 continue;
1485
1486 /*
1487 * If page is clean, just change state to EMPTY (expected case).
1488 */
1489 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1490 !shared->page_dirty[slotno])
1491 {
1493 continue;
1494 }
1495
1496 /*
1497 * Hmm, we have (or may have) I/O operations acting on the page, so
1498 * we've got to wait for them to finish and then start again. This is
1499 * the same logic as in SlruSelectLRUPage. (XXX if page is dirty,
1500 * wouldn't it be OK to just discard it without writing it?
1501 * SlruMayDeleteSegment() uses a stricter qualification, so we might
1502 * not delete this page in the end; even if we don't delete it, we
1503 * won't have cause to read its data again. For now, keep the logic
1504 * the same as it was.)
1505 */
1506 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1508 else
1510
1512 goto restart;
1513 }
1514
1516
1517 /* Now we can remove the old segment(s) */
1519}

References SlruSharedData::bank_locks, ctl, ereport, errmsg(), fb(), SlruSharedData::latest_page_number, LWLockPadded::lock, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), pgstat_count_slru_truncate(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruScanDirCbDeleteCutoff(), and SlruScanDirectory().

Referenced by asyncQueueAdvanceTail(), CheckPointPredicate(), clog_redo(), commit_ts_redo(), PerformMembersTruncation(), PerformOffsetsTruncation(), test_slru_page_truncate(), TruncateCLOG(), TruncateCommitTs(), and TruncateSUBTRANS().

◆ SimpleLruWaitIO()

static void SimpleLruWaitIO ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 470 of file slru.c.

471{
472 SlruShared shared = ctl->shared;
474
476
477 /* See notes at top of file */
482
483 /*
484 * If the slot is still in an io-in-progress state, then either someone
485 * already started a new I/O on the slot, or a previous I/O failed and
486 * neglected to reset the page state. That shouldn't happen, really, but
487 * it seems worth a few extra cycles to check and recover from it. We can
488 * cheaply test for failure by seeing if the buffer lock is still held (we
489 * assume that transaction abort would release the lock).
490 */
493 {
495 {
496 /* indeed, the I/O must have failed */
499 else /* write_in_progress */
500 {
502 shared->page_dirty[slotno] = true;
503 }
505 }
506 }
507}

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockConditionalAcquire(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_status, SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_READ_IN_PROGRESS, SLRU_PAGE_VALID, and SLRU_PAGE_WRITE_IN_PROGRESS.

Referenced by SimpleLruReadPage(), SimpleLruTruncate(), SlruDeleteSegment(), SlruInternalWritePage(), and SlruSelectLRUPage().

◆ SimpleLruWriteAll()

void SimpleLruWriteAll ( SlruCtl  ctl,
bool  allow_redirtied 
)

Definition at line 1347 of file slru.c.

1348{
1349 SlruShared shared = ctl->shared;
1351 int64 pageno = 0;
1352 int prevbank = SlotGetBankNumber(0);
1353 bool ok;
1354
1355 /* update the stats counter of flushes */
1357
1358 /*
1359 * Find and write dirty pages
1360 */
1361 fdata.num_files = 0;
1362
1364
1365 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1366 {
1368
1369 /*
1370 * If the current bank lock is not same as the previous bank lock then
1371 * release the previous lock and acquire the new lock.
1372 */
1373 if (curbank != prevbank)
1374 {
1377 prevbank = curbank;
1378 }
1379
1380 /* Do nothing if slot is unused */
1381 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1382 continue;
1383
1385
1386 /*
1387 * In some places (e.g. checkpoints), we cannot assert that the slot
1388 * is clean now, since another process might have re-dirtied it
1389 * already. That's okay.
1390 */
1392 shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
1393 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1394 !shared->page_dirty[slotno]));
1395 }
1396
1398
1399 /*
1400 * Now close any files that were open
1401 */
1402 ok = true;
1403 for (int i = 0; i < fdata.num_files; i++)
1404 {
1405 if (CloseTransientFile(fdata.fd[i]) != 0)
1406 {
1408 slru_errno = errno;
1409 pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
1410 ok = false;
1411 }
1412 }
1413 if (!ok)
1415
1416 /* Ensure that directory entries for new files are on disk. */
1417 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1418 fsync_fname(ctl->Dir, true);
1419}

References Assert, SlruSharedData::bank_locks, CloseTransientFile(), ctl, fb(), fsync_fname(), i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_status, pgstat_count_slru_flush(), SlotGetBankNumber, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SlruInternalWritePage(), SlruReportIOError(), and SYNC_HANDLER_NONE.

Referenced by CheckPointCLOG(), CheckPointCommitTs(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointSUBTRANS(), find_multixact_start(), and test_slru_page_writeall().

◆ SimpleLruWritePage()

void SimpleLruWritePage ( SlruCtl  ctl,
int  slotno 
)

Definition at line 757 of file slru.c.

758{
759 Assert(ctl->shared->page_status[slotno] != SLRU_PAGE_EMPTY);
760
762}

References Assert, ctl, fb(), SLRU_PAGE_EMPTY, and SlruInternalWritePage().

Referenced by SimpleLruZeroAndWritePage(), and test_slru_page_write().

◆ SimpleLruZeroAndWritePage()

void SimpleLruZeroAndWritePage ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 444 of file slru.c.

445{
446 int slotno;
447 LWLock *lock;
448
449 lock = SimpleLruGetBankLock(ctl, pageno);
451
452 /* Create and zero the page */
453 slotno = SimpleLruZeroPage(ctl, pageno);
454
455 /* Make sure it's written out */
457 Assert(!ctl->shared->page_dirty[slotno]);
458
459 LWLockRelease(lock);
460}

References Assert, ctl, fb(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SimpleLruGetBankLock(), SimpleLruWritePage(), and SimpleLruZeroPage().

Referenced by ActivateCommitTs(), BootStrapCLOG(), BootStrapMultiXact(), BootStrapSUBTRANS(), clog_redo(), commit_ts_redo(), and multixact_redo().

◆ SimpleLruZeroLSNs()

static void SimpleLruZeroLSNs ( SlruCtl  ctl,
int  slotno 
)
static

Definition at line 428 of file slru.c.

/*
 * SimpleLruZeroLSNs: clear the group LSN entries that belong to one slot.
 *
 * ctl    - SLRU control struct; only ctl->shared is used here.
 * slotno - buffer slot whose LSN entries are to be zeroed.
 *
 * The slot's entries are the lsn_groups_per_page consecutive elements of
 * shared->group_lsn[] starting at index slotno * lsn_groups_per_page.
 * Nothing is returned.
 */
429{
430 SlruShared shared = ctl->shared;
431
432 if (shared->lsn_groups_per_page > 0) /* no-op when the page has no LSN groups */
433 MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
434 shared->lsn_groups_per_page * sizeof(XLogRecPtr)); /* byte count covering exactly this slot's entries */
435}

References ctl, fb(), SlruSharedData::group_lsn, SlruSharedData::lsn_groups_per_page, and MemSet.

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SimpleLruZeroPage()

int SimpleLruZeroPage ( SlruCtl  ctl,
int64  pageno 
)

Definition at line 375 of file slru.c.

376{
377 SlruShared shared = ctl->shared;
378 int slotno;
379
381
382 /* Find a suitable buffer slot for the page */
383 slotno = SlruSelectLRUPage(ctl, pageno);
385 (shared->page_status[slotno] == SLRU_PAGE_VALID &&
386 !shared->page_dirty[slotno]) ||
387 shared->page_number[slotno] == pageno);
388
389 /* Mark the slot as containing this page */
390 shared->page_number[slotno] = pageno;
392 shared->page_dirty[slotno] = true;
393 SlruRecentlyUsed(shared, slotno);
394
395 /* Set the buffer to zeroes */
396 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
397
398 /* Set the LSNs for this new page to zero */
400
401 /*
402 * Assume this page is now the latest active page.
403 *
404 * Note that because both this routine and SlruSelectLRUPage run with a
405 * SLRU bank lock held, it is not possible for this to be zeroing a page
406 * that SlruSelectLRUPage is going to evict simultaneously. Therefore,
407 * there's no memory barrier here.
408 */
409 pg_atomic_write_u64(&shared->latest_page_number, pageno);
410
411 /* update the stats counter of zeroed pages */
413
414 return slotno;
415}

References Assert, ctl, fb(), SlruSharedData::latest_page_number, LW_EXCLUSIVE, LWLockHeldByMeInMode(), MemSet, SlruSharedData::page_buffer, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_write_u64(), pgstat_count_slru_blocks_zeroed(), SimpleLruGetBankLock(), SimpleLruZeroLSNs(), SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SlruSharedData::slru_stats_idx, SlruRecentlyUsed(), and SlruSelectLRUPage().

Referenced by asyncQueueAddEntries(), ExtendCLOG(), ExtendCommitTs(), ExtendMultiXactMember(), ExtendMultiXactOffset(), ExtendSUBTRANS(), SerialAdd(), SimpleLruZeroAndWritePage(), StartupSUBTRANS(), test_slru_page_write(), and TrimMultiXact().

◆ SlruCorrectSegmentFilenameLength()

static bool SlruCorrectSegmentFilenameLength ( SlruCtl  ctl,
size_t  len 
)
inline static

Definition at line 1783 of file slru.c.

/*
 * SlruCorrectSegmentFilenameLength: report whether 'len' is an acceptable
 * length for a segment file name of this SLRU.
 *
 * ctl - SLRU control struct; only ctl->long_segment_names is consulted.
 * len - length of the candidate file name, in characters.
 *
 * Returns true when len is exactly 15 for an SLRU using long segment names,
 * or when len is 4, 5, or 6 otherwise; returns false in every other case.
 */
1784{
1785 if (ctl->long_segment_names)
1786 return (len == 15); /* see SlruFileName() */
1787 else
1788
1789 /*
1790 * Commit 638cf09e76d allowed 5-character lengths. Later commit
1791 * 73c986adde5 allowed 6-character length.
1792 *
1793 * Note: There is an ongoing plan to migrate all SLRUs to 64-bit page
1794 * numbers, and the corresponding 15-character file names, which may
1795 * eventually deprecate the support for 4, 5, and 6-character names.
1796 */
1797 return (len == 4 || len == 5 || len == 6);
1798}

References ctl, and len.

Referenced by SlruScanDirectory().

◆ SlruDeleteSegment()

void SlruDeleteSegment ( SlruCtl  ctl,
int64  segno 
)

Definition at line 1551 of file slru.c.

1552{
1553 SlruShared shared = ctl->shared;
1554 int prevbank = SlotGetBankNumber(0);
1555 bool did_write;
1556
1557 /* Clean out any possibly existing references to the segment. */
1559restart:
1560 did_write = false;
1561 for (int slotno = 0; slotno < shared->num_slots; slotno++)
1562 {
1565
1566 /*
1567 * If the current bank lock is not same as the previous bank lock then
1568 * release the previous lock and acquire the new lock.
1569 */
1570 if (curbank != prevbank)
1571 {
1574 prevbank = curbank;
1575 }
1576
1577 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1578 continue;
1579
1581 /* not the segment we're looking for */
1582 if (pagesegno != segno)
1583 continue;
1584
1585 /* If page is clean, just change state to EMPTY (expected case). */
1586 if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
1587 !shared->page_dirty[slotno])
1588 {
1590 continue;
1591 }
1592
1593 /* Same logic as SimpleLruTruncate() */
1594 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1596 else
1598
1599 did_write = true;
1600 }
1601
1602 /*
1603 * Be extra careful and re-check. The IO functions release the control
1604 * lock, so new pages could have been read in.
1605 */
1606 if (did_write)
1607 goto restart;
1608
1610
1612}

References SlruSharedData::bank_locks, ctl, fb(), LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SlruSharedData::num_slots, SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, SlruWriteAllData::segno, SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruInternalWritePage().

Referenced by test_slru_page_delete().

◆ SlruFileName()

static int SlruFileName ( SlruCtl  ctl,
char * path,
int64  segno 
)
inline static

Definition at line 91 of file slru.c.

/*
 * SlruFileName: build the path of the segment file numbered 'segno'.
 *
 * ctl   - SLRU control struct; supplies the directory (ctl->Dir) and the
 *         long_segment_names flag that selects the naming scheme.
 * path  - output buffer; snprintf is told it holds MAXPGPATH bytes, so the
 *         caller is expected to supply at least that much.
 * segno - segment number; must be non-negative and within the range checked
 *         by the Assert in the selected branch.
 *
 * The result has the form "<ctl->Dir>/<segno in upper-case hex>", zero-padded
 * to 15 digits for long segment names and to a minimum of 4 digits otherwise.
 * Returns whatever snprintf returns.
 */
92{
93 if (ctl->long_segment_names)
94 {
95 /*
96 * We could use 16 characters here but the disadvantage would be that
97 * the SLRU segments will be hard to distinguish from WAL segments.
98 *
99 * For this reason we use 15 characters. It is enough but also means
100 * that in the future we can't decrease SLRU_PAGES_PER_SEGMENT easily.
101 */
102 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF)); /* upper bound is 15 hex digits */
103 return snprintf(path, MAXPGPATH, "%s/%015" PRIX64, ctl->Dir, segno);
104 }
105 else
106 {
107 /*
108 * Although the %04X format string is used, segment numbers of up to
109 * 24 bits are allowed; 4 is only the minimum width, so names can be
110 * up to 6 hex digits long. See SlruCorrectSegmentFilenameLength().
110 */
111 Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
112 return snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir,
113 (unsigned int) segno); /* cast matches %X, which expects unsigned int */
114 }
115}
#define INT64CONST(x)
Definition c.h:560
#define snprintf
Definition port.h:260

References Assert, ctl, fb(), INT64CONST, MAXPGPATH, and snprintf.

Referenced by SimpleLruDoesPhysicalPageExist(), SlruInternalDeleteSegment(), SlruPhysicalReadPage(), SlruPhysicalWritePage(), SlruReportIOError(), and SlruSyncFileTag().

◆ SlruInternalDeleteSegment()

static void SlruInternalDeleteSegment ( SlruCtl  ctl,
int64  segno 
)
static

Definition at line 1528 of file slru.c.

1529{
1530 char path[MAXPGPATH];
1531
1532 /* Forget any fsync requests queued for this segment. */
1533 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1534 {
1535 FileTag tag;
1536
1537 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1539 }
1540
1541 /* Unlink the file. */
1542 SlruFileName(ctl, path, segno);
1543 ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
1544 unlink(path);
1545}

References ctl, DEBUG2, ereport, errmsg_internal(), fb(), INIT_SLRUFILETAG, MAXPGPATH, RegisterSyncRequest(), SlruWriteAllData::segno, SlruFileName(), SYNC_FORGET_REQUEST, and SYNC_HANDLER_NONE.

Referenced by SlruDeleteSegment(), SlruScanDirCbDeleteAll(), and SlruScanDirCbDeleteCutoff().

◆ SlruInternalWritePage()

static void SlruInternalWritePage ( SlruCtl  ctl,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 677 of file slru.c.

678{
679 SlruShared shared = ctl->shared;
680 int64 pageno = shared->page_number[slotno];
682 bool ok;
683
686
687 /* If a write is in progress, wait for it to finish */
689 shared->page_number[slotno] == pageno)
690 {
692 }
693
694 /*
695 * Do nothing if page is not dirty, or if buffer no longer contains the
696 * same page we were called for.
697 */
698 if (!shared->page_dirty[slotno] ||
699 shared->page_status[slotno] != SLRU_PAGE_VALID ||
700 shared->page_number[slotno] != pageno)
701 return;
702
703 /*
704 * Mark the slot write-busy, and clear the dirtybit. After this point, a
705 * transaction status update on this page will mark it dirty again.
706 */
708 shared->page_dirty[slotno] = false;
709
710 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
712
713 /* Release bank lock while doing I/O */
715
716 /* Do the write */
718
719 /* If we failed, and we're in a flush, better close the files */
720 if (!ok && fdata)
721 {
722 for (int i = 0; i < fdata->num_files; i++)
724 }
725
726 /* Re-acquire bank lock and update page state */
728
729 Assert(shared->page_number[slotno] == pageno &&
731
732 /* If we failed to write, mark the page dirty again */
733 if (!ok)
734 shared->page_dirty[slotno] = true;
735
737
739
740 /* Now it's okay to ereport if we failed */
741 if (!ok)
743
744 /* If part of a checkpoint, count this as a SLRU buffer written. */
745 if (fdata)
746 {
749 }
750}

References Assert, SlruSharedData::bank_locks, SlruSharedData::buffer_locks, CheckpointStats, CheckpointStatsData::ckpt_slru_written, CloseTransientFile(), ctl, fb(), i, InvalidTransactionId, LWLockPadded::lock, LW_EXCLUSIVE, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), SlruSharedData::page_dirty, SlruSharedData::page_number, SlruSharedData::page_status, PendingCheckpointerStats, SimpleLruGetBankLock(), SimpleLruWaitIO(), SlotGetBankNumber, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, SLRU_PAGE_WRITE_IN_PROGRESS, PgStat_CheckpointerStats::slru_written, SlruPhysicalWritePage(), and SlruReportIOError().

Referenced by SimpleLruTruncate(), SimpleLruWriteAll(), SimpleLruWritePage(), SlruDeleteSegment(), and SlruSelectLRUPage().

◆ SlruMayDeleteSegment()

static bool SlruMayDeleteSegment ( SlruCtl  ctl,
int64  segpage,
int64  cutoffPage 
)
static

Definition at line 1628 of file slru.c.

1629{
1631
1633
1634 return (ctl->PagePrecedes(segpage, cutoffPage) &&
1635 ctl->PagePrecedes(seg_last_page, cutoffPage));
1636}

References Assert, ctl, fb(), and SLRU_PAGES_PER_SEGMENT.

Referenced by SlruScanDirCbDeleteCutoff(), and SlruScanDirCbReportPresence().

◆ SlruPhysicalReadPage()

static bool SlruPhysicalReadPage ( SlruCtl  ctl,
int64  pageno,
int  slotno 
)
static

Definition at line 829 of file slru.c.

830{
831 SlruShared shared = ctl->shared;
832 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
833 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
834 off_t offset = rpageno * BLCKSZ;
835 char path[MAXPGPATH];
836 int fd;
837
838 SlruFileName(ctl, path, segno);
839
840 /*
841 * In a crash-and-restart situation, it's possible for us to receive
842 * commands to set the commit status of transactions whose bits are in
843 * already-truncated segments of the commit log (see notes in
844 * SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
845 * where the file doesn't exist, and return zeroes instead.
846 */
848 if (fd < 0)
849 {
850 if (errno != ENOENT || !InRecovery)
851 {
854 return false;
855 }
856
857 ereport(LOG,
858 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
859 path)));
860 MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
861 return true;
862 }
863
864 errno = 0;
866 if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
867 {
872 return false;
873 }
875
876 if (CloseTransientFile(fd) != 0)
877 {
880 return false;
881 }
882
883 return true;
884}

References CloseTransientFile(), ctl, ereport, errmsg(), fb(), fd(), InRecovery, LOG, MAXPGPATH, MemSet, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_pread, pgstat_report_wait_end(), pgstat_report_wait_start(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, and SlruFileName().

Referenced by SimpleLruReadPage().

◆ SlruPhysicalWritePage()

static bool SlruPhysicalWritePage ( SlruCtl  ctl,
int64  pageno,
int  slotno,
SlruWriteAll  fdata 
)
static

Definition at line 901 of file slru.c.

902{
903 SlruShared shared = ctl->shared;
904 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
905 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
906 off_t offset = rpageno * BLCKSZ;
907 char path[MAXPGPATH];
908 int fd = -1;
909
910 /* update the stats counter of written pages */
912
913 /*
914 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
915 * write out data before associated WAL records. This is the same action
916 * performed during FlushBuffer() in the main buffer manager.
917 */
918 if (shared->group_lsn != NULL)
919 {
920 /*
921 * We must determine the largest async-commit LSN for the page. This
922 * is a bit tedious, but since this entire function is a slow path
923 * anyway, it seems better to do this here than to maintain a per-page
924 * LSN variable (which'd need an extra comparison in the
925 * transaction-commit path).
926 */
928 int lsnindex;
929
931 max_lsn = shared->group_lsn[lsnindex++];
932 for (int lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
933 {
935
936 if (max_lsn < this_lsn)
938 }
939
941 {
942 /*
943 * As noted above, elog(ERROR) is not acceptable here, so if
944 * XLogFlush were to fail, we must PANIC. This isn't much of a
945 * restriction because XLogFlush is just about all critical
946 * section anyway, but let's make sure.
947 */
951 }
952 }
953
954 /*
955 * During a SimpleLruWriteAll, we may already have the desired file open.
956 */
957 if (fdata)
958 {
959 for (int i = 0; i < fdata->num_files; i++)
960 {
961 if (fdata->segno[i] == segno)
962 {
963 fd = fdata->fd[i];
964 break;
965 }
966 }
967 }
968
969 if (fd < 0)
970 {
971 /*
972 * If the file doesn't already exist, we should create it. It is
973 * possible for this to need to happen when writing a page that's not
974 * first in its segment; we assume the OS can cope with that. (Note:
975 * it might seem that it'd be okay to create files only when
976 * SimpleLruZeroPage is called for the first page of a segment.
977 * However, if after a crash and restart the REDO logic elects to
978 * replay the log from a checkpoint before the latest one, then it's
979 * possible that we will get commands to set transaction status of
980 * transactions that have already been truncated from the commit log.
981 * Easiest way to deal with that is to accept references to
982 * nonexistent files here and in SlruPhysicalReadPage.)
983 *
984 * Note: it is possible for more than one backend to be executing this
985 * code simultaneously for different pages of the same file. Hence,
986 * don't use O_EXCL or O_TRUNC or anything like that.
987 */
988 SlruFileName(ctl, path, segno);
990 if (fd < 0)
991 {
994 return false;
995 }
996
997 if (fdata)
998 {
999 if (fdata->num_files < MAX_WRITEALL_BUFFERS)
1000 {
1001 fdata->fd[fdata->num_files] = fd;
1002 fdata->segno[fdata->num_files] = segno;
1003 fdata->num_files++;
1004 }
1005 else
1006 {
1007 /*
1008 * In the unlikely event that we exceed MAX_WRITEALL_BUFFERS,
1009 * fall back to treating it as a standalone write.
1010 */
1011 fdata = NULL;
1012 }
1013 }
1014 }
1015
1016 errno = 0;
1018 if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
1019 {
1021 /* if write didn't set errno, assume problem is no disk space */
1022 if (errno == 0)
1023 errno = ENOSPC;
1025 slru_errno = errno;
1026 if (!fdata)
1028 return false;
1029 }
1031
1032 /* Queue up a sync request for the checkpointer. */
1033 if (ctl->sync_handler != SYNC_HANDLER_NONE)
1034 {
1035 FileTag tag;
1036
1037 INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
1038 if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
1039 {
1040 /* No space to enqueue sync request. Do it synchronously. */
1042 if (pg_fsync(fd) != 0)
1043 {
1046 slru_errno = errno;
1048 return false;
1049 }
1051 }
1052 }
1053
1054 /* Close file, unless part of flush request. */
1055 if (!fdata)
1056 {
1057 if (CloseTransientFile(fd) != 0)
1058 {
1060 slru_errno = errno;
1061 return false;
1062 }
1063 }
1064
1065 return true;
1066}

References CloseTransientFile(), ctl, END_CRIT_SECTION, fb(), fd(), SlruSharedData::group_lsn, i, INIT_SLRUFILETAG, SlruSharedData::lsn_groups_per_page, MAX_WRITEALL_BUFFERS, MAXPGPATH, OpenTransientFile(), SlruSharedData::page_buffer, PG_BINARY, pg_fsync(), pg_pwrite, pgstat_count_slru_blocks_written(), pgstat_report_wait_end(), pgstat_report_wait_start(), RegisterSyncRequest(), SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SlruSharedData::slru_stats_idx, SLRU_WRITE_FAILED, SlruFileName(), START_CRIT_SECTION, SYNC_HANDLER_NONE, SYNC_REQUEST, XLogFlush(), and XLogRecPtrIsValid.

Referenced by SlruInternalWritePage().

◆ SlruRecentlyUsed()

static void SlruRecentlyUsed ( SlruShared  shared,
int  slotno 
)
inline static

Definition at line 1148 of file slru.c.

1149{
1152
1154
1155 /*
1156 * The reason for the if-test is that there are often many consecutive
1157 * accesses to the same page (particularly the latest page). By
1158 * suppressing useless increments of bank_cur_lru_count, we reduce the
1159 * probability that old pages' counts will "wrap around" and make them
1160 * appear recently used.
1161 *
1162 * We allow this code to be executed concurrently by multiple processes
1163 * within SimpleLruReadPage_ReadOnly(). As long as int reads and writes
1164 * are atomic, this should not cause any completely-bogus values to enter
1165 * the computation. However, it is possible for either bank_cur_lru_count
1166 * or individual page_lru_count entries to be "reset" to lower values than
1167 * they should have, in case a process is delayed while it executes this
1168 * function. With care in SlruSelectLRUPage(), this does little harm, and
1169 * in any case the absolute worst possible consequence is a nonoptimal
1170 * choice of page to evict. The gain from allowing concurrent reads of
1171 * SLRU pages seems worth it.
1172 */
1173 if (new_lru_count != shared->page_lru_count[slotno])
1174 {
1177 }
1178}

References Assert, SlruSharedData::bank_cur_lru_count, fb(), SlruSharedData::page_lru_count, SlruSharedData::page_status, SlotGetBankNumber, and SLRU_PAGE_EMPTY.

Referenced by SimpleLruReadPage(), SimpleLruReadPage_ReadOnly(), and SimpleLruZeroPage().

◆ SlruReportIOError()

static void SlruReportIOError ( SlruCtl  ctl,
int64  pageno,
TransactionId  xid 
)
static

Definition at line 1073 of file slru.c.

1074{
1075 int64 segno = pageno / SLRU_PAGES_PER_SEGMENT;
1076 int rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
1077 int offset = rpageno * BLCKSZ;
1078 char path[MAXPGPATH];
1079
1080 SlruFileName(ctl, path, segno);
1081 errno = slru_errno;
1082 switch (slru_errcause)
1083 {
1084 case SLRU_OPEN_FAILED:
1085 ereport(ERROR,
1086 (errcode_for_file_access(),
1087 errmsg("could not access status of transaction %u", xid),
1088 errdetail("Could not open file \"%s\": %m.", path)));
1089 break;
1090 case SLRU_SEEK_FAILED:
1091 ereport(ERROR,
1092 (errcode_for_file_access(),
1093 errmsg("could not access status of transaction %u", xid),
1094 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
1095 path, offset)));
1096 break;
1097 case SLRU_READ_FAILED:
1098 if (errno)
1099 ereport(ERROR,
1100 (errcode_for_file_access(),
1101 errmsg("could not access status of transaction %u", xid),
1102 errdetail("Could not read from file \"%s\" at offset %d: %m.",
1103 path, offset)));
1104 else
1105 ereport(ERROR,
1106 (errmsg("could not access status of transaction %u", xid),
1107 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
1108 break;
1109 case SLRU_WRITE_FAILED:
1110 if (errno)
1111 ereport(ERROR,
1112 (errcode_for_file_access(),
1113 errmsg("could not access status of transaction %u", xid),
1114 errdetail("Could not write to file \"%s\" at offset %d: %m.",
1115 path, offset)));
1116 else
1117 ereport(ERROR,
1118 (errmsg("could not access status of transaction %u", xid),
1119 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
1120 path, offset)));
1121 break;
1122 case SLRU_FSYNC_FAILED:
1123 ereport(data_sync_elevel(ERROR),
1124 (errcode_for_file_access(),
1125 errmsg("could not access status of transaction %u", xid),
1126 errdetail("Could not fsync file \"%s\": %m.",
1127 path)));
1128 break;
1129 case SLRU_CLOSE_FAILED:
1130 ereport(ERROR,
1131 (errcode_for_file_access(),
1132 errmsg("could not access status of transaction %u", xid),
1133 errdetail("Could not close file \"%s\": %m.",
1134 path)));
1135 break;
1136 default:
1137 /* can't get here, we trust */
1138 elog(ERROR, "unrecognized SimpleLru error cause: %d",
1139 (int) slru_errcause);
1140 break;
1141 }
1142}

References ctl, data_sync_elevel(), elog, ereport, errcode_for_file_access(), errdetail(), errmsg(), ERROR, fb(), MAXPGPATH, SlruWriteAllData::segno, SLRU_CLOSE_FAILED, slru_errcause, slru_errno, SLRU_FSYNC_FAILED, SLRU_OPEN_FAILED, SLRU_PAGES_PER_SEGMENT, SLRU_READ_FAILED, SLRU_SEEK_FAILED, SLRU_WRITE_FAILED, and SlruFileName().

Referenced by SimpleLruDoesPhysicalPageExist(), SimpleLruReadPage(), SimpleLruWriteAll(), and SlruInternalWritePage().

◆ SlruScanDirCbDeleteAll()

bool SlruScanDirCbDeleteAll ( SlruCtl  ctl,
char * filename,
int64  segpage,
void * data 
)

Definition at line 1769 of file slru.c.

1770{
1771 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1772
1773 return false; /* keep going */
1774}

References ctl, fb(), SLRU_PAGES_PER_SEGMENT, and SlruInternalDeleteSegment().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), and test_slru_scan_cb().

◆ SlruScanDirCbDeleteCutoff()

static bool SlruScanDirCbDeleteCutoff ( SlruCtl  ctl,
char * filename,
int64  segpage,
void * data 
)
static

Definition at line 1753 of file slru.c.

1755{
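1756 int64 cutoffPage = *(int64 *) data;
1757
1758 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1759 SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
1760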
1761 return false; /* keep going */
1762}

References ctl, data, fb(), SLRU_PAGES_PER_SEGMENT, SlruInternalDeleteSegment(), and SlruMayDeleteSegment().

Referenced by SimpleLruTruncate().

◆ SlruScanDirCbReportPresence()

bool SlruScanDirCbReportPresence ( SlruCtl  ctl,
char * filename,
int64  segpage,
void * data 
)

Definition at line 1737 of file slru.c.

1739{
1740 int64 cutoffPage = *(int64 *) data;
1741
1742 if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
1743 return true; /* found one; don't iterate any more */
1744
1745 return false; /* keep going */
1746}

References ctl, data, fb(), and SlruMayDeleteSegment().

Referenced by TruncateCLOG(), and TruncateCommitTs().

◆ SlruScanDirectory()

bool SlruScanDirectory ( SlruCtl  ctl,
SlruScanCallback  callback,
void * data 
)

Definition at line 1816 of file slru.c.

1817{
1818 bool retval = false;
1819 DIR *cldir;
1820 struct dirent *clde;
1821 int64 segno;
1822 int64 segpage;
1823
1824 cldir = AllocateDir(ctl->Dir);
1825 while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
1826 {
1827 size_t len;
1828
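1829 len = strlen(clde->d_name);
1830
1831 if (SlruCorrectSegmentFilenameLength(ctl, len) &&
1832 strspn(clde->d_name, "0123456789ABCDEF") == len)
1833 {
1834 segno = strtoi64(clde->d_name, NULL, 16);
1835 segpage = segno * SLRU_PAGES_PER_SEGMENT;
1836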
1837 elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
1838 ctl->Dir, clde->d_name);
1839 retval = callback(ctl, clde->d_name, segpage, data);
1840 if (retval)
1841 break;
1842 }
1843 }
1844 FreeDir(cldir);
1845
1846 return retval;
1847}

References AllocateDir(), callback(), ctl, data, DEBUG2, elog, fb(), FreeDir(), len, ReadDir(), SLRU_PAGES_PER_SEGMENT, and SlruCorrectSegmentFilenameLength().

Referenced by AsyncShmemInit(), DeactivateCommitTs(), SimpleLruTruncate(), test_slru_delete_all(), TruncateCLOG(), and TruncateCommitTs().

◆ SlruSelectLRUPage()

static int SlruSelectLRUPage ( SlruCtl  ctl,
int64  pageno 
)
static

Definition at line 1194 of file slru.c.

1195{
1196 SlruShared shared = ctl->shared;
1197
1198 /* Outer loop handles restart after I/O */
1199 for (;;)
1200 {
1201 int cur_count;
1202 int bestvalidslot = 0; /* keep compiler quiet */
1203 int best_valid_delta = -1;
1204 int64 best_valid_page_number = 0; /* keep compiler quiet */
1205 int bestinvalidslot = 0; /* keep compiler quiet */
1206 int best_invalid_delta = -1;
1207 int64 best_invalid_page_number = 0; /* keep compiler quiet */
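1208 int bankno = pageno % ctl->nbanks;
1209 int bankstart = bankno * SLRU_BANK_SIZE;
1210 int bankend = bankstart + SLRU_BANK_SIZE;
1211
1212 Assert(LWLockHeldByMe(SimpleLruGetBankLock(ctl, pageno)));
1213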
1214 /* See if page already has a buffer assigned */
1215 for (int slotno = bankstart; slotno < bankend; slotno++)
1216 {
1217 if (shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
1218 shared->page_number[slotno] == pageno)
1219 return slotno;
1220 }
1221
1222 /*
1223 * If we find any EMPTY slot, just select that one. Else choose a
1224 * victim page to replace. We normally take the least recently used
1225 * valid page, but we will never take the slot containing
1226 * latest_page_number, even if it appears least recently used. We
1227 * will select a slot that is already I/O busy only if there is no
1228 * other choice: a read-busy slot will not be least recently used once
1229 * the read finishes, and waiting for an I/O on a write-busy slot is
1230 * inferior to just picking some other slot. Testing shows the slot
1231 * we pick instead will often be clean, allowing us to begin a read at
1232 * once.
1233 *
1234 * Normally the page_lru_count values will all be different and so
1235 * there will be a well-defined LRU page. But since we allow
1236 * concurrent execution of SlruRecentlyUsed() within
1237 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
1238 * acquire the same lru_count values. In that case we break ties by
1239 * choosing the furthest-back page.
1240 *
1241 * Notice that this next line forcibly advances cur_lru_count to a
1242 * value that is certainly beyond any value that will be in the
1243 * page_lru_count array after the loop finishes. This ensures that
1244 * the next execution of SlruRecentlyUsed will mark the page newly
1245 * used, even if it's for a page that has the current counter value.
1246 * That gets us back on the path to having good data when there are
1247 * multiple pages with the same lru_count.
1248 */
1249 cur_count = (shared->bank_cur_lru_count[bankno])++;
1250 for (int slotno = bankstart; slotno < bankend; slotno++)
1251 {
1252 int this_delta;
1253 int64 this_page_number;
1254
1255 if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
1256 return slotno;
1257
1258 this_delta = cur_count - shared->page_lru_count[slotno];
1259 if (this_delta < 0)
1260 {
1261 /*
1262 * Clean up in case shared updates have caused cur_count
1263 * increments to get "lost". We back off the page counts,
1264 * rather than trying to increase cur_count, to avoid any
1265 * question of infinite loops or failure in the presence of
1266 * wrapped-around counts.
1267 */
1268 shared->page_lru_count[slotno] = cur_count;
1269 this_delta = 0;
1270 }
1271
1272 /*
1273 * If this page is the one most recently zeroed, don't consider it
1274 * an eviction candidate. See comments in SimpleLruZeroPage for an
1275 * explanation about the lack of a memory barrier here.
1276 */
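1277 this_page_number = shared->page_number[slotno];
1278 if (this_page_number ==
1279 pg_atomic_read_u64(&shared->latest_page_number))
1280 continue;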
1281
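1282 if (shared->page_status[slotno] == SLRU_PAGE_VALID)
1283 {
1284 if (this_delta > best_valid_delta ||
1285 (this_delta == best_valid_delta &&
1286 ctl->PagePrecedes(this_page_number,
1287 best_valid_page_number)))
1288 {
1289 bestvalidslot = slotno;
1290 best_valid_delta = this_delta;
1291 best_valid_page_number = this_page_number;
1292 }
1293 }
1294 else
1295 {
1296 if (this_delta > best_invalid_delta ||
1297 (this_delta == best_invalid_delta &&
1298 ctl->PagePrecedes(this_page_number,
1299 best_invalid_page_number)))
1300 {
1301 bestinvalidslot = slotno;
1302 best_invalid_delta = this_delta;
1303 best_invalid_page_number = this_page_number;
1304 }
1305 }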
1306 }
1307
1308 /*
1309 * If all pages (except possibly the latest one) are I/O busy, we'll
1310 * have to wait for an I/O to complete and then retry. In that
1311 * unhappy case, we choose to wait for the I/O on the least recently
1312 * used slot, on the assumption that it was likely initiated first of
1313 * all the I/Os in progress and may therefore finish first.
1314 */
1315 if (best_valid_delta < 0)
1316 {
1317 SimpleLruWaitIO(ctl, bestinvalidslot);
1318 continue;
1319 }
1320
1321 /*
1322 * If the selected page is clean, we're set.
1323 */
1324 if (!shared->page_dirty[bestvalidslot])
1325 return bestvalidslot;
1326
1327 /*
1328 * Write the page.
1329 */
1330 SlruInternalWritePage(ctl, bestvalidslot, NULL);
1331
1332 /*
1333 * Now loop back and try again. This is the easiest way of dealing
1334 * with corner cases such as the victim page being re-dirtied while we
1335 * wrote it.
1336 */
1337 }
1338}

References Assert, SlruSharedData::bank_cur_lru_count, ctl, fb(), SlruSharedData::latest_page_number, LWLockHeldByMe(), SlruSharedData::page_dirty, SlruSharedData::page_lru_count, SlruSharedData::page_number, SlruSharedData::page_status, pg_atomic_read_u64(), SimpleLruGetBankLock(), SimpleLruWaitIO(), SLRU_BANK_SIZE, SLRU_PAGE_EMPTY, SLRU_PAGE_VALID, and SlruInternalWritePage().

Referenced by SimpleLruReadPage(), and SimpleLruZeroPage().

◆ SlruSyncFileTag()

int SlruSyncFileTag ( SlruCtl  ctl,
const FileTag * ftag,
char * path 
)

Definition at line 1856 of file slru.c.

1857{
1858 int fd;
1859 int save_errno;
1860 int result;
1861
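1862 SlruFileName(ctl, path, ftag->segno);
1863
1864 fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
1865 if (fd < 0)
1866 return -1;
1867
1868 pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
1869 result = pg_fsync(fd);
1870 pgstat_report_wait_end();
1871 save_errno = errno;
1872
1873 CloseTransientFile(fd);
1874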
1875 errno = save_errno;
1876 return result;
1877}

References CloseTransientFile(), ctl, fb(), fd(), OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), FileTag::segno, and SlruFileName().

Referenced by clogsyncfiletag(), committssyncfiletag(), multixactmemberssyncfiletag(), multixactoffsetssyncfiletag(), and test_slru_page_sync().

Variable Documentation

◆ slru_errcause

◆ slru_errno