/*
 * INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
 *
 * Fill in the FileTag "a".  The whole struct is first zeroed with memset()
 * over sizeof(FileTag); then handler is set to SYNC_HANDLER_MD, and the
 * rlocator, forknum and segno fields are assigned from the corresponding
 * macro arguments.  The individual steps are joined with the comma operator.
 *
 * "a" is expanded several times, so it must be an lvalue expression without
 * side effects.  Each xx_* argument is expanded exactly once, parenthesized.
 *
 * NOTE(review): zeroing the full struct presumably also clears padding bytes
 * so that tags can be compared or hashed bytewise -- confirm against the
 * users of FileTag.
 *
 * EXTENSION_* flags (defined right after the macro)
 *
 * Each flag is a distinct single bit (1 << 0 through 1 << 5), so several of
 * them can be combined with bitwise OR and tested with bitwise AND.
 *
 * NOTE(review): presumably these are the values accepted by the "behavior"
 * argument of mdopenfork() and _mdfd_getseg(); the precise meaning of each
 * flag is not derivable from the definitions alone -- confirm against those
 * functions before relying on it.
 */
90 #define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \
92 memset(&(a), 0, sizeof(FileTag)), \
93 (a).handler = SYNC_HANDLER_MD, \
94 (a).rlocator = (xx_rlocator), \
95 (a).forknum = (xx_forknum), \
96 (a).segno = (xx_segno) \
102 #define EXTENSION_FAIL (1 << 0)	/* bit 0 */
104 #define EXTENSION_RETURN_NULL (1 << 1)	/* bit 1 */
106 #define EXTENSION_CREATE (1 << 2)	/* bit 2 */
108 #define EXTENSION_CREATE_RECOVERY (1 << 3)	/* bit 3 */
116 #define EXTENSION_DONT_CHECK_SIZE (1 << 4)	/* bit 4 */
118 #define EXTENSION_DONT_OPEN (1 << 5)	/* bit 5 */
220 int save_errno = errno;
230 errmsg(
"could not create file \"%s\": %m", path)));
312 for (forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
331 if (ret < 0 && errno != ENOENT)
336 errmsg(
"could not truncate file \"%s\": %m", path)));
350 path =
relpath(rlocator, forknum);
373 if (ret >= 0 || errno != ENOENT)
376 if (ret < 0 && errno != ENOENT)
381 errmsg(
"could not remove file \"%s\": %m", path)));
409 if (ret >= 0 || errno != ENOENT)
411 char *segpath = (
char *)
palloc(strlen(path) + 12);
414 for (segno = 1;; segno++)
416 sprintf(segpath,
"%s.%u", path, segno);
434 if (unlink(segpath) < 0)
440 errmsg(
"could not remove file \"%s\": %m", segpath)));
461 const void *buffer,
bool skipFsync)
472 #ifdef CHECK_WRITE_VS_EXTEND
484 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
485 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
491 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
493 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
495 if ((nbytes =
FileWrite(v->
mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
500 errmsg(
"could not extend file \"%s\": %m",
502 errhint(
"Check free disk space.")));
506 errmsg(
"could not extend file \"%s\": wrote only %d of %d bytes at block %u",
508 nbytes, BLCKSZ, blocknum),
509 errhint(
"Check free disk space.")));
530 int remblocks = nblocks;
535 #ifdef CHECK_WRITE_VS_EXTEND
546 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
547 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
551 while (remblocks > 0)
554 off_t seekpos = (off_t) BLCKSZ * segstartblock;
557 if (segstartblock + remblocks > RELSEG_SIZE)
558 numblocks = RELSEG_SIZE - segstartblock;
560 numblocks = remblocks;
564 Assert(segstartblock < RELSEG_SIZE);
565 Assert(segstartblock + numblocks <= RELSEG_SIZE);
583 seekpos, (off_t) BLCKSZ * numblocks,
584 WAIT_EVENT_DATA_FILE_EXTEND);
589 errmsg(
"could not extend file \"%s\" with FileFallocate(): %m",
591 errhint(
"Check free disk space."));
606 seekpos, (off_t) BLCKSZ * numblocks,
607 WAIT_EVENT_DATA_FILE_EXTEND);
611 errmsg(
"could not extend file \"%s\": %m",
613 errhint(
"Check free disk space."));
621 remblocks -= numblocks;
622 curblocknum += numblocks;
661 errmsg(
"could not open file \"%s\": %m", path)));
683 for (
int forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
700 while (nopensegs > 0)
728 int nblocks_this_segment;
735 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
737 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
739 nblocks_this_segment =
741 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
744 WAIT_EVENT_DATA_FILE_PREFETCH);
746 blocknum += nblocks_this_segment;
747 nblocks -= nblocks_this_segment;
770 for (
int i = 0;
i < nblocks; ++
i)
773 Assert((uintptr_t) buffers[
i] ==
779 iovp->iov_base = buffers[0];
780 iovp->iov_len = BLCKSZ;
784 for (
int i = 1;
i < nblocks; ++
i)
786 void *buffer = buffers[
i];
788 if (((
char *) iovp->iov_base + iovp->iov_len) == buffer)
791 iovp->iov_len += BLCKSZ;
797 iovp->iov_base = buffer;
798 iovp->iov_len = BLCKSZ;
821 size_t transferred_this_segment;
822 size_t size_this_segment;
827 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
829 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
831 nblocks_this_segment =
833 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
834 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
837 size_this_segment = nblocks_this_segment * BLCKSZ;
838 transferred_this_segment = 0;
847 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
853 WAIT_EVENT_DATA_FILE_READ);
854 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
860 size_this_segment - transferred_this_segment);
862 #ifdef SIMULATE_SHORT_READ
863 nbytes =
Min(nbytes, 4096);
869 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
871 blocknum + nblocks_this_segment - 1,
887 i < nblocks_this_segment;
889 memset(buffers[
i], 0, BLCKSZ);
895 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
897 blocknum + nblocks_this_segment - 1,
899 transferred_this_segment,
900 size_this_segment)));
904 transferred_this_segment += nbytes;
905 Assert(transferred_this_segment <= size_this_segment);
906 if (transferred_this_segment == size_this_segment)
914 nblocks -= nblocks_this_segment;
915 buffers += nblocks_this_segment;
916 blocknum += nblocks_this_segment;
929 const void **buffers,
BlockNumber nblocks,
bool skipFsync)
932 #ifdef CHECK_WRITE_VS_EXTEND
933 Assert((uint64) blocknum + (uint64) nblocks <= (uint64)
mdnblocks(reln, forknum));
944 size_t transferred_this_segment;
945 size_t size_this_segment;
950 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
952 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
954 nblocks_this_segment =
956 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
957 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
960 size_this_segment = nblocks_this_segment * BLCKSZ;
961 transferred_this_segment = 0;
970 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
976 WAIT_EVENT_DATA_FILE_WRITE);
977 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
983 size_this_segment - transferred_this_segment);
985 #ifdef SIMULATE_SHORT_WRITE
986 nbytes =
Min(nbytes, 4096);
991 bool enospc = errno == ENOSPC;
995 errmsg(
"could not write blocks %u..%u in file \"%s\": %m",
997 blocknum + nblocks_this_segment - 1,
999 enospc ?
errhint(
"Check free disk space.") : 0));
1003 transferred_this_segment += nbytes;
1004 Assert(transferred_this_segment <= size_this_segment);
1005 if (transferred_this_segment == size_this_segment)
1016 nblocks -= nblocks_this_segment;
1017 buffers += nblocks_this_segment;
1018 blocknum += nblocks_this_segment;
1061 segnum_start = blocknum / RELSEG_SIZE;
1064 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1065 if (segnum_start != segnum_end)
1066 nflush = RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE));
1069 Assert(nflush <= nblocks);
1071 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
1122 return (segno * ((
BlockNumber) RELSEG_SIZE)) + nblocks;
1157 if (nblocks > curnblk)
1163 (
errmsg(
"could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1165 nblocks, curnblk)));
1167 if (nblocks == curnblk)
1175 while (curopensegs > 0)
1179 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1181 v = &reln->
md_seg_fds[forknum][curopensegs - 1];
1183 if (priorblocks > nblocks)
1192 errmsg(
"could not truncate file \"%s\": %m",
1204 else if (priorblocks + ((
BlockNumber) RELSEG_SIZE) > nblocks)
1213 BlockNumber lastsegblocks = nblocks - priorblocks;
1215 if (
FileTruncate(v->
mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1218 errmsg(
"could not truncate file \"%s\" to %u blocks: %m",
1243 int min_inactive_seg;
1269 if (segno > min_inactive_seg)
1294 int min_inactive_seg;
1329 errmsg(
"could not fsync file \"%s\": %m",
1333 if (segno > min_inactive_seg)
1367 (
errmsg_internal(
"could not forward fsync request because request queue is full")));
1374 errmsg(
"could not fsync file \"%s\": %m",
1433 rlocator.
dbOid = dbid;
1452 for (
i = 0;
i < ndelrels;
i++)
1468 for (
i = 0;
i < ndelrels;
i++)
1525 fullpath =
psprintf(
"%s.%u", path, segno);
1585 bool skipFsync,
int behavior)
1599 if (targetseg < reln->md_num_open_segs[forknum])
1626 nextsegno <= targetseg; nextsegno++)
1660 zerobuf, skipFsync);
1688 errmsg(
"could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1702 errmsg(
"could not open file \"%s\" (target block %u): %m",
1723 errmsg(
"could not seek to end of file \"%s\": %m",
1750 need_to_close =
false;
1763 need_to_close =
true;
1769 result =
FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1799 return unlink(path);
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
#define InvalidBlockNumber
#define TYPEALIGN(ALIGNVAL, LEN)
#define Assert(condition)
int errmsg_internal(const char *fmt,...)
int errcode_for_file_access(void)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
int FileSync(File file, uint32 wait_event_info)
void FileClose(File file)
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
int data_sync_elevel(int elevel)
File PathNameOpenFile(const char *fileName, int fileFlags)
char * FilePathName(File file)
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
off_t FileSize(File file)
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
int pg_truncate(const char *path, off_t length)
#define FILE_POSSIBLY_DELETED(err)
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
void pfree(void *pointer)
MemoryContext TopMemoryContext
void * repalloc(void *pointer, Size size)
void * MemoryContextAlloc(MemoryContext context, Size size)
void * palloc_aligned(Size size, Size alignto, int flags)
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_CREATE_RECOVERY
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
bool mdexists(SMgrRelation reln, ForkNumber forknum)
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_DONT_OPEN
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
int mdunlinkfiletag(const FileTag *ftag, char *path)
static MemoryContext MdCxt
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
static int do_truncate(const char *path)
void mdclose(SMgrRelation reln, ForkNumber forknum)
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
int mdsyncfiletag(const FileTag *ftag, char *path)
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
#define EXTENSION_RETURN_NULL
static char * _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
void mdopen(SMgrRelation reln)
static int _mdfd_open_flags(void)
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
#define EXTENSION_DONT_CHECK_SIZE
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
void ForgetDatabaseSyncRequests(Oid dbid)
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define ERRCODE_DATA_CORRUPTED
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt)
size_t strlcpy(char *dst, const char *src, size_t siz)
static int fd(const char *x, int i)
#define INVALID_PROC_NUMBER
char * psprintf(const char *fmt,...)
#define RelFileLocatorBackendIsTemp(rlocator)
#define relpath(rlocator, forknum)
#define relpathperm(rlocator, forknum)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrclose(SMgrRelation reln)
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
int md_num_open_segs[MAX_FORKNUM+1]
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)