91#define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \
93 memset(&(a), 0, sizeof(FileTag)), \
94 (a).handler = SYNC_HANDLER_MD, \
95 (a).rlocator = (xx_rlocator), \
96 (a).forknum = (xx_forknum), \
97 (a).segno = (xx_segno) \
103#define EXTENSION_FAIL (1 << 0)
105#define EXTENSION_RETURN_NULL (1 << 1)
107#define EXTENSION_CREATE (1 << 2)
109#define EXTENSION_CREATE_RECOVERY (1 << 3)
111#define EXTENSION_DONT_OPEN (1 << 5)
121#define SEGMENT_CHARS OIDCHARS
122#define MD_PATH_STR_MAXLEN \
124 REL_PATH_STR_MAXLEN \
125 + sizeof((char)'.') \
242 int save_errno = errno;
252 errmsg(
"could not create file \"%s\": %m", path.
str)));
259 mdfd->mdfd_segno = 0;
332 for (forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
351 if (ret < 0 && errno != ENOENT)
356 errmsg(
"could not truncate file \"%s\": %m", path)));
370 path =
relpath(rlocator, forknum);
393 if (ret >= 0 || errno != ENOENT)
395 ret = unlink(path.
str);
396 if (ret < 0 && errno != ENOENT)
401 errmsg(
"could not remove file \"%s\": %m", path.
str)));
429 if (ret >= 0 || errno != ENOENT)
434 for (segno = 1;; segno++)
454 if (unlink(segpath.
str) < 0)
460 errmsg(
"could not remove file \"%s\": %m", segpath.
str)));
478 const void *buffer,
bool skipFsync)
489#ifdef CHECK_WRITE_VS_EXTEND
501 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
502 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
508 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
510 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
512 if ((nbytes =
FileWrite(v->
mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
517 errmsg(
"could not extend file \"%s\": %m",
519 errhint(
"Check free disk space.")));
523 errmsg(
"could not extend file \"%s\": wrote only %d of %d bytes at block %u",
525 nbytes, BLCKSZ, blocknum),
526 errhint(
"Check free disk space.")));
547 int remblocks = nblocks;
552#ifdef CHECK_WRITE_VS_EXTEND
563 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
564 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
568 while (remblocks > 0)
571 off_t seekpos = (off_t) BLCKSZ * segstartblock;
574 if (segstartblock + remblocks > RELSEG_SIZE)
575 numblocks = RELSEG_SIZE - segstartblock;
577 numblocks = remblocks;
581 Assert(segstartblock < RELSEG_SIZE);
582 Assert(segstartblock + numblocks <= RELSEG_SIZE);
600 seekpos, (off_t) BLCKSZ * numblocks,
601 WAIT_EVENT_DATA_FILE_EXTEND);
606 errmsg(
"could not extend file \"%s\" with FileFallocate(): %m",
608 errhint(
"Check free disk space."));
623 seekpos, (off_t) BLCKSZ * numblocks,
624 WAIT_EVENT_DATA_FILE_EXTEND);
628 errmsg(
"could not extend file \"%s\": %m",
630 errhint(
"Check free disk space."));
638 remblocks -= numblocks;
639 curblocknum += numblocks;
675 errmsg(
"could not open file \"%s\": %m", path.
str)));
681 mdfd->mdfd_segno = 0;
695 for (
int forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
712 while (nopensegs > 0)
740 int nblocks_this_segment;
747 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
749 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
751 nblocks_this_segment =
753 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
756 WAIT_EVENT_DATA_FILE_PREFETCH);
758 blocknum += nblocks_this_segment;
759 nblocks -= nblocks_this_segment;
782 for (
int i = 0;
i < nblocks; ++
i)
785 Assert((uintptr_t) buffers[
i] ==
791 iovp->iov_base = buffers[0];
792 iovp->iov_len = BLCKSZ;
796 for (
int i = 1;
i < nblocks; ++
i)
798 void *buffer = buffers[
i];
800 if (((
char *) iovp->iov_base + iovp->iov_len) == buffer)
803 iovp->iov_len += BLCKSZ;
809 iovp->iov_base = buffer;
810 iovp->iov_len = BLCKSZ;
830 return RELSEG_SIZE - segoff;
848 size_t transferred_this_segment;
849 size_t size_this_segment;
854 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
856 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
858 nblocks_this_segment =
860 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
861 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
863 if (nblocks_this_segment != nblocks)
864 elog(
ERROR,
"read crosses segment boundary");
867 size_this_segment = nblocks_this_segment * BLCKSZ;
868 transferred_this_segment = 0;
877 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
883 WAIT_EVENT_DATA_FILE_READ);
884 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
890 size_this_segment - transferred_this_segment);
892#ifdef SIMULATE_SHORT_READ
893 nbytes =
Min(nbytes, 4096);
899 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
901 blocknum + nblocks_this_segment - 1,
938 i < nblocks_this_segment;
940 memset(buffers[
i], 0, BLCKSZ);
946 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
948 blocknum + nblocks_this_segment - 1,
950 transferred_this_segment,
951 size_this_segment)));
955 transferred_this_segment += nbytes;
956 Assert(transferred_this_segment <= size_this_segment);
957 if (transferred_this_segment == size_this_segment)
965 nblocks -= nblocks_this_segment;
966 buffers += nblocks_this_segment;
967 blocknum += nblocks_this_segment;
989 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
991 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
993 nblocks_this_segment =
995 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
997 if (nblocks_this_segment != nblocks)
998 elog(
ERROR,
"read crossing segment boundary");
1002 Assert(nblocks <= iovcnt);
1006 Assert(iovcnt <= nblocks_this_segment);
1023 errmsg(
"could not start reading blocks %u..%u in file \"%s\": %m",
1025 blocknum + nblocks_this_segment - 1,
1050 const void **buffers,
BlockNumber nblocks,
bool skipFsync)
1053#ifdef CHECK_WRITE_VS_EXTEND
1065 size_t transferred_this_segment;
1066 size_t size_this_segment;
1071 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
1073 Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
1075 nblocks_this_segment =
1077 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
1078 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
1080 if (nblocks_this_segment != nblocks)
1081 elog(
ERROR,
"write crosses segment boundary");
1084 size_this_segment = nblocks_this_segment * BLCKSZ;
1085 transferred_this_segment = 0;
1094 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
1100 WAIT_EVENT_DATA_FILE_WRITE);
1101 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1107 size_this_segment - transferred_this_segment);
1109#ifdef SIMULATE_SHORT_WRITE
1110 nbytes =
Min(nbytes, 4096);
1115 bool enospc = errno == ENOSPC;
1119 errmsg(
"could not write blocks %u..%u in file \"%s\": %m",
1121 blocknum + nblocks_this_segment - 1,
1123 enospc ?
errhint(
"Check free disk space.") : 0));
1127 transferred_this_segment += nbytes;
1128 Assert(transferred_this_segment <= size_this_segment);
1129 if (transferred_this_segment == size_this_segment)
1140 nblocks -= nblocks_this_segment;
1141 buffers += nblocks_this_segment;
1142 blocknum += nblocks_this_segment;
1185 segnum_start = blocknum / RELSEG_SIZE;
1188 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1189 if (segnum_start != segnum_end)
1190 nflush = RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE));
1193 Assert(nflush <= nblocks);
1195 seekpos = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
1246 return (segno * ((
BlockNumber) RELSEG_SIZE)) + nblocks;
1283 if (nblocks > curnblk)
1289 (
errmsg(
"could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1291 nblocks, curnblk)));
1293 if (nblocks == curnblk)
1301 while (curopensegs > 0)
1305 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1307 v = &reln->
md_seg_fds[forknum][curopensegs - 1];
1309 if (priorblocks > nblocks)
1318 errmsg(
"could not truncate file \"%s\": %m",
1330 else if (priorblocks + ((
BlockNumber) RELSEG_SIZE) > nblocks)
1339 BlockNumber lastsegblocks = nblocks - priorblocks;
1341 if (
FileTruncate(v->
mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
1344 errmsg(
"could not truncate file \"%s\" to %u blocks: %m",
1369 int min_inactive_seg;
1395 if (segno > min_inactive_seg)
1420 int min_inactive_seg;
1455 errmsg(
"could not fsync file \"%s\": %m",
1459 if (segno > min_inactive_seg)
1477 *off = (off_t) BLCKSZ * (blocknum % ((
BlockNumber) RELSEG_SIZE));
1479 Assert(*off < (off_t) BLCKSZ * RELSEG_SIZE);
1508 (
errmsg_internal(
"could not forward fsync request because request queue is full")));
1515 errmsg(
"could not fsync file \"%s\": %m",
1574 rlocator.
dbOid = dbid;
1593 for (
i = 0;
i < ndelrels;
i++)
1609 for (
i = 0;
i < ndelrels;
i++)
1677 strcpy(fullpath.
str, path.
str);
1731 bool skipFsync,
int behavior)
1745 if (targetseg < reln->md_num_open_segs[forknum])
1772 nextsegno <= targetseg; nextsegno++)
1806 zerobuf, skipFsync);
1832 errmsg(
"could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1846 errmsg(
"could not open file \"%s\" (target block %u): %m",
1867 errmsg(
"could not seek to end of file \"%s\": %m",
1894 need_to_close =
false;
1906 need_to_close =
true;
1912 result =
FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1941 return unlink(path);
1970 if (prior_result.
result < 0)
2047 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
2060 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
2064 result.
result * (
size_t) BLCKSZ,
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
#define InvalidBlockNumber
#define TYPEALIGN(ALIGNVAL, LEN)
int errmsg_internal(const char *fmt,...)
int errcode_for_file_access(void)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
int FileGetRawDesc(File file)
void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
char * FilePathName(File file)
int FileSync(File file, uint32 wait_event_info)
void FileClose(File file)
int FileStartReadV(PgAioHandle *ioh, File file, int iovcnt, off_t offset, uint32 wait_event_info)
int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
int data_sync_elevel(int elevel)
File PathNameOpenFile(const char *fileName, int fileFlags)
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
off_t FileSize(File file)
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info)
int FileTruncate(File file, off_t offset, uint32 wait_event_info)
int pg_truncate(const char *path, off_t length)
#define FILE_POSSIBLY_DELETED(err)
static ssize_t FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info)
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Assert(PointerIsAligned(start, uint64))
void * MemoryContextAlloc(MemoryContext context, Size size)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
MemoryContext TopMemoryContext
void * palloc_aligned(Size size, Size alignto, int flags)
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
static void md_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_CREATE_RECOVERY
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
bool mdexists(SMgrRelation reln, ForkNumber forknum)
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_DONT_OPEN
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
int mdunlinkfiletag(const FileTag *ftag, char *path)
static MemoryContext MdCxt
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
static PgAioResult md_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static int do_truncate(const char *path)
void mdclose(SMgrRelation reln, ForkNumber forknum)
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
int mdsyncfiletag(const FileTag *ftag, char *path)
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
#define EXTENSION_RETURN_NULL
void mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
void mdopen(SMgrRelation reln)
const PgAioHandleCallbacks aio_md_readv_cb
static int _mdfd_open_flags(void)
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
struct MdPathStr MdPathStr
#define MD_PATH_STR_MAXLEN
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
void ForgetDatabaseSyncRequests(Oid dbid)
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define ERRCODE_DATA_CORRUPTED
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
size_t strlcpy(char *dst, const char *src, size_t siz)
static int fd(const char *x, int i)
#define INVALID_PROC_NUMBER
#define RelFileLocatorBackendIsTemp(rlocator)
#define relpath(rlocator, forknum)
#define relpathbackend(rlocator, backend, forknum)
#define relpathperm(rlocator, forknum)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrclose(SMgrRelation reln)
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
void pgaio_io_set_target_smgr(PgAioHandle *ioh, SMgrRelationData *smgr, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skip_fsync)
char str[MD_PATH_STR_MAXLEN+1]
PgAioHandleCallbackComplete complete_shared
char str[REL_PATH_STR_MAXLEN+1]
int md_num_open_segs[MAX_FORKNUM+1]
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
struct PgAioTargetData::@124 smgr
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)