PostgreSQL Source Code git master
Loading...
Searching...
No Matches
md.h File Reference
#include "storage/aio_types.h"
#include "storage/block.h"
#include "storage/relfilelocator.h"
#include "storage/smgr.h"
#include "storage/sync.h"
Include dependency graph for md.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void mdinit (void)
 
void mdopen (SMgrRelation reln)
 
void mdclose (SMgrRelation reln, ForkNumber forknum)
 
void mdcreate (SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
bool mdexists (SMgrRelation reln, ForkNumber forknum)
 
void mdunlink (RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 
void mdextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 
void mdzeroextend (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
 
bool mdprefetch (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
 
uint32 mdmaxcombine (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
void mdreadv (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdstartreadv (PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
 
void mdwritev (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
 
void mdwriteback (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
 
BlockNumber mdnblocks (SMgrRelation reln, ForkNumber forknum)
 
void mdtruncate (SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
 
void mdimmedsync (SMgrRelation reln, ForkNumber forknum)
 
void mdregistersync (SMgrRelation reln, ForkNumber forknum)
 
int mdfd (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
 
void ForgetDatabaseSyncRequests (Oid dbid)
 
void DropRelationFiles (RelFileLocator *delrels, int ndelrels, bool isRedo)
 
int mdsyncfiletag (const FileTag *ftag, char *path)
 
int mdunlinkfiletag (const FileTag *ftag, char *path)
 
bool mdfiletagmatches (const FileTag *ftag, const FileTag *candidate)
 

Variables

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
 

Function Documentation

◆ DropRelationFiles()

void DropRelationFiles ( RelFileLocator * delrels,
int  ndelrels,
bool  isRedo 
)
extern

Definition at line 1611 of file md.c.

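1612{
1613 SMgrRelation *srels;
1614 int i;
1615
1616 srels = palloc_array(SMgrRelation, ndelrels);
1617 for (i = 0; i < ndelrels; i++)
1618 {
1619 SMgrRelation srel = smgropen(delrels[i], INVALID_PROC_NUMBER);
1620
1621 if (isRedo)
1622 {
1623 ForkNumber fork;
1624
1625 for (fork = 0; fork <= MAX_FORKNUM; fork++)
1626 XLogDropRelation(delrels[i], fork);
1627 }
1628 srels[i] = srel;
1629 }
1630
1631 smgrdounlinkall(srels, ndelrels, isRedo);
1632
1633 for (i = 0; i < ndelrels; i++)
1634 smgrclose(srels[i]);
1635 pfree(srels);
1636}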
#define palloc_array(type, count)
Definition fe_memutils.h:76
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
static int fb(int x)
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
ForkNumber
Definition relpath.h:56
#define MAX_FORKNUM
Definition relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition smgr.c:240
void smgrclose(SMgrRelation reln)
Definition smgr.c:374
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
Definition smgr.c:538
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
Definition xlogutils.c:630

References fb(), i, INVALID_PROC_NUMBER, MAX_FORKNUM, palloc_array, pfree(), smgrclose(), smgrdounlinkall(), smgropen(), and XLogDropRelation().

Referenced by FinishPreparedTransaction(), xact_redo_abort(), and xact_redo_commit().

◆ ForgetDatabaseSyncRequests()

void ForgetDatabaseSyncRequests ( Oid  dbid)
extern

Definition at line 1593 of file md.c.

1594{
1595 FileTag tag;
1596 RelFileLocator rlocator;
1597
1598 rlocator.dbOid = dbid;
1599 rlocator.spcOid = 0;
1600 rlocator.relNumber = 0;
1601
1602 INIT_MD_FILETAG(tag, rlocator, InvalidForkNumber, InvalidBlockNumber);
1603
1604 RegisterSyncRequest(&tag, SYNC_FILTER_REQUEST, true /* retryOnError */ );
1605}
#define InvalidBlockNumber
Definition block.h:33
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
Definition md.c:101
@ InvalidForkNumber
Definition relpath.h:57
Definition sync.h:51
RelFileNumber relNumber
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
Definition sync.c:580
@ SYNC_FILTER_REQUEST
Definition sync.h:28

References RelFileLocator::dbOid, INIT_MD_FILETAG, InvalidBlockNumber, InvalidForkNumber, RegisterSyncRequest(), RelFileLocator::relNumber, RelFileLocator::spcOid, and SYNC_FILTER_REQUEST.

Referenced by createdb_failure_callback(), dbase_redo(), and dropdb().

◆ mdclose()

void mdclose ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 724 of file md.c.

725{
726 int nopensegs = reln->md_num_open_segs[forknum];
727
728 /* No work if already closed */
729 if (nopensegs == 0)
730 return;
731
732 /* close segments starting from the end */
733 while (nopensegs > 0)
734 {
735 MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
736
737 FileClose(v->mdfd_vfd);
738 _fdvec_resize(reln, forknum, nopensegs - 1);
739 nopensegs--;
740 }
741}
void FileClose(File file)
Definition fd.c:1965
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
Definition md.c:1643
Definition md.c:92
File mdfd_vfd
Definition md.c:93

References _fdvec_resize(), fb(), FileClose(), and _MdfdVec::mdfd_vfd.

Referenced by mdexists().

◆ mdcreate()

void mdcreate ( SMgrRelation  reln,
ForkNumber  forknum,
bool  isRedo 
)
extern

Definition at line 222 of file md.c.

223{
224 MdfdVec *mdfd;
225 RelPathStr path;
226 File fd;
227
228 if (isRedo && reln->md_num_open_segs[forknum] > 0)
229 return; /* created and opened already... */
230
231 Assert(reln->md_num_open_segs[forknum] == 0);
232
233 /*
234 * We may be using the target table space for the first time in this
235 * database, so create a per-database subdirectory if needed.
236 *
237 * XXX this is a fairly ugly violation of module layering, but this seems
238 * to be the best place to put the check. Maybe TablespaceCreateDbspace
239 * should be here and not in commands/tablespace.c? But that would imply
240 * importing a lot of stuff that smgr.c oughtn't know, either.
241 */
242 TablespaceCreateDbspace(reln->smgr_rlocator.locator.spcOid,
243 reln->smgr_rlocator.locator.dbOid,
244 isRedo);
245
246 path = relpath(reln->smgr_rlocator, forknum);
247
249
250 if (fd < 0)
251 {
252 int save_errno = errno;
253
254 if (isRedo)
256 if (fd < 0)
257 {
258 /* be sure to report the error reported by create, not open */
262 errmsg("could not create file \"%s\": %m", path.str)));
263 }
264 }
265
266 _fdvec_resize(reln, forknum, 1);
267 mdfd = &reln->md_seg_fds[forknum][0];
268 mdfd->mdfd_vfd = fd;
269 mdfd->mdfd_segno = 0;
270
271 if (!SmgrIsTemp(reln))
273}
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
Definition tablespace.c:112
#define Assert(condition)
Definition c.h:873
int errcode_for_file_access(void)
Definition elog.c:886
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define ereport(elevel,...)
Definition elog.h:150
File PathNameOpenFile(const char *fileName, int fileFlags)
Definition fd.c:1562
int File
Definition fd.h:51
int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
Definition md.c:1494
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition md.c:1518
static int _mdfd_open_flags(void)
Definition md.c:176
static int fd(const char *x, int i)
#define relpath(rlocator, forknum)
Definition relpath.h:150
#define SmgrIsTemp(smgr)
Definition smgr.h:74
char str[REL_PATH_STR_MAXLEN+1]
Definition relpath.h:123

References _fdvec_resize(), _mdfd_open_flags(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), fd(), mdfd(), PathNameOpenFile(), register_dirty_segment(), relpath, SmgrIsTemp, RelPathStr::str, and TablespaceCreateDbspace().

◆ mdexists()

bool mdexists ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 203 of file md.c.

204{
205 /*
206 * Close it first, to ensure that we notice if the fork has been unlinked
207 * since we opened it. As an optimization, we can skip that in recovery,
208 * which already closes relations when dropping them.
209 */
210 if (!InRecovery)
211 mdclose(reln, forknum);
212
213 return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL);
214}
void mdclose(SMgrRelation reln, ForkNumber forknum)
Definition md.c:724
#define EXTENSION_RETURN_NULL
Definition md.c:115
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
Definition md.c:675
bool InRecovery
Definition xlogutils.c:50

References EXTENSION_RETURN_NULL, fb(), InRecovery, mdclose(), and mdopenfork().

◆ mdextend()

void mdextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void * buffer,
bool  skipFsync 
)
extern

Definition at line 487 of file md.c.

489{
490 pgoff_t seekpos;
491 int nbytes;
492 MdfdVec *v;
493
494 /* If this build supports direct I/O, the buffer must be I/O aligned. */
495 if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
496 Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
497
498 /* This assert is too expensive to have on normally ... */
499#ifdef CHECK_WRITE_VS_EXTEND
500 Assert(blocknum >= mdnblocks(reln, forknum));
501#endif
502
503 /*
504 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
505 * more --- we mustn't create a block whose number actually is
506 * InvalidBlockNumber. (Note that this failure should be unreachable
507 * because of upstream checks in bufmgr.c.)
508 */
509 if (blocknum == InvalidBlockNumber)
512 errmsg("cannot extend file \"%s\" beyond %u blocks",
513 relpath(reln->smgr_rlocator, forknum).str,
515
516 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
517
518 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
519
520 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
521
522 if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
523 {
524 if (nbytes < 0)
527 errmsg("could not extend file \"%s\": %m",
529 errhint("Check free disk space.")));
530 /* short write: complain appropriately */
533 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
535 nbytes, BLCKSZ, blocknum),
536 errhint("Check free disk space.")));
537 }
538
539 if (!skipFsync && !SmgrIsTemp(reln))
540 register_dirty_segment(reln, forknum, v);
541
542 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
543}
uint32 BlockNumber
Definition block.h:31
#define TYPEALIGN(ALIGNVAL, LEN)
Definition c.h:819
int errhint(const char *fmt,...)
Definition elog.c:1330
int errcode(int sqlerrcode)
Definition elog.c:863
char * FilePathName(File file)
Definition fd.c:2499
static ssize_t FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
Definition fd.h:237
#define PG_O_DIRECT
Definition fd.h:123
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
Definition md.c:1883
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
Definition md.c:1234
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
Definition md.c:1754
#define EXTENSION_CREATE
Definition md.c:117
#define PG_IO_ALIGN_SIZE
off_t pgoff_t
Definition port.h:421

References _mdfd_getseg(), _mdnblocks(), Assert, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, fb(), FilePathName(), FileWrite(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), PG_IO_ALIGN_SIZE, PG_O_DIRECT, register_dirty_segment(), relpath, SmgrIsTemp, and TYPEALIGN.

Referenced by _mdfd_getseg().

◆ mdfd()

int mdfd ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
uint32 * off 
)
extern

Definition at line 1494 of file md.c.

1495{
1496 MdfdVec *v = mdopenfork(reln, forknum, EXTENSION_FAIL);
1497
1498 v = _mdfd_getseg(reln, forknum, blocknum, false,
1499 EXTENSION_FAIL);
1500
1501 *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1502
1503 Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1504
1505 return FileGetRawDesc(v->mdfd_vfd);
1506}
int FileGetRawDesc(File file)
Definition fd.c:2515
#define EXTENSION_FAIL
Definition md.c:113

References _mdfd_getseg(), Assert, EXTENSION_FAIL, fb(), FileGetRawDesc(), _MdfdVec::mdfd_vfd, and mdopenfork().

Referenced by mdcreate(), and mdopenfork().

◆ mdfiletagmatches()

bool mdfiletagmatches ( const FileTag * ftag,
const FileTag * candidate 
)
extern

Definition at line 1974 of file md.c.

1975{
1976 /*
1977 * For now we only use filter requests as a way to drop all scheduled
1978 * callbacks relating to a given database, when dropping the database.
1979 * We'll return true for all candidates that have the same database OID as
1980 * the ftag from the SYNC_FILTER_REQUEST request, so they're forgotten.
1981 */
1982 return ftag->rlocator.dbOid == candidate->rlocator.dbOid;
1983}
RelFileLocator rlocator
Definition sync.h:54

References RelFileLocator::dbOid, fb(), and FileTag::rlocator.

◆ mdimmedsync()

void mdimmedsync ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1441 of file md.c.

1442{
1443 int segno;
1444 int min_inactive_seg;
1445
1446 /*
1447 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1448 * the loop below will get them all!
1449 */
1450 mdnblocks(reln, forknum);
1451
1452 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1453
1454 /*
1455 * Temporarily open inactive segments, then close them after sync. There
1456 * may be some inactive segments left opened after fsync() error, but that
1457 * is harmless. We don't bother to clean them up and take a risk of
1458 * further trouble. The next mdclose() will soon close them.
1459 */
1460 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1461 segno++;
1462
1463 while (segno > 0)
1464 {
1465 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1466
1467 /*
1468 * fsyncs done through mdimmedsync() should be tracked in a separate
1469 * IOContext than those done through mdsyncfiletag() to differentiate
1470 * between unavoidable client backend fsyncs (e.g. those done during
1471 * index build) and those which ideally would have been done by the
1472 * checkpointer. Since other IO operations bypassing the buffer
1473 * manager could also be tracked in such an IOContext, wait until
1474 * these are also tracked to track immediate fsyncs.
1475 */
1479 errmsg("could not fsync file \"%s\": %m",
1480 FilePathName(v->mdfd_vfd))));
1481
1482 /* Close inactive segments immediately */
1483 if (segno > min_inactive_seg)
1484 {
1485 FileClose(v->mdfd_vfd);
1486 _fdvec_resize(reln, forknum, segno - 1);
1487 }
1488
1489 segno--;
1490 }
1491}
int FileSync(File file, uint32 wait_event_info)
Definition fd.c:2335
int data_sync_elevel(int elevel)
Definition fd.c:3985
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
Definition md.c:1711

References _fdvec_resize(), _mdfd_openseg(), data_sync_elevel(), ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), FileClose(), FilePathName(), FileSync(), _MdfdVec::mdfd_vfd, and mdnblocks().

◆ mdinit()

void mdinit ( void  )
extern

Definition at line 190 of file md.c.

191{
192 MdCxt = AllocSetContextCreate(TopMemoryContext,
193 "MdSmgr",
194 ALLOCSET_DEFAULT_SIZES);
195}
MemoryContext TopMemoryContext
Definition mcxt.c:166
static MemoryContext MdCxt
Definition md.c:97
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, MdCxt, and TopMemoryContext.

◆ mdmaxcombine()

uint32 mdmaxcombine ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum 
)
extern

Definition at line 844 of file md.c.

846{
847 BlockNumber segoff;
848
849 segoff = blocknum % ((BlockNumber) RELSEG_SIZE);
850
851 return RELSEG_SIZE - segoff;
852}

References fb().

◆ mdnblocks()

BlockNumber mdnblocks ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1234 of file md.c.

1235{
1236 MdfdVec *v;
1237 BlockNumber nblocks;
1238 BlockNumber segno;
1239
1240 mdopenfork(reln, forknum, EXTENSION_FAIL);
1241
1242 /* mdopen has opened the first segment */
1243 Assert(reln->md_num_open_segs[forknum] > 0);
1244
1245 /*
1246 * Start from the last open segments, to avoid redundant seeks. We have
1247 * previously verified that these segments are exactly RELSEG_SIZE long,
1248 * and it's useless to recheck that each time.
1249 *
1250 * NOTE: this assumption could only be wrong if another backend has
1251 * truncated the relation. We rely on higher code levels to handle that
1252 * scenario by closing and re-opening the md fd, which is handled via
1253 * relcache flush. (Since the checkpointer doesn't participate in
1254 * relcache flush, it could have segment entries for inactive segments;
1255 * that's OK because the checkpointer never needs to compute relation
1256 * size.)
1257 */
1258 segno = reln->md_num_open_segs[forknum] - 1;
1259 v = &reln->md_seg_fds[forknum][segno];
1260
1261 for (;;)
1262 {
1263 nblocks = _mdnblocks(reln, forknum, v);
1264 if (nblocks > ((BlockNumber) RELSEG_SIZE))
1265 elog(FATAL, "segment too big");
1266 if (nblocks < ((BlockNumber) RELSEG_SIZE))
1267 return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
1268
1269 /*
1270 * If segment is exactly RELSEG_SIZE, advance to next one.
1271 */
1272 segno++;
1273
1274 /*
1275 * We used to pass O_CREAT here, but that has the disadvantage that it
1276 * might create a segment which has vanished through some operating
1277 * system misadventure. In such a case, creating the segment here
1278 * undermines _mdfd_getseg's attempts to notice and report an error
1279 * upon access to a missing segment.
1280 */
1281 v = _mdfd_openseg(reln, forknum, segno, 0);
1282 if (v == NULL)
1283 return segno * ((BlockNumber) RELSEG_SIZE);
1284 }
1285}
#define FATAL
Definition elog.h:41
#define elog(elevel,...)
Definition elog.h:226

References _mdfd_openseg(), _mdnblocks(), Assert, elog, EXTENSION_FAIL, FATAL, fb(), and mdopenfork().

Referenced by mdextend(), mdimmedsync(), mdregistersync(), mdwritev(), and mdzeroextend().

◆ mdopen()

void mdopen ( SMgrRelation  reln)
extern

Definition at line 713 of file md.c.

714{
715 /* mark it not open */
716 for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
717 reln->md_num_open_segs[forknum] = 0;
718}

References fb(), and MAX_FORKNUM.

◆ mdprefetch()

bool mdprefetch ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks 
)
extern

Definition at line 747 of file md.c.

749{
750#ifdef USE_PREFETCH
751
753
754 if ((uint64) blocknum + nblocks > (uint64) MaxBlockNumber + 1)
755 return false;
756
757 while (nblocks > 0)
758 {
759 pgoff_t seekpos;
760 MdfdVec *v;
762
763 v = _mdfd_getseg(reln, forknum, blocknum, false,
765 if (v == NULL)
766 return false;
767
768 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
769
770 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
771
773 Min(nblocks,
774 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
775
778
779 blocknum += nblocks_this_segment;
780 nblocks -= nblocks_this_segment;
781 }
782#endif /* USE_PREFETCH */
783
784 return true;
785}
#define MaxBlockNumber
Definition block.h:35
#define Min(x, y)
Definition c.h:997
uint64_t uint64
Definition c.h:547
int io_direct_flags
Definition fd.c:171
int FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2066
#define IO_DIRECT_DATA
Definition fd.h:54

References _mdfd_getseg(), Assert, EXTENSION_FAIL, EXTENSION_RETURN_NULL, fb(), FilePrefetch(), InRecovery, IO_DIRECT_DATA, io_direct_flags, MaxBlockNumber, _MdfdVec::mdfd_vfd, and Min.

◆ mdreadv()

void mdreadv ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)
extern

Definition at line 858 of file md.c.

860{
861 while (nblocks > 0)
862 {
863 struct iovec iov[PG_IOV_MAX];
864 int iovcnt;
865 pgoff_t seekpos;
866 int nbytes;
867 MdfdVec *v;
870 size_t size_this_segment;
871
872 v = _mdfd_getseg(reln, forknum, blocknum, false,
874
875 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
876
877 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
878
880 Min(nblocks,
881 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
883
884 if (nblocks_this_segment != nblocks)
885 elog(ERROR, "read crosses segment boundary");
886
890
891 /*
892 * Inner loop to continue after a short read. We'll keep going until
893 * we hit EOF rather than assuming that a short read means we hit the
894 * end.
895 */
896 for (;;)
897 {
898 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
899 reln->smgr_rlocator.locator.spcOid,
900 reln->smgr_rlocator.locator.dbOid,
901 reln->smgr_rlocator.locator.relNumber,
902 reln->smgr_rlocator.backend);
903 nbytes = FileReadV(v->mdfd_vfd, iov, iovcnt, seekpos,
905 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
906 reln->smgr_rlocator.locator.spcOid,
907 reln->smgr_rlocator.locator.dbOid,
908 reln->smgr_rlocator.locator.relNumber,
909 reln->smgr_rlocator.backend,
910 nbytes,
912
913#ifdef SIMULATE_SHORT_READ
914 nbytes = Min(nbytes, 4096);
915#endif
916
917 if (nbytes < 0)
920 errmsg("could not read blocks %u..%u in file \"%s\": %m",
921 blocknum,
922 blocknum + nblocks_this_segment - 1,
923 FilePathName(v->mdfd_vfd))));
924
925 if (nbytes == 0)
926 {
927 /*
928 * We are at or past EOF, or we read a partial block at EOF.
929 * Normally this is an error; upper levels should never try to
930 * read a nonexistent block. However, if zero_damaged_pages
931 * is ON or we are InRecovery, we should instead return zeroes
932 * without complaining. This allows, for example, the case of
933 * trying to update a block that was later truncated away.
934 *
935 * NB: We think that this codepath is unreachable in recovery
936 * and incomplete with zero_damaged_pages, as missing segments
937 * are not created. Putting blocks into the buffer-pool that
938 * do not exist on disk is rather problematic, as it will not
939 * be found by scans that rely on smgrnblocks(), as they are
940 * beyond EOF. It also can cause weird problems with relation
941 * extension, as relation extension does not expect blocks
942 * beyond EOF to exist.
943 *
944 * Therefore we do not want to copy the logic into
945 * mdstartreadv(), where it would have to be more complicated
946 * due to potential differences in the zero_damaged_pages
947 * setting between the definer and completor of IO.
948 *
949 * For PG 18, we are putting an Assert(false) in mdreadv()
950 * (triggering failures in assertion-enabled builds, but
951 * continuing to work in production builds). Afterwards we
952 * plan to remove this code entirely.
953 */
955 {
956 Assert(false); /* see comment above */
957
960 ++i)
961 memset(buffers[i], 0, BLCKSZ);
962 break;
963 }
964 else
967 errmsg("could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
968 blocknum,
969 blocknum + nblocks_this_segment - 1,
973 }
974
975 /* One loop should usually be enough. */
976 transferred_this_segment += nbytes;
979 break;
980
981 /* Adjust position and vectors after a short read. */
982 seekpos += nbytes;
984 }
985
986 nblocks -= nblocks_this_segment;
987 buffers += nblocks_this_segment;
988 blocknum += nblocks_this_segment;
989 }
990}
bool zero_damaged_pages
Definition bufmgr.c:173
#define lengthof(array)
Definition c.h:803
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2148
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Definition file_utils.c:614
#define EXTENSION_CREATE_RECOVERY
Definition md.c:119
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
Definition md.c:795
#define ERRCODE_DATA_CORRUPTED
#define PG_IOV_MAX
Definition pg_iovec.h:47

References _mdfd_getseg(), Assert, buffers_to_iovec(), compute_remaining_iovec(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileReadV(), i, InRecovery, lengthof, _MdfdVec::mdfd_vfd, Min, PG_IOV_MAX, and zero_damaged_pages.

◆ mdregistersync()

void mdregistersync ( SMgrRelation  reln,
ForkNumber  forknum 
)
extern

Definition at line 1390 of file md.c.

1391{
1392 int segno;
1393 int min_inactive_seg;
1394
1395 /*
1396 * NOTE: mdnblocks makes sure we have opened all active segments, so that
1397 * the loop below will get them all!
1398 */
1399 mdnblocks(reln, forknum);
1400
1401 min_inactive_seg = segno = reln->md_num_open_segs[forknum];
1402
1403 /*
1404 * Temporarily open inactive segments, then close them after sync. There
1405 * may be some inactive segments left opened after error, but that is
1406 * harmless. We don't bother to clean them up and take a risk of further
1407 * trouble. The next mdclose() will soon close them.
1408 */
1409 while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
1410 segno++;
1411
1412 while (segno > 0)
1413 {
1414 MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
1415
1416 register_dirty_segment(reln, forknum, v);
1417
1418 /* Close inactive segments immediately */
1419 if (segno > min_inactive_seg)
1420 {
1421 FileClose(v->mdfd_vfd);
1422 _fdvec_resize(reln, forknum, segno - 1);
1423 }
1424
1425 segno--;
1426 }
1427}

References _fdvec_resize(), _mdfd_openseg(), fb(), FileClose(), _MdfdVec::mdfd_vfd, mdnblocks(), and register_dirty_segment().

◆ mdstartreadv()

void mdstartreadv ( PgAioHandle * ioh,
SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
void **  buffers,
BlockNumber  nblocks 
)
extern

Definition at line 996 of file md.c.

999{
1000 pgoff_t seekpos;
1001 MdfdVec *v;
1003 struct iovec *iov;
1004 int iovcnt;
1005 int ret;
1006
1007 v = _mdfd_getseg(reln, forknum, blocknum, false,
1009
1010 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1011
1012 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1013
1015 Min(nblocks,
1016 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1017
1018 if (nblocks_this_segment != nblocks)
1019 elog(ERROR, "read crossing segment boundary");
1020
1022
1023 Assert(nblocks <= iovcnt);
1024
1026
1028
1031
1033 reln,
1034 forknum,
1035 blocknum,
1036 nblocks,
1037 false);
1039
1041 if (ret != 0)
1042 ereport(ERROR,
1044 errmsg("could not start reading blocks %u..%u in file \"%s\": %m",
1045 blocknum,
1046 blocknum + nblocks_this_segment - 1,
1047 FilePathName(v->mdfd_vfd))));
1048
1049 /*
1050 * The error checks corresponding to the post-read checks in mdreadv() are
1051 * in md_readv_complete().
1052 *
1053 * However we chose, at least for now, to not implement the
1054 * zero_damaged_pages logic present in mdreadv(). As outlined in mdreadv()
1055 * that logic is rather problematic, and we want to get rid of it. Here
1056 * equivalent logic would have to be more complicated due to potential
1057 * differences in the zero_damaged_pages setting between the definer and
1058 * completor of IO.
1059 */
1060}
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition aio.c:330
@ PGAIO_HCB_MD_READV
Definition aio.h:196
@ PGAIO_HF_BUFFERED
Definition aio.h:77
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
Definition aio_io.c:42
int FileStartReadV(PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2204
void pgaio_io_set_target_smgr(PgAioHandle *ioh, SMgrRelationData *smgr, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skip_fsync)
Definition smgr.c:1038

References _mdfd_getseg(), Assert, buffers_to_iovec(), elog, ereport, errcode_for_file_access(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileStartReadV(), IO_DIRECT_DATA, io_direct_flags, _MdfdVec::mdfd_vfd, Min, PGAIO_HCB_MD_READV, PGAIO_HF_BUFFERED, pgaio_io_get_iovec(), pgaio_io_register_callbacks(), pgaio_io_set_flag(), and pgaio_io_set_target_smgr().

◆ mdsyncfiletag()

int mdsyncfiletag ( const FileTag * ftag,
char * path 
)
extern

Definition at line 1904 of file md.c.

1905{
1907 File file;
1909 bool need_to_close;
1910 int result,
1911 save_errno;
1912
1913 /* See if we already have the file open, or need to open it. */
1914 if (ftag->segno < reln->md_num_open_segs[ftag->forknum])
1915 {
1916 file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd;
1917 strlcpy(path, FilePathName(file), MAXPGPATH);
1918 need_to_close = false;
1919 }
1920 else
1921 {
1922 MdPathStr p;
1923
1924 p = _mdfd_segpath(reln, ftag->forknum, ftag->segno);
1925 strlcpy(path, p.str, MD_PATH_STR_MAXLEN);
1926
1927 file = PathNameOpenFile(path, _mdfd_open_flags());
1928 if (file < 0)
1929 return -1;
1930 need_to_close = true;
1931 }
1932
1934
1935 /* Sync the file. */
1936 result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1937 save_errno = errno;
1938
1939 if (need_to_close)
1940 FileClose(file);
1941
1943 IOOP_FSYNC, io_start, 1, 0);
1944
1945 errno = save_errno;
1946 return result;
1947}
bool track_io_timing
Definition bufmgr.c:176
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
Definition md.c:1691
#define MD_PATH_STR_MAXLEN
Definition md.c:132
#define MAXPGPATH
@ IOOBJECT_RELATION
Definition pgstat.h:277
@ IOCONTEXT_NORMAL
Definition pgstat.h:289
@ IOOP_FSYNC
Definition pgstat.h:308
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition pgstat_io.c:122
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
int16 forknum
Definition sync.h:53
uint64 segno
Definition sync.h:55
char str[MD_PATH_STR_MAXLEN+1]
Definition md.c:140

References _mdfd_open_flags(), _mdfd_segpath(), fb(), FileClose(), FilePathName(), FileSync(), FileTag::forknum, INVALID_PROC_NUMBER, IOCONTEXT_NORMAL, IOOBJECT_RELATION, IOOP_FSYNC, MAXPGPATH, MD_PATH_STR_MAXLEN, PathNameOpenFile(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), FileTag::rlocator, FileTag::segno, smgropen(), MdPathStr::str, strlcpy(), and track_io_timing.

◆ mdtruncate()

void mdtruncate ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  curnblk,
BlockNumber  nblocks 
)
extern

Definition at line 1301 of file md.c.

1303{
1305 int curopensegs;
1306
1307 if (nblocks > curnblk)
1308 {
1309 /* Bogus request ... but no complaint if InRecovery */
1310 if (InRecovery)
1311 return;
1312 ereport(ERROR,
1313 (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1314 relpath(reln->smgr_rlocator, forknum).str,
1315 nblocks, curnblk)));
1316 }
1317 if (nblocks == curnblk)
1318 return; /* no work */
1319
1320 /*
1321 * Truncate segments, starting at the last one. Starting at the end makes
1322 * managing the memory for the fd array easier, should there be errors.
1323 */
1324 curopensegs = reln->md_num_open_segs[forknum];
1325 while (curopensegs > 0)
1326 {
1327 MdfdVec *v;
1328
1330
1331 v = &reln->md_seg_fds[forknum][curopensegs - 1];
1332
1333 if (priorblocks > nblocks)
1334 {
1335 /*
1336 * This segment is no longer active. We truncate the file, but do
1337 * not delete it, for reasons explained in the header comments.
1338 */
1340 ereport(ERROR,
1342 errmsg("could not truncate file \"%s\": %m",
1343 FilePathName(v->mdfd_vfd))));
1344
1345 if (!SmgrIsTemp(reln))
1346 register_dirty_segment(reln, forknum, v);
1347
1348 /* we never drop the 1st segment */
1349 Assert(v != &reln->md_seg_fds[forknum][0]);
1350
1351 FileClose(v->mdfd_vfd);
1352 _fdvec_resize(reln, forknum, curopensegs - 1);
1353 }
1354 else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
1355 {
1356 /*
1357 * This is the last segment we want to keep. Truncate the file to
1358 * the right length. NOTE: if nblocks is exactly a multiple K of
1359 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
1360 * keep it. This adheres to the invariant given in the header
1361 * comments.
1362 */
1364
1366 ereport(ERROR,
1368 errmsg("could not truncate file \"%s\" to %u blocks: %m",
1370 nblocks)));
1371 if (!SmgrIsTemp(reln))
1372 register_dirty_segment(reln, forknum, v);
1373 }
1374 else
1375 {
1376 /*
1377 * We still need this segment, so nothing to do for this and any
1378 * earlier segment.
1379 */
1380 break;
1381 }
1382 curopensegs--;
1383 }
1384}
int FileTruncate(File file, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2464

References _fdvec_resize(), Assert, ereport, errcode_for_file_access(), errmsg(), ERROR, fb(), FileClose(), FilePathName(), FileTruncate(), InRecovery, _MdfdVec::mdfd_vfd, register_dirty_segment(), relpath, and SmgrIsTemp.

◆ mdunlink()

void mdunlink ( RelFileLocatorBackend  rlocator,
ForkNumber  forknum,
bool  isRedo 
)
extern

Definition at line 337 of file md.c.

/*
 * mdunlink() -- body as listed from md.c (lines 338-347).
 *
 * Dispatches to mdunlinkfork() for the requested fork.  Passing
 * InvalidForkNumber as forknum acts as a wildcard: every fork number from 0
 * through MAX_FORKNUM is processed in turn.  rlocator and isRedo are
 * forwarded unchanged to each mdunlinkfork() call.
 */
338{
339 /* Now do the per-fork work */
340 if (forknum == InvalidForkNumber)
341 {
/* wildcard case: forknum is reused as the loop counter over all forks */
342 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
343 mdunlinkfork(rlocator, forknum, isRedo);
344 }
345 else
/* single-fork case: handle only the fork that was asked for */
346 mdunlinkfork(rlocator, forknum, isRedo);
347}
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
Definition md.c:374

References fb(), InvalidForkNumber, MAX_FORKNUM, and mdunlinkfork().

◆ mdunlinkfiletag()

int mdunlinkfiletag ( const FileTag *  ftag,
char *  path 
)
extern

Definition at line 1956 of file md.c.

/*
 * mdunlinkfiletag() -- body as listed from md.c (lines 1957-1966).
 *
 * Builds the path of the main fork of the relation named by ftag->rlocator,
 * copies it into the caller-supplied buffer "path", and unlinks that file.
 *
 * ftag: only its rlocator member is read here; the fork is always
 *       MAIN_FORKNUM regardless of anything else the tag may carry.
 * path: output buffer; the copy is bounded by MAXPGPATH, so the buffer needs
 *       to be at least that large.
 *
 * Returns whatever unlink() returns (per POSIX: 0 on success, -1 with errno
 * set on failure); no error is raised here.
 */
1957{
1958 RelPathStr p;
1959
1960 /* Compute the path. */
1961 p = relpathperm(ftag->rlocator, MAIN_FORKNUM);
/* hand the path back to the caller through the output buffer */
1962 strlcpy(path, p.str, MAXPGPATH);
1963
1964 /* Try to unlink the file. */
1965 return unlink(path);
1966}
@ MAIN_FORKNUM
Definition relpath.h:58
#define relpathperm(rlocator, forknum)
Definition relpath.h:146

References fb(), MAIN_FORKNUM, MAXPGPATH, relpathperm, FileTag::rlocator, RelPathStr::str, and strlcpy().

◆ mdwriteback()

void mdwriteback ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
BlockNumber  nblocks 
)
extern

Definition at line 1175 of file md.c.

/*
 * mdwriteback() -- body as listed from md.c (lines 1177-1223).
 *
 * Walks the block range starting at blocknum, nblocks long, one segment file
 * at a time.  Each pass looks up the segment holding blocknum, works out how
 * many of the remaining blocks (nflush) fall inside that same segment, and
 * then advances blocknum and nblocks by that amount.  The function returns
 * early, without raising an error, as soon as a segment lookup yields NULL.
 *
 * NOTE(review): listing lines 1178, 1193 and 1218 are absent from this
 * extract.  Judging by the References list below the listing they presumably
 * hold an Assert involving io_direct_flags and IO_DIRECT_DATA, the
 * EXTENSION_DONT_OPEN argument that completes the _mdfd_getseg() call, and
 * the FileWriteback() call that consumes seekpos and nflush -- confirm
 * against md.c.
 */
1177{
1179
1180 /*
1181 * Issue flush requests in as few requests as possible; have to split at
1182 * segment boundaries though, since those are actually separate files.
1183 */
1184 while (nblocks > 0)
1185 {
/* nflush starts as the whole remaining range and is trimmed further down */
1186 BlockNumber nflush = nblocks;
1187 pgoff_t seekpos;
1188 MdfdVec *v;
1189 int segnum_start,
1190 segnum_end;
1191
1192 v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
/* NOTE(review): the remainder of this call (listing line 1193) is missing here */
1194
1195 /*
1196 * We might be flushing buffers of already removed relations, that's
1197 * ok, just ignore that case. If the segment file wasn't open already
1198 * (ie from a recent mdwrite()), then we don't want to re-open it, to
1199 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave
1200 * us with a descriptor to a file that is about to be unlinked.
1201 */
1202 if (!v)
1203 return;
1204
/*
 * Despite the wording of the next comment, segnum_start is the index of the
 * segment file that contains blocknum, not a byte offset.
 */
1205 /* compute offset inside the current segment */
1206 segnum_start = blocknum / RELSEG_SIZE;
1207
/*
 * segnum_end is the segment of the last block in the range.  If it differs
 * from segnum_start the range crosses a segment boundary, so nflush is cut
 * down to the blocks left between blocknum and the end of its segment.
 */
1208 /* compute number of desired writes within the current segment */
1209 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1210 if (segnum_start != segnum_end)
1211 nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
1212
1213 Assert(nflush >= 1);
1214 Assert(nflush <= nblocks);
1215
/* byte offset of blocknum within its segment file */
1216 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1217
/* NOTE(review): listing line 1218 is missing here; see the header note */
1219
/* move on to whatever part of the range lies beyond this segment */
1220 nblocks -= nflush;
1221 blocknum += nflush;
1222 }
1223}
void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info)
Definition fd.c:2122
#define EXTENSION_DONT_OPEN
Definition md.c:121

References _mdfd_getseg(), Assert, EXTENSION_DONT_OPEN, fb(), FileWriteback(), IO_DIRECT_DATA, io_direct_flags, and _MdfdVec::mdfd_vfd.

◆ mdwritev()

void mdwritev ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
const void **  buffers,
BlockNumber  nblocks,
bool  skipFsync 
)
extern

Definition at line 1070 of file md.c.

/*
 * mdwritev() -- body as listed from md.c (lines 1072-1165).
 *
 * Writes nblocks blocks, taken from the buffers array, starting at block
 * blocknum of the given fork.  For each pass of the outer loop the segment
 * holding blocknum is looked up, the buffers are packed into an iovec array,
 * and FileWriteV() is called in an inner loop that repeats after a short
 * write.  A negative FileWriteV() result is reported with ereport(ERROR),
 * with a disk-space hint when errno was ENOSPC.  Unless skipFsync is set or
 * the relation is temporary, the segment is passed to
 * register_dirty_segment() after the write.
 *
 * NOTE(review): listing lines 1085, 1086, 1090, 1096, 1099, 1105, 1106,
 * 1121, 1128, 1139, 1143, 1149, 1150 and 1155 are absent from this extract.
 * Among other things the declarations and assignments of
 * nblocks_this_segment, transferred_this_segment and size_this_segment, the
 * final arguments of several calls, and the condition guarding "break" are
 * not visible.  The References list below the listing names
 * EXTENSION_FAIL, EXTENSION_CREATE_RECOVERY, lengthof,
 * errcode_for_file_access(), FilePathName() and compute_remaining_iovec(),
 * which presumably appear on those lines -- confirm against md.c.
 */
1072{
1073 /* This assert is too expensive to have on normally ... */
1074#ifdef CHECK_WRITE_VS_EXTEND
1075 Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum));
1076#endif
1077
1078 while (nblocks > 0)
1079 {
1080 struct iovec iov[PG_IOV_MAX];
1081 int iovcnt;
1082 pgoff_t seekpos;
1083 int nbytes;
1084 MdfdVec *v;
1087 size_t size_this_segment;
1088
1089 v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
/* NOTE(review): the remainder of this call (listing line 1090) is missing here */
1091
/* byte offset of blocknum within its segment file */
1092 seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
1093
1094 Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
1095
/*
 * The visible Min() caps the count at the number of blocks left between
 * blocknum and the end of its segment.  The assignment target (listing line
 * 1096) and listing line 1099 are missing from this extract.
 */
1097 Min(nblocks,
1098 RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
1100
/*
 * A request that cannot be covered in full by this one pass is rejected, so
 * a pass that gets beyond this check consumes the whole remaining request.
 */
1101 if (nblocks_this_segment != nblocks)
1102 elog(ERROR, "write crosses segment boundary");
1103
1104 iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
1107
1108 /*
1109 * Inner loop to continue after a short write. If the reason is that
1110 * we're out of disk space, a future attempt should get an ENOSPC
1111 * error from the kernel.
1112 */
1113 for (;;)
1114 {
1115 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
1116 reln->smgr_rlocator.locator.spcOid,
1117 reln->smgr_rlocator.locator.dbOid,
1118 reln->smgr_rlocator.locator.relNumber,
1119 reln->smgr_rlocator.backend);
1120 nbytes = FileWriteV(v->mdfd_vfd, iov, iovcnt, seekpos,
/* NOTE(review): the last argument of FileWriteV (listing line 1121) is missing here */
1122 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1123 reln->smgr_rlocator.locator.spcOid,
1124 reln->smgr_rlocator.locator.dbOid,
1125 reln->smgr_rlocator.locator.relNumber,
1126 reln->smgr_rlocator.backend,
1127 nbytes,
1129
/* test hook: pretend at most 4096 bytes were written to exercise the retry path */
1130#ifdef SIMULATE_SHORT_WRITE
1131 nbytes = Min(nbytes, 4096);
1132#endif
1133
1134 if (nbytes < 0)
1135 {
/* errno is captured before any further call is made */
1136 bool enospc = errno == ENOSPC;
1137
1138 ereport(ERROR,
1140 errmsg("could not write blocks %u..%u in file \"%s\": %m",
1141 blocknum,
1142 blocknum + nblocks_this_segment - 1,
1144 enospc ? errhint("Check free disk space.") : 0));
1145 }
1146
1147 /* One loop should usually be enough. */
1148 transferred_this_segment += nbytes;
/* NOTE(review): the condition guarding this break (listing lines 1149-1150) is missing here */
1151 break;
1152
1153 /* Adjust position and iovecs after a short write. */
1154 seekpos += nbytes;
1156 }
1157
1158 if (!skipFsync && !SmgrIsTemp(reln))
1159 register_dirty_segment(reln, forknum, v);
1160
/* advance past the blocks just written */
1161 nblocks -= nblocks_this_segment;
1162 buffers += nblocks_this_segment;
1163 blocknum += nblocks_this_segment;
1164 }
1165}
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
Definition fd.c:2230

References _mdfd_getseg(), Assert, buffers_to_iovec(), compute_remaining_iovec(), elog, ereport, errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE_RECOVERY, EXTENSION_FAIL, fb(), FilePathName(), FileWriteV(), lengthof, _MdfdVec::mdfd_vfd, mdnblocks(), Min, PG_IOV_MAX, register_dirty_segment(), and SmgrIsTemp.

◆ mdzeroextend()

void mdzeroextend ( SMgrRelation  reln,
ForkNumber  forknum,
BlockNumber  blocknum,
int  nblocks,
bool  skipFsync 
)
extern

Definition at line 552 of file md.c.

/*
 * mdzeroextend() -- body as listed from md.c (lines 554-662).
 *
 * Extends the given fork by nblocks blocks starting at blocknum.  nblocks
 * must be positive (asserted).  The request is refused when
 * blocknum + nblocks would reach InvalidBlockNumber.  The outer loop runs
 * while remblocks is positive; each pass extends one segment file by
 * numblocks blocks at byte offset seekpos, using FileFallocate() on the
 * large-extension path and FileZero() otherwise.  Unless skipFsync is set or
 * the relation is temporary, the segment is then passed to
 * register_dirty_segment().
 *
 * NOTE(review): a large share of the listing is absent from this extract
 * (see the gaps in the embedded line numbers, e.g. 572-573, 576, 580,
 * 584-585, 587, 589, 591-592, 606, 611, 615, 621, 625-626, 628, 645,
 * 647-648, 650, 659-660).  Not visible here: the ereport() heads, how
 * numblocks and segstartblock are computed, how v is obtained, the second
 * half of the condition that selects the fallocate path, and how
 * remblocks and curblocknum advance.  The References list below the listing
 * names _mdfd_getseg(), EXTENSION_CREATE, file_extend_method and
 * FILE_EXTEND_METHOD_WRITE_ZEROS, which presumably appear on those lines --
 * confirm against md.c.
 */
554{
555 MdfdVec *v;
556 BlockNumber curblocknum = blocknum;
557 int remblocks = nblocks;
558
559 Assert(nblocks > 0);
560
561 /* This assert is too expensive to have on normally ... */
562#ifdef CHECK_WRITE_VS_EXTEND
563 Assert(blocknum >= mdnblocks(reln, forknum));
564#endif
565
566 /*
567 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
568 * more --- we mustn't create a block whose number actually is
569 * InvalidBlockNumber or larger.
570 */
/* the sum is formed after widening blocknum to uint64 */
571 if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
574 errmsg("cannot extend file \"%s\" beyond %u blocks",
575 relpath(reln->smgr_rlocator, forknum).str,
577
578 while (remblocks > 0)
579 {
/* byte offset of the first block to add, derived from segstartblock (its definition is not visible here) */
581 pgoff_t seekpos = (pgoff_t) BLCKSZ * segstartblock;
582 int numblocks;
583
/* NOTE(review): the if/else that sets numblocks is mostly missing; only the else keyword survives */
586 else
588
590
593
594 /*
595 * If available and useful, use posix_fallocate() (via
596 * FileFallocate()) to extend the relation. That's often more
597 * efficient than using write(), as it commonly won't cause the kernel
598 * to allocate page cache space for the extended pages.
599 *
600 * However, we don't use FileFallocate() for small extensions, as it
601 * defeats delayed allocation on some filesystems. Not clear where
602 * that decision should be made though? For now just use a cutoff of
603 * 8, anything between 4 and 8 worked OK in some local testing.
604 */
605 if (numblocks > 8 &&
607 {
608 int ret = 0;
609
/*
 * The if/else around FileFallocate() exists only when HAVE_POSIX_FALLOCATE
 * is defined.  Without it the preprocessor leaves just the braced
 * elog(ERROR) block below, which then always runs on this path.
 */
610#ifdef HAVE_POSIX_FALLOCATE
612 {
613 ret = FileFallocate(v->mdfd_vfd,
614 seekpos, (pgoff_t) BLCKSZ * numblocks,
616 }
617 else
618#endif
619 {
620 elog(ERROR, "unsupported file_extend_method: %d",
622 }
/* any nonzero FileFallocate() result is treated as failure */
623 if (ret != 0)
624 {
627 errmsg("could not extend file \"%s\" with FileFallocate(): %m",
629 errhint("Check free disk space."));
630 }
631 }
632 else
633 {
634 int ret;
635
636 /*
637 * Even if we don't want to use fallocate, we can still extend a
638 * bit more efficiently than writing each 8kB block individually.
639 * pg_pwrite_zeros() (via FileZero()) uses pg_pwritev_with_retry()
640 * to avoid multiple writes or needing a zeroed buffer for the
641 * whole length of the extension.
642 */
643 ret = FileZero(v->mdfd_vfd,
644 seekpos, (pgoff_t) BLCKSZ * numblocks,
/* here only a negative FileZero() result counts as failure */
646 if (ret < 0)
649 errmsg("could not extend file \"%s\": %m",
651 errhint("Check free disk space."));
652 }
653
654 if (!skipFsync && !SmgrIsTemp(reln))
655 register_dirty_segment(reln, forknum, v);
656
/* sanity check: the segment must not have grown past RELSEG_SIZE blocks */
657 Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
658
/* NOTE(review): the loop-advance statements (listing lines 659-660) are missing here */
661 }
662}
int file_extend_method
Definition fd.c:168
int FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2407
int FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
Definition fd.c:2362
@ FILE_EXTEND_METHOD_WRITE_ZEROS
Definition fd.h:63

References _mdfd_getseg(), _mdnblocks(), Assert, elog, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, EXTENSION_CREATE, fb(), file_extend_method, FILE_EXTEND_METHOD_WRITE_ZEROS, FileFallocate(), FilePathName(), FileZero(), InvalidBlockNumber, _MdfdVec::mdfd_vfd, mdnblocks(), register_dirty_segment(), relpath, and SmgrIsTemp.

Variable Documentation

◆ aio_md_readv_cb

PGDLLIMPORT const PgAioHandleCallbacks aio_md_readv_cb
extern

Definition at line 169 of file md.c.

/*
 * Initializer of aio_md_readv_cb as listed from md.c (lines 169-172): it
 * sets the complete_shared member to md_readv_complete and the report
 * member to md_readv_report.  No other members are named in this listing.
 */
169 {
170 .complete_shared = md_readv_complete,
171 .report = md_readv_report,
172};
static void md_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
Definition md.c:2056
static PgAioResult md_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
Definition md.c:1989