/*
 * Timeline ID constant with the fixed value 1.
 *
 * NOTE(review): presumably the timeline assigned to the very first WAL
 * written at bootstrap time; no use of it is visible in this excerpt, so
 * confirm against the callers.
 */
112 #define BootstrapTimeLineID 1
/*
 * Global switch for extra WAL debugging output; off by default.
 *
 * Code further down tests this flag before emitting a DEBUG1 message
 * ("initialized %d pages, up to %X/%X"), so turning it on makes that kind of
 * diagnostic visible.
 */
141 bool XLOG_DEBUG =
false;
/*
 * Compile-time constant, fixed at 8.
 *
 * NOTE(review): presumably the number of WAL insertion locks; the code that
 * sizes or indexes by it is not visible in this excerpt -- confirm.
 */
151 #define NUM_XLOGINSERT_LOCKS 8
168 #ifdef HAVE_FSYNC_WRITETHROUGH
171 #ifdef HAVE_FDATASYNC
174 #ifdef OPEN_SYNC_FLAG
177 #ifdef OPEN_DATASYNC_FLAG
/*
 * Number of bytes between 'endptr' and the next XLOG_BLCKSZ boundary.
 *
 * Yields 0 when 'endptr' is already an exact multiple of XLOG_BLCKSZ,
 * otherwise XLOG_BLCKSZ minus the offset of 'endptr' within its block.
 *
 * 'endptr' can be evaluated more than once, so do not pass an expression
 * with side effects.
 */
572 #define INSERT_FREESPACE(endptr) \
573 (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
/*
 * Successor of buffer index 'idx', with wraparound.
 *
 * When 'idx' equals XLogCtl->XLogCacheBlck the result is 0; otherwise it is
 * idx + 1.  This treats XLogCacheBlck as the highest valid index.
 *
 * 'idx' can be evaluated more than once, so do not pass an expression with
 * side effects.
 */
576 #define NextBufIdx(idx) \
577 (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
/*
 * Map a WAL position 'recptr' to a buffer index.
 *
 * The position is first reduced to a block number (recptr / XLOG_BLCKSZ),
 * which is then taken modulo (XLogCtl->XLogCacheBlck + 1), so the result
 * lies in the range 0 .. XLogCacheBlck.
 */
583 #define XLogRecPtrToBufIdx(recptr) \
584 (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
/*
 * XLOG_BLCKSZ minus SizeOfXLogShortPHD.
 *
 * NOTE(review): presumably the payload capacity of a WAL page that carries
 * only the short page header; SizeOfXLogShortPHD is defined outside this
 * excerpt -- confirm.
 */
589 #define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)
/*
 * Thin alias: forwards both arguments unchanged to XLogMBVarToSegs().
 *
 * NOTE(review): presumably converts a size setting 'x' into a count of WAL
 * segments of 'segsize' bytes; XLogMBVarToSegs is defined outside this
 * excerpt, so its units and rounding should be checked there.
 */
595 #define ConvertToXSegs(x, segsize) XLogMBVarToSegs((x), (segsize))
732 bool topxid_included)
739 bool isLogSwitch = (rechdr->
xl_rmid == RM_XLOG_ID &&
751 elog(
ERROR,
"cannot make new WAL entries during recovery");
821 (!prevDoPageWrites ||
852 rdata_crc = rechdr->
xl_crc;
855 rechdr->
xl_crc = rdata_crc;
862 StartPos, EndPos, insertTLI);
904 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
922 TRACE_POSTGRESQL_WAL_SWITCH();
933 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
937 if (offset == EndPos % XLOG_BLCKSZ)
953 char *errormsg = NULL;
967 for (; rdata != NULL; rdata = rdata->
next)
990 errormsg ? errormsg :
"no error message");
996 debug_reader->
record = decoded;
998 debug_reader->
record = NULL;
1045 uint64 startbytepos;
1066 startbytepos =
Insert->CurrBytePos;
1067 endbytepos = startbytepos + size;
1068 prevbytepos =
Insert->PrevBytePos;
1069 Insert->CurrBytePos = endbytepos;
1070 Insert->PrevBytePos = startbytepos;
1100 uint64 startbytepos;
1115 startbytepos =
Insert->CurrBytePos;
1121 *EndPos = *StartPos = ptr;
1125 endbytepos = startbytepos + size;
1126 prevbytepos =
Insert->PrevBytePos;
1138 Insert->CurrBytePos = endbytepos;
1139 Insert->PrevBytePos = startbytepos;
1183 while (rdata != NULL)
1185 char *rdata_data = rdata->
data;
1186 int rdata_len = rdata->
len;
1188 while (rdata_len > freespace)
1194 memcpy(currpos, rdata_data, freespace);
1195 rdata_data += freespace;
1196 rdata_len -= freespace;
1197 written += freespace;
1198 CurrPos += freespace;
1229 memcpy(currpos, rdata_data, rdata_len);
1230 currpos += rdata_len;
1231 CurrPos += rdata_len;
1232 freespace -= rdata_len;
1233 written += rdata_len;
1235 rdata = rdata->
next;
1237 Assert(written == write_len);
1253 CurrPos += freespace;
1261 while (CurrPos < EndPos)
1285 CurrPos += XLOG_BLCKSZ;
1294 if (CurrPos != EndPos)
1295 elog(
PANIC,
"space reserved for WAL record does not match what was written");
1317 static int lockToTry = -1;
1319 if (lockToTry == -1)
1444 elog(
PANIC,
"cannot wait without a PGPROC structure");
1448 bytepos =
Insert->CurrBytePos;
1460 if (upto > reservedUpto)
1463 (
errmsg(
"request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1465 upto = reservedUpto;
1477 finishedUpto = reservedUpto;
1498 insertingat, &insertingat))
1509 }
while (insertingat < upto);
1512 finishedUpto = insertingat;
1514 return finishedUpto;
1538 static uint64 cachedPage = 0;
1539 static char *cachedPos = NULL;
1546 if (ptr / XLOG_BLCKSZ == cachedPage)
1550 return cachedPos + ptr % XLOG_BLCKSZ;
1577 expectedEndPtr = ptr;
1578 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1581 if (expectedEndPtr != endptr)
1606 initializedUpto = ptr;
1613 if (expectedEndPtr != endptr)
1614 elog(
PANIC,
"could not find WAL buffer for %X/%X",
1630 cachedPage = ptr / XLOG_BLCKSZ;
1636 return cachedPos + ptr % XLOG_BLCKSZ;
1664 seg_offset = XLOG_BLCKSZ;
1707 seg_offset = XLOG_BLCKSZ;
1714 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1738 offset = ptr % XLOG_BLCKSZ;
1842 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
1843 WriteRqst.
Write = OldPageRqstPtr;
1844 WriteRqst.
Flush = 0;
1848 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
1861 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
1871 MemSet((
char *) NewPage, 0, XLOG_BLCKSZ);
1879 NewPage->xlp_tli = tli;
1880 NewPage->xlp_pageaddr = NewPageBeginPtr;
1897 if (!
Insert->forcePageWrites)
1929 if (XLOG_DEBUG && npages > 0)
1931 elog(
DEBUG1,
"initialized %d pages, up to %X/%X",
2018 recycleSegNo = (
XLogSegNo) ceil(((
double) lastredoptr + distance) /
2021 if (recycleSegNo < minSegNo)
2022 recycleSegNo = minSegNo;
2023 if (recycleSegNo > maxSegNo)
2024 recycleSegNo = maxSegNo;
2026 return recycleSegNo;
2066 bool last_iteration;
2111 elog(
PANIC,
"xlog write request %X/%X is past end of log %X/%X",
2166 finishing_seg = !ispartialpage &&
2169 if (last_iteration ||
2181 nbytes = npages * (
Size) XLOG_BLCKSZ;
2224 errmsg(
"could not write to log file %s "
2225 "at offset %u, length %zu: %m",
2226 xlogfname, startoffset, nleft)));
2230 startoffset += written;
2231 }
while (nleft > 0);
2288 if (flexible && npages == 0)
2374 WriteRqstPtr -= WriteRqstPtr % XLOG_BLCKSZ;
2479 if (!force && newMinRecoveryPoint < lsn)
2481 "xlog min recovery request %X/%X is past current point %X/%X",
2494 (
errmsg_internal(
"updated min recovery point to %X/%X on timeline %u",
2496 newMinRecoveryPointTLI)));
2534 elog(
LOG,
"xlog flush request %X/%X; write %X/%X; flush %X/%X",
2551 WriteRqstPtr = record;
2563 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2631 WriteRqst.
Write = insertpos;
2632 WriteRqst.
Flush = insertpos;
2669 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2702 bool flexible =
true;
2725 WriteRqst.
Write -= WriteRqst.
Write % XLOG_BLCKSZ;
2787 WriteRqst.
Flush = 0;
2792 elog(
LOG,
"xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
2808 XLogWrite(WriteRqst, insertTLI, flexible);
2918 bool *added,
char *path)
2938 if (errno != ENOENT)
2941 errmsg(
"could not open file \"%s\": %m", path)));
2952 elog(
DEBUG2,
"creating and filling new WAL file");
2963 errmsg(
"could not create file \"%s\": %m", tmppath)));
2965 memset(zbuffer.
data, 0, XLOG_BLCKSZ);
2993 for (
int i = 0;
i < blocks;)
2996 off_t offset =
i * XLOG_BLCKSZ;
3017 save_errno = errno ? errno : ENOSPC;
3035 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3041 int save_errno = errno;
3047 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3054 errmsg(
"could not close file \"%s\": %m", tmppath)));
3061 installed_segno = logsegno;
3077 elog(
DEBUG2,
"done creating and filling new WAL file");
3123 errmsg(
"could not open file \"%s\": %m", path)));
3162 errmsg(
"could not open file \"%s\": %m", path)));
3176 errmsg(
"could not create file \"%s\": %m", tmppath)));
3185 nread = upto - nbytes;
3191 if (nread <
sizeof(buffer))
3192 memset(buffer.
data, 0,
sizeof(buffer));
3198 if (nread >
sizeof(buffer))
3199 nread =
sizeof(buffer);
3201 r =
read(srcfd, buffer.
data, nread);
3207 errmsg(
"could not read file \"%s\": %m",
3212 errmsg(
"could not read file \"%s\": read %d of %zu",
3213 path, r, (
Size) nread)));
3219 if ((
int)
write(
fd, buffer.
data,
sizeof(buffer)) != (
int)
sizeof(buffer))
3221 int save_errno = errno;
3228 errno = save_errno ? save_errno : ENOSPC;
3232 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3241 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3247 errmsg(
"could not close file \"%s\": %m", tmppath)));
3252 errmsg(
"could not close file \"%s\": %m", path)));
3258 elog(
ERROR,
"InstallXLogFileSegment should not have failed");
3292 struct stat stat_buf;
3313 while (
stat(path, &stat_buf) == 0)
3315 if ((*segno) >= max_segno)
3357 errmsg(
"could not open file \"%s\": %m", path)));
3376 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3378 (void) posix_fadvise(
openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3384 int save_errno = errno;
3390 errmsg(
"could not close file \"%s\": %m", xlogfname)));
3456 int save_errno = errno;
3463 if (segno <= lastRemovedSegNo)
3471 errmsg(
"requested WAL segment %s has already been removed",
3493 return lastRemovedSegNo;
3527 elog(
DEBUG2,
"removing all temporary WAL segments");
3534 if (strncmp(xlde->
d_name,
"xlogtemp.", 9) != 0)
3539 elog(
DEBUG2,
"removed temporary WAL segment \"%s\"", path);
3575 elog(
DEBUG2,
"attempting to remove WAL segments older than log file %s",
3598 if (strcmp(xlde->
d_name + 8, lastoff + 8) <= 0)
3645 recycleSegNo = endLogSegNo + 10;
3652 elog(
DEBUG2,
"attempting to remove WAL segments newer than log file %s",
3668 if (strncmp(xlde->
d_name, switchseg, 8) < 0 &&
3669 strcmp(xlde->
d_name + 8, switchseg + 8) > 0)
3707 struct stat statbuf;
3717 *endlogSegNo <= recycleSegNo &&
3721 true, recycleSegNo, insertTLI))
3752 if (rename(path, newpath) != 0)
3756 errmsg(
"could not rename file \"%s\": %m",
3792 struct stat stat_buf;
3798 (
errmsg(
"required WAL directory \"%s\" does not exist",
3803 if (
stat(path, &stat_buf) == 0)
3808 (
errmsg(
"required WAL directory \"%s\" does not exist",
3814 (
errmsg(
"creating missing WAL directory \"%s\"", path)));
3817 (
errmsg(
"could not create missing directory \"%s\": %m",
3842 elog(
DEBUG2,
"removing WAL backup history file \"%s\"",
3883 (
errcode(ERRCODE_INTERNAL_ERROR),
3884 errmsg(
"could not generate secret authorization token")));
3916 "pg_control is too large for atomic disk writes");
3918 "sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
3964 errmsg(
"could not create file \"%s\": %m",
3976 errmsg(
"could not write to file \"%s\": %m",
3985 errmsg(
"could not fsync file \"%s\": %m",
3992 errmsg(
"could not close file \"%s\": %m",
4001 static char wal_segsz_str[20];
4012 errmsg(
"could not open file \"%s\": %m",
4022 errmsg(
"could not read file \"%s\": %m",
4027 errmsg(
"could not read file \"%s\": read %d of %zu",
4043 (
errmsg(
"database files are incompatible with server"),
4044 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4045 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4048 errhint(
"This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4052 (
errmsg(
"database files are incompatible with server"),
4053 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d,"
4054 " but the server was compiled with PG_CONTROL_VERSION %d.",
4056 errhint(
"It looks like you need to initdb.")));
4067 (
errmsg(
"incorrect checksum in control file")));
4076 (
errmsg(
"database files are incompatible with server"),
4077 errdetail(
"The database cluster was initialized with CATALOG_VERSION_NO %d,"
4078 " but the server was compiled with CATALOG_VERSION_NO %d.",
4080 errhint(
"It looks like you need to initdb.")));
4083 (
errmsg(
"database files are incompatible with server"),
4084 errdetail(
"The database cluster was initialized with MAXALIGN %d,"
4085 " but the server was compiled with MAXALIGN %d.",
4087 errhint(
"It looks like you need to initdb.")));
4090 (
errmsg(
"database files are incompatible with server"),
4091 errdetail(
"The database cluster appears to use a different floating-point number format than the server executable."),
4092 errhint(
"It looks like you need to initdb.")));
4095 (
errmsg(
"database files are incompatible with server"),
4096 errdetail(
"The database cluster was initialized with BLCKSZ %d,"
4097 " but the server was compiled with BLCKSZ %d.",
4099 errhint(
"It looks like you need to recompile or initdb.")));
4102 (
errmsg(
"database files are incompatible with server"),
4103 errdetail(
"The database cluster was initialized with RELSEG_SIZE %d,"
4104 " but the server was compiled with RELSEG_SIZE %d.",
4106 errhint(
"It looks like you need to recompile or initdb.")));
4109 (
errmsg(
"database files are incompatible with server"),
4110 errdetail(
"The database cluster was initialized with XLOG_BLCKSZ %d,"
4111 " but the server was compiled with XLOG_BLCKSZ %d.",
4113 errhint(
"It looks like you need to recompile or initdb.")));
4116 (
errmsg(
"database files are incompatible with server"),
4117 errdetail(
"The database cluster was initialized with NAMEDATALEN %d,"
4118 " but the server was compiled with NAMEDATALEN %d.",
4120 errhint(
"It looks like you need to recompile or initdb.")));
4123 (
errmsg(
"database files are incompatible with server"),
4124 errdetail(
"The database cluster was initialized with INDEX_MAX_KEYS %d,"
4125 " but the server was compiled with INDEX_MAX_KEYS %d.",
4127 errhint(
"It looks like you need to recompile or initdb.")));
4130 (
errmsg(
"database files are incompatible with server"),
4131 errdetail(
"The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
4132 " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
4134 errhint(
"It looks like you need to recompile or initdb.")));
4137 (
errmsg(
"database files are incompatible with server"),
4138 errdetail(
"The database cluster was initialized with LOBLKSIZE %d,"
4139 " but the server was compiled with LOBLKSIZE %d.",
4141 errhint(
"It looks like you need to recompile or initdb.")));
4143 #ifdef USE_FLOAT8_BYVAL
4146 (
errmsg(
"database files are incompatible with server"),
4147 errdetail(
"The database cluster was initialized without USE_FLOAT8_BYVAL"
4148 " but the server was compiled with USE_FLOAT8_BYVAL."),
4149 errhint(
"It looks like you need to recompile or initdb.")));
4153 (
errmsg(
"database files are incompatible with server"),
4154 errdetail(
"The database cluster was initialized with USE_FLOAT8_BYVAL"
4155 " but the server was compiled without USE_FLOAT8_BYVAL."),
4156 errhint(
"It looks like you need to recompile or initdb.")));
4163 errmsg_plural(
"WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
4164 "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
4175 errmsg(
"\"min_wal_size\" must be at least twice \"wal_segment_size\"")));
4179 errmsg(
"\"max_wal_size\" must be at least twice \"wal_segment_size\"")));
4251 return nextUnloggedLSN;
4364 size =
add_size(size, XLOG_BLCKSZ);
4393 if (walDebugCxt == NULL)
4410 if (foundCFile || foundXLog)
4413 Assert(foundCFile && foundXLog);
4418 if (localControlFile)
4419 pfree(localControlFile);
4428 if (localControlFile)
4431 pfree(localControlFile);
4464 allocptr = (
char *)
TYPEALIGN(XLOG_BLCKSZ, allocptr);
4495 uint64 sysidentifier;
4517 sysidentifier = ((uint64) tv.tv_sec) << 32;
4518 sysidentifier |= ((uint64) tv.tv_usec) << 12;
4519 sysidentifier |= getpid() & 0xFFF;
4522 buffer = (
char *)
palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
4524 memset(page, 0, XLOG_BLCKSZ);
4581 *(recptr++) =
sizeof(checkPoint);
4582 memcpy(recptr, &checkPoint,
sizeof(checkPoint));
4583 recptr +=
sizeof(checkPoint);
4611 errmsg(
"could not write bootstrap write-ahead log file: %m")));
4619 errmsg(
"could not fsync bootstrap write-ahead log file: %m")));
4625 errmsg(
"could not close bootstrap write-ahead log file: %m")));
4656 static char buf[128];
4659 "%Y-%m-%d %H:%M:%S %Z",
4676 Assert(endTLI != newTLI);
4698 if (endLogSegNo == startLogSegNo)
4723 int save_errno = errno;
4729 errmsg(
"could not close file \"%s\": %m", xlogfname)));
4753 "recovery_end_command",
4843 (
errmsg(
"WAL was generated with wal_level=minimal, cannot continue recovering"),
4844 errdetail(
"This happens if you temporarily set wal_level=minimal on the server."),
4845 errhint(
"Use a backup taken after setting wal_level to higher than minimal.")));
4884 bool haveBackupLabel;
4888 bool performedWalRecovery;
4893 bool promoted =
false;
4909 (
errmsg(
"control file contains invalid checkpoint location")));
4920 (
errmsg(
"database system was shut down at %s",
4926 (
errmsg(
"database system was shut down in recovery at %s",
4932 (
errmsg(
"database system shutdown was interrupted; last known up at %s",
4938 (
errmsg(
"database system was interrupted while in recovery at %s",
4940 errhint(
"This probably means that some data is corrupted and"
4941 " you will have to use the last backup for recovery.")));
4946 (
errmsg(
"database system was interrupted while in recovery at log time %s",
4948 errhint(
"If this has occurred more than once some data might be corrupted"
4949 " and you might need to choose an earlier recovery target.")));
4954 (
errmsg(
"database system was interrupted; last known up at %s",
4960 (
errmsg(
"control file contains invalid database cluster state")));
4964 #ifdef XLOG_REPLAY_DELAY
5014 &haveBackupLabel, &haveTblspcMap);
5165 if (haveBackupLabel)
5271 running.
xcnt = nxids;
5280 running.
xids = xids;
5292 performedWalRecovery =
true;
5295 performedWalRecovery =
false;
5301 EndOfLog = endOfRecoveryInfo->
endOfLog;
5338 (
errmsg(
"WAL ends before end of online backup"),
5339 errhint(
"All WAL generated while online backup was taken must be available at recovery.")));
5342 (
errmsg(
"WAL ends before consistent recovery point")));
5390 (
errmsg(
"selected new timeline ID: %u", newTLI)));
5423 (
errmsg(
"archive recovery complete")));
5456 if (EndOfLog % XLOG_BLCKSZ != 0)
5469 memset(page +
len, 0, XLOG_BLCKSZ -
len);
5551 if (performedWalRecovery)