/*
 * Timeline ID constant, fixed at 1.  By its name this is presumably the
 * timeline assigned when the WAL is first bootstrapped -- TODO confirm
 * against its users, which are not visible in this excerpt.
 */
114 #define BootstrapTimeLineID 1
/*
 * Global debug switch, off by default.  Code in this file tests it before
 * emitting extra diagnostic messages (for example the DEBUG1 report of how
 * many WAL pages were initialized).  How it gets turned on is not visible
 * here -- presumably via a configuration setting; verify.
 */
143 bool XLOG_DEBUG =
false;
/*
 * Compile-time constant, 8.  By its name this is presumably the number of
 * WAL insertion locks -- TODO confirm; the code that sizes or indexes the
 * lock array is not visible in this excerpt.
 */
153 #define NUM_XLOGINSERT_LOCKS 8
176 #ifdef HAVE_FSYNC_WRITETHROUGH
/*
 * INSERT_FREESPACE(endptr)
 *		Number of bytes between endptr and the end of the XLOG_BLCKSZ-sized
 *		page that contains it.
 *
 * Yields 0 when endptr sits exactly on a page boundary, otherwise
 * XLOG_BLCKSZ minus endptr's offset within its page.
 *
 * The outer modulo folds the "exactly on a boundary" case (where the
 * subtraction gives XLOG_BLCKSZ) back to 0, which lets the macro expand
 * its argument only once; an argument with side effects is therefore
 * evaluated a single time.  endptr is expected to be a non-negative
 * (normally unsigned 64-bit) WAL position.
 */
#define INSERT_FREESPACE(endptr) \
	((XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ) % XLOG_BLCKSZ)
/*
 * NextBufIdx(idx)
 *		Index that follows idx, wrapping around: yields 0 when idx equals
 *		XLogCtl->XLogCacheBlck, otherwise idx + 1.
 *
 * Note: idx is expanded twice (once in the comparison, once in the
 * increment), so pass only expressions without side effects.
 */
590 #define NextBufIdx(idx) \
591 (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
/*
 * XLogRecPtrToBufIdx(recptr)
 *		Map a WAL position to a buffer index: the position is divided by
 *		XLOG_BLCKSZ to get a page number, which is then reduced modulo
 *		(XLogCtl->XLogCacheBlck + 1).
 *
 * Presumably XLogCacheBlck is the highest valid buffer index, so the
 * divisor is the number of buffers -- this matches NextBufIdx wrapping to
 * 0 at that value, but verify against the XLogCtl definition.
 */
597 #define XLogRecPtrToBufIdx(recptr) \
598 (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
/*
 * XLOG_BLCKSZ minus SizeOfXLogShortPHD.  Presumably the payload capacity
 * of a WAL page that carries the short page header -- TODO confirm;
 * SizeOfXLogShortPHD is defined outside this excerpt.
 */
603 #define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)
/*
 * ConvertToXSegs(x, segsize)
 *		Thin alias that forwards both arguments, parenthesized, to
 *		XLogMBVarToSegs().  Judging by that name it converts a value
 *		expressed in megabytes into a count of WAL segments of size
 *		segsize -- verify against XLogMBVarToSegs, which is defined
 *		elsewhere.
 */
609 #define ConvertToXSegs(x, segsize) XLogMBVarToSegs((x), (segsize))
745 bool topxid_included)
772 elog(
ERROR,
"cannot make new WAL entries during recovery");
842 (!prevDoPageWrites ||
906 rdata_crc = rechdr->
xl_crc;
909 rechdr->
xl_crc = rdata_crc;
917 StartPos, EndPos, insertTLI);
959 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
977 TRACE_POSTGRESQL_WAL_SWITCH();
988 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
992 if (offset == EndPos % XLOG_BLCKSZ)
1008 char *errormsg = NULL;
1022 for (; rdata != NULL; rdata = rdata->
next)
1045 errormsg ? errormsg :
"no error message");
1051 debug_reader->
record = decoded;
1053 debug_reader->
record = NULL;
1104 uint64 startbytepos;
1125 startbytepos =
Insert->CurrBytePos;
1126 endbytepos = startbytepos + size;
1127 prevbytepos =
Insert->PrevBytePos;
1128 Insert->CurrBytePos = endbytepos;
1129 Insert->PrevBytePos = startbytepos;
1159 uint64 startbytepos;
1174 startbytepos =
Insert->CurrBytePos;
1180 *EndPos = *StartPos = ptr;
1184 endbytepos = startbytepos + size;
1185 prevbytepos =
Insert->PrevBytePos;
1197 Insert->CurrBytePos = endbytepos;
1198 Insert->PrevBytePos = startbytepos;
1242 while (rdata != NULL)
1244 char *rdata_data = rdata->
data;
1245 int rdata_len = rdata->
len;
1247 while (rdata_len > freespace)
1253 memcpy(currpos, rdata_data, freespace);
1254 rdata_data += freespace;
1255 rdata_len -= freespace;
1256 written += freespace;
1257 CurrPos += freespace;
1288 memcpy(currpos, rdata_data, rdata_len);
1289 currpos += rdata_len;
1290 CurrPos += rdata_len;
1291 freespace -= rdata_len;
1292 written += rdata_len;
1294 rdata = rdata->
next;
1296 Assert(written == write_len);
1312 CurrPos += freespace;
1320 while (CurrPos < EndPos)
1344 CurrPos += XLOG_BLCKSZ;
1353 if (CurrPos != EndPos)
1354 elog(
PANIC,
"space reserved for WAL record does not match what was written");
1376 static int lockToTry = -1;
1378 if (lockToTry == -1)
1503 elog(
PANIC,
"cannot wait without a PGPROC structure");
1507 bytepos =
Insert->CurrBytePos;
1519 if (upto > reservedUpto)
1522 (
errmsg(
"request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1524 upto = reservedUpto;
1536 finishedUpto = reservedUpto;
1568 insertingat, &insertingat))
1579 }
while (insertingat < upto);
1582 finishedUpto = insertingat;
1584 return finishedUpto;
1608 static uint64 cachedPage = 0;
1609 static char *cachedPos = NULL;
1616 if (ptr / XLOG_BLCKSZ == cachedPage)
1620 return cachedPos + ptr % XLOG_BLCKSZ;
1647 expectedEndPtr = ptr;
1648 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1651 if (expectedEndPtr != endptr)
1676 initializedUpto = ptr;
1683 if (expectedEndPtr != endptr)
1684 elog(
PANIC,
"could not find WAL buffer for %X/%X",
1700 cachedPage = ptr / XLOG_BLCKSZ;
1706 return cachedPos + ptr % XLOG_BLCKSZ;
1734 seg_offset = XLOG_BLCKSZ;
1777 seg_offset = XLOG_BLCKSZ;
1784 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1808 offset = ptr % XLOG_BLCKSZ;
1912 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
1913 WriteRqst.
Write = OldPageRqstPtr;
1914 WriteRqst.
Flush = 0;
1918 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
1931 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
1941 MemSet((
char *) NewPage, 0, XLOG_BLCKSZ);
1949 NewPage->xlp_tli = tli;
1950 NewPage->xlp_pageaddr = NewPageBeginPtr;
1967 if (
Insert->runningBackups == 0)
1999 if (XLOG_DEBUG && npages > 0)
2001 elog(
DEBUG1,
"initialized %d pages, up to %X/%X",
2059 GUC_check_errdetail(
"The WAL segment size must be a power of two between 1 MB and 1 GB.");
2078 "max_slot_wal_keep_size");
2119 recycleSegNo = (
XLogSegNo) ceil(((
double) lastredoptr + distance) /
2122 if (recycleSegNo < minSegNo)
2123 recycleSegNo = minSegNo;
2124 if (recycleSegNo > maxSegNo)
2125 recycleSegNo = maxSegNo;
2127 return recycleSegNo;
2167 bool last_iteration;
2212 elog(
PANIC,
"xlog write request %X/%X is past end of log %X/%X",
2267 finishing_seg = !ispartialpage &&
2270 if (last_iteration ||
2282 nbytes = npages * (
Size) XLOG_BLCKSZ;
2326 errmsg(
"could not write to log file \"%s\" at offset %u, length %zu: %m",
2327 xlogfname, startoffset, nleft)));
2331 startoffset += written;
2332 }
while (nleft > 0);
2389 if (flexible && npages == 0)
2474 if (asyncXactLSN <= prevAsyncXactLSN)
2589 if (!force && newMinRecoveryPoint < lsn)
2591 "xlog min recovery request %X/%X is past current point %X/%X",
2604 (
errmsg_internal(
"updated min recovery point to %X/%X on timeline %u",
2606 newMinRecoveryPointTLI)));
2644 elog(
LOG,
"xlog flush request %X/%X; write %X/%X; flush %X/%X",
2661 WriteRqstPtr = record;
2673 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2741 WriteRqst.
Write = insertpos;
2742 WriteRqst.
Flush = insertpos;
2779 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2812 bool flexible =
true;
2835 WriteRqst.
Write -= WriteRqst.
Write % XLOG_BLCKSZ;
2901 WriteRqst.
Flush = 0;
2906 elog(
LOG,
"xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
2922 XLogWrite(WriteRqst, insertTLI, flexible);
3032 bool *added,
char *path)
3039 int open_flags = O_RDWR | O_CREAT | O_EXCL |
PG_BINARY;
3053 if (errno != ENOENT)
3056 errmsg(
"could not open file \"%s\": %m", path)));
3067 elog(
DEBUG2,
"creating and filling new WAL file");
3081 errmsg(
"could not create file \"%s\": %m", tmppath)));
3113 save_errno = errno ? errno : ENOSPC;
3131 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3142 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3149 errmsg(
"could not close file \"%s\": %m", tmppath)));
3156 installed_segno = logsegno;
3172 elog(
DEBUG2,
"done creating and filling new WAL file");
3219 errmsg(
"could not open file \"%s\": %m", path)));
3258 errmsg(
"could not open file \"%s\": %m", path)));
3272 errmsg(
"could not create file \"%s\": %m", tmppath)));
3281 nread = upto - nbytes;
3287 if (nread <
sizeof(buffer))
3288 memset(buffer.
data, 0,
sizeof(buffer));
3294 if (nread >
sizeof(buffer))
3295 nread =
sizeof(buffer);
3297 r =
read(srcfd, buffer.
data, nread);
3303 errmsg(
"could not read file \"%s\": %m",
3308 errmsg(
"could not read file \"%s\": read %d of %zu",
3309 path, r, (
Size) nread)));
3315 if ((
int)
write(
fd, buffer.
data,
sizeof(buffer)) != (
int)
sizeof(buffer))
3317 int save_errno = errno;
3324 errno = save_errno ? save_errno : ENOSPC;
3328 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3337 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3343 errmsg(
"could not close file \"%s\": %m", tmppath)));
3348 errmsg(
"could not close file \"%s\": %m", path)));
3354 elog(
ERROR,
"InstallXLogFileSegment should not have failed");
3388 struct stat stat_buf;
3409 while (
stat(path, &stat_buf) == 0)
3411 if ((*segno) >= max_segno)
3451 errmsg(
"could not open file \"%s\": %m", path)));
3470 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3472 (void) posix_fadvise(
openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3478 int save_errno = errno;
3484 errmsg(
"could not close file \"%s\": %m", xlogfname)));
3550 int save_errno = errno;
3557 if (segno <= lastRemovedSegNo)
3565 errmsg(
"requested WAL segment %s has already been removed",
3587 return lastRemovedSegNo;
3621 elog(
DEBUG2,
"removing all temporary WAL segments");
3628 if (strncmp(xlde->
d_name,
"xlogtemp.", 9) != 0)
3633 elog(
DEBUG2,
"removed temporary WAL segment \"%s\"", path);
3669 elog(
DEBUG2,
"attempting to remove WAL segments older than log file %s",
3692 if (strcmp(xlde->
d_name + 8, lastoff + 8) <= 0)
3739 recycleSegNo = endLogSegNo + 10;
3746 elog(
DEBUG2,
"attempting to remove WAL segments newer than log file %s",
3762 if (strncmp(xlde->
d_name, switchseg, 8) < 0 &&
3763 strcmp(xlde->
d_name + 8, switchseg + 8) > 0)
3801 const char *segname = segment_de->
d_name;
3811 *endlogSegNo <= recycleSegNo &&
3815 true, recycleSegNo, insertTLI))
3846 if (rename(path, newpath) != 0)
3850 errmsg(
"could not rename file \"%s\": %m",
3886 struct stat stat_buf;
3892 (
errmsg(
"required WAL directory \"%s\" does not exist",
3897 if (
stat(path, &stat_buf) == 0)
3902 (
errmsg(
"required WAL directory \"%s\" does not exist",
3908 (
errmsg(
"creating missing WAL directory \"%s\"", path)));
3911 (
errmsg(
"could not create missing directory \"%s\": %m",
3936 elog(
DEBUG2,
"removing WAL backup history file \"%s\"",
3977 (
errcode(ERRCODE_INTERNAL_ERROR),
3978 errmsg(
"could not generate secret authorization token")));
4049 errmsg(
"could not create file \"%s\": %m",
4061 errmsg(
"could not write to file \"%s\": %m",
4070 errmsg(
"could not fsync file \"%s\": %m",
4077 errmsg(
"could not close file \"%s\": %m",
4086 static char wal_segsz_str[20];
4097 errmsg(
"could not open file \"%s\": %m",
4107 errmsg(
"could not read file \"%s\": %m",
4112 errmsg(
"could not read file \"%s\": read %d of %zu",
4128 (
errmsg(
"database files are incompatible with server"),
4129 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4130 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4133 errhint(
"This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4137 (
errmsg(
"database files are incompatible with server"),
4138 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d,"
4139 " but the server was compiled with PG_CONTROL_VERSION %d.",
4141 errhint(
"It looks like you need to initdb.")));
4152 (
errmsg(
"incorrect checksum in control file")));
4161 (
errmsg(
"database files are incompatible with server"),
4162 errdetail(
"The database cluster was initialized with CATALOG_VERSION_NO %d,"
4163 " but the server was compiled with CATALOG_VERSION_NO %d.",
4165 errhint(
"It looks like you need to initdb.")));
4168 (
errmsg(
"database files are incompatible with server"),
4169 errdetail(
"The database cluster was initialized with MAXALIGN %d,"
4170 " but the server was compiled with MAXALIGN %d.",
4172 errhint(
"It looks like you need to initdb.")));
4175 (
errmsg(
"database files are incompatible with server"),
4176 errdetail(
"The database cluster appears to use a different floating-point number format than the server executable."),
4177 errhint(
"It looks like you need to initdb.")));
4180 (
errmsg(
"database files are incompatible with server"),
4181 errdetail(
"The database cluster was initialized with BLCKSZ %d,"
4182 " but the server was compiled with BLCKSZ %d.",
4184 errhint(
"It looks like you need to recompile or initdb.")));
4187 (
errmsg(
"database files are incompatible with server"),
4188 errdetail(
"The database cluster was initialized with RELSEG_SIZE %d,"
4189 " but the server was compiled with RELSEG_SIZE %d.",
4191 errhint(
"It looks like you need to recompile or initdb.")));
4194 (
errmsg(
"database files are incompatible with server"),
4195 errdetail(
"The database cluster was initialized with XLOG_BLCKSZ %d,"
4196 " but the server was compiled with XLOG_BLCKSZ %d.",
4198 errhint(
"It looks like you need to recompile or initdb.")));
4201 (
errmsg(
"database files are incompatible with server"),
4202 errdetail(
"The database cluster was initialized with NAMEDATALEN %d,"
4203 " but the server was compiled with NAMEDATALEN %d.",
4205 errhint(
"It looks like you need to recompile or initdb.")));
4208 (
errmsg(
"database files are incompatible with server"),
4209 errdetail(
"The database cluster was initialized with INDEX_MAX_KEYS %d,"
4210 " but the server was compiled with INDEX_MAX_KEYS %d.",
4212 errhint(
"It looks like you need to recompile or initdb.")));
4215 (
errmsg(
"database files are incompatible with server"),
4216 errdetail(
"The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d,"
4217 " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.",
4219 errhint(
"It looks like you need to recompile or initdb.")));
4222 (
errmsg(
"database files are incompatible with server"),
4223 errdetail(
"The database cluster was initialized with LOBLKSIZE %d,"
4224 " but the server was compiled with LOBLKSIZE %d.",
4226 errhint(
"It looks like you need to recompile or initdb.")));
4228 #ifdef USE_FLOAT8_BYVAL
4231 (
errmsg(
"database files are incompatible with server"),
4232 errdetail(
"The database cluster was initialized without USE_FLOAT8_BYVAL"
4233 " but the server was compiled with USE_FLOAT8_BYVAL."),
4234 errhint(
"It looks like you need to recompile or initdb.")));
4238 (
errmsg(
"database files are incompatible with server"),
4239 errdetail(
"The database cluster was initialized with USE_FLOAT8_BYVAL"
4240 " but the server was compiled without USE_FLOAT8_BYVAL."),
4241 errhint(
"It looks like you need to recompile or initdb.")));
4248 errmsg_plural(
"invalid WAL segment size in control file (%d byte)",
4249 "invalid WAL segment size in control file (%d bytes)",
4252 errdetail(
"The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4261 errmsg(
"\"min_wal_size\" must be at least twice \"wal_segment_size\"")));
4265 errmsg(
"\"max_wal_size\" must be at least twice \"wal_segment_size\"")));
4337 return nextUnloggedLSN;
4426 foreach(l, elemlist)
4428 char *tok = (
char *)
lfirst(l);
4434 for (rmid = 0; rmid <=
RM_MAX_ID; rmid++)
4436 newwalconsistency[rmid] =
true;
4443 for (rmid = 0; rmid <=
RM_MAX_ID; rmid++)
4448 newwalconsistency[rmid] =
true;
4480 memcpy(*extra, newwalconsistency, (
RM_MAX_ID + 1) *
sizeof(
bool));
4546 return "(disabled)";
4664 if (walDebugCxt == NULL)
4681 if (foundCFile || foundXLog)
4684 Assert(foundCFile && foundXLog);
4689 if (localControlFile)
4690 pfree(localControlFile);
4699 if (localControlFile)
4702 pfree(localControlFile);
4735 allocptr = (
char *)
TYPEALIGN(XLOG_BLCKSZ, allocptr);
4766 uint64 sysidentifier;
4786 sysidentifier = ((uint64) tv.tv_sec) << 32;
4787 sysidentifier |= ((uint64) tv.tv_usec) << 12;
4788 sysidentifier |= getpid() & 0xFFF;
4791 buffer = (
char *)
palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
4793 memset(page, 0, XLOG_BLCKSZ);
4850 *(recptr++) =
sizeof(checkPoint);
4851 memcpy(recptr, &checkPoint,
sizeof(checkPoint));
4852 recptr +=
sizeof(checkPoint);
4880 errmsg(
"could not write bootstrap write-ahead log file: %m")));
4888 errmsg(
"could not fsync bootstrap write-ahead log file: %m")));
4894 errmsg(
"could not close bootstrap write-ahead log file: %m")));
4925 static char buf[128];
4928 "%Y-%m-%d %H:%M:%S %Z",
4945 Assert(endTLI != newTLI);
4967 if (endLogSegNo == startLogSegNo)
4991 int save_errno = errno;
4997 errmsg(
"could not close file \"%s\": %m", xlogfname)));
5021 "recovery_end_command",
5023 WAIT_EVENT_RECOVERY_END_COMMAND);
5111 (
errmsg(
"WAL was generated with wal_level=minimal, cannot continue recovering"),
5112 errdetail(
"This happens if you temporarily set wal_level=minimal on the server."),
5113 errhint(
"Use a backup taken after setting wal_level to higher than minimal.")));
5152 bool haveBackupLabel;
5156 bool performedWalRecovery;
5161 bool promoted =
false;
5177 (
errmsg(
"control file contains invalid checkpoint location")));
5188 (
errmsg(
"database system was shut down at %s",
5194 (
errmsg(
"database system was shut down in recovery at %s",
5200 (
errmsg(
"database system shutdown was interrupted; last known up at %s",
5206 (
errmsg(
"database system was interrupted while in recovery at %s",
5208 errhint(
"This probably means that some data is corrupted and"
5209 " you will have to use the last backup for recovery.")));
5214 (
errmsg(
"database system was interrupted while in recovery at log time %s",
5216 errhint(
"If this has occurred more than once some data might be corrupted"
5217 " and you might need to choose an earlier recovery target.")));
5222 (
errmsg(
"database system was interrupted; last known up at %s",
5228 (
errmsg(
"control file contains invalid database cluster state")));
5232 #ifdef XLOG_REPLAY_DELAY
5282 &haveBackupLabel, &haveTblspcMap);
5433 if (haveBackupLabel)
5539 running.
xcnt = nxids;
5548 running.
xids = xids;
5560 performedWalRecovery =
true;
5563 performedWalRecovery =
false;
5569 EndOfLog = endOfRecoveryInfo->
endOfLog;
5612 (
errmsg(
"WAL ends before end of online backup"),
5613 errhint(
"All WAL generated while online backup was taken must be available at recovery.")));
5616 (
errmsg(
"WAL ends before consistent recovery point")));