/*
 * Constant timeline identifier with the value 1.
 * NOTE(review): judging by the name, this is the timeline assigned to the
 * WAL written at bootstrap time -- confirm against the users of this macro.
 */
111 #define BootstrapTimeLineID 1
/*
 * Run-time switch for extra WAL debugging output; off by default.
 * Code in this file tests it before emitting debug-only log messages
 * (for example the "initialized %d pages, up to %X/%X" message).
 */
140 bool XLOG_DEBUG =
false;
/*
 * Compile-time count of XLog insert locks (8).
 * NOTE(review): presumably sizes the array of WAL insertion locks -- confirm
 * at the point of use, which is not visible here.
 */
150 #define NUM_XLOGINSERT_LOCKS 8
173 #ifdef HAVE_FSYNC_WRITETHROUGH
/*
 * INSERT_FREESPACE(endptr)
 *
 * Number of bytes between 'endptr' and the next XLOG_BLCKSZ boundary.
 * Evaluates to 0 when 'endptr' is already an exact multiple of XLOG_BLCKSZ
 * (i.e. it does NOT report a whole free page in that case).
 *
 * 'endptr' is expanded more than once, so the argument must not have side
 * effects.
 */
580 #define INSERT_FREESPACE(endptr) \
581 (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
/*
 * NextBufIdx(idx)
 *
 * Successor of buffer index 'idx', wrapping around to 0 once 'idx' equals
 * XLogCtl->XLogCacheBlck.  The wrap test implies XLogCacheBlck is the
 * highest valid index rather than the number of buffers.
 *
 * 'idx' is expanded twice, so the argument must not have side effects.
 */
584 #define NextBufIdx(idx) \
585 (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
/*
 * XLogRecPtrToBufIdx(recptr)
 *
 * Map a WAL position to a buffer index: divide 'recptr' by XLOG_BLCKSZ to
 * get its page number, then reduce that modulo (XLogCtl->XLogCacheBlck + 1),
 * so results always fall in the range 0 .. XLogCacheBlck.
 */
591 #define XLogRecPtrToBufIdx(recptr) \
592 (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
/*
 * Bytes of an XLOG_BLCKSZ-sized page left over after subtracting
 * SizeOfXLogShortPHD.
 * NOTE(review): SizeOfXLogShortPHD is presumably the size of the short
 * (non-first-in-segment) page header -- confirm in its defining header.
 */
597 #define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)
/*
 * ConvertToXSegs(x, segsize)
 *
 * Thin alias that forwards both arguments unchanged to XLogMBVarToSegs().
 * NOTE(review): by the callee's name, 'x' is a size expressed in megabytes
 * and the result a count of WAL segments of 'segsize' bytes -- confirm
 * against XLogMBVarToSegs's definition.
 */
603 #define ConvertToXSegs(x, segsize) XLogMBVarToSegs((x), (segsize))
620 #define RefreshXLogWriteResult(_target) \
622 _target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
624 _target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
752 bool topxid_included)
779 elog(
ERROR,
"cannot make new WAL entries during recovery");
849 (!prevDoPageWrites ||
913 rdata_crc = rechdr->
xl_crc;
916 rechdr->
xl_crc = rdata_crc;
924 StartPos, EndPos, insertTLI);
966 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
983 TRACE_POSTGRESQL_WAL_SWITCH();
994 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
998 if (offset == EndPos % XLOG_BLCKSZ)
1014 char *errormsg = NULL;
1028 for (; rdata != NULL; rdata = rdata->
next)
1039 .segment_open = NULL,
1040 .segment_close = NULL),
1053 errormsg ? errormsg :
"no error message");
1059 debug_reader->
record = decoded;
1061 debug_reader->
record = NULL;
1112 uint64 startbytepos;
1133 startbytepos =
Insert->CurrBytePos;
1134 endbytepos = startbytepos +
size;
1135 prevbytepos =
Insert->PrevBytePos;
1136 Insert->CurrBytePos = endbytepos;
1137 Insert->PrevBytePos = startbytepos;
1167 uint64 startbytepos;
1182 startbytepos =
Insert->CurrBytePos;
1188 *EndPos = *StartPos = ptr;
1192 endbytepos = startbytepos +
size;
1193 prevbytepos =
Insert->PrevBytePos;
1205 Insert->CurrBytePos = endbytepos;
1206 Insert->PrevBytePos = startbytepos;
1250 while (rdata != NULL)
1252 const char *rdata_data = rdata->
data;
1253 int rdata_len = rdata->
len;
1255 while (rdata_len > freespace)
1261 memcpy(currpos, rdata_data, freespace);
1262 rdata_data += freespace;
1263 rdata_len -= freespace;
1264 written += freespace;
1265 CurrPos += freespace;
1296 memcpy(currpos, rdata_data, rdata_len);
1297 currpos += rdata_len;
1298 CurrPos += rdata_len;
1299 freespace -= rdata_len;
1300 written += rdata_len;
1302 rdata = rdata->
next;
1304 Assert(written == write_len);
1320 CurrPos += freespace;
1328 while (CurrPos < EndPos)
1352 CurrPos += XLOG_BLCKSZ;
1361 if (CurrPos != EndPos)
1364 errmsg_internal(
"space reserved for WAL record does not match what was written"));
1386 static int lockToTry = -1;
1388 if (lockToTry == -1)
1514 elog(
PANIC,
"cannot wait without a PGPROC structure");
1521 if (upto <= inserted)
1526 bytepos =
Insert->CurrBytePos;
1538 if (upto > reservedUpto)
1541 (
errmsg(
"request to flush past end of generated WAL; request %X/%X, current position %X/%X",
1543 upto = reservedUpto;
1555 finishedUpto = reservedUpto;
1587 insertingat, &insertingat))
1598 }
while (insertingat < upto);
1601 finishedUpto = insertingat;
1612 return finishedUpto;
1636 static uint64 cachedPage = 0;
1637 static char *cachedPos = NULL;
1644 if (ptr / XLOG_BLCKSZ == cachedPage)
1648 return cachedPos + ptr % XLOG_BLCKSZ;
1671 expectedEndPtr = ptr;
1672 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1675 if (expectedEndPtr != endptr)
1700 initializedUpto = ptr;
1707 if (expectedEndPtr != endptr)
1708 elog(
PANIC,
"could not find WAL buffer for %X/%X",
1724 cachedPage = ptr / XLOG_BLCKSZ;
1730 return cachedPos + ptr % XLOG_BLCKSZ;
1751 char *pdst = dstbuf;
1754 Size nbytes = count;
1766 if (startptr + count > inserted)
1768 errmsg(
"cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
1788 uint32 offset = recptr % XLOG_BLCKSZ;
1800 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1807 if (expectedEndPtr != endptr)
1816 psrc = page + offset;
1817 npagebytes =
Min(nbytes, XLOG_BLCKSZ - offset);
1826 memcpy(pdst, psrc, npagebytes);
1839 if (expectedEndPtr != endptr)
1843 recptr += npagebytes;
1844 nbytes -= npagebytes;
1847 Assert(pdst - dstbuf <= count);
1849 return pdst - dstbuf;
1877 seg_offset = XLOG_BLCKSZ;
1920 seg_offset = XLOG_BLCKSZ;
1927 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1951 offset = ptr % XLOG_BLCKSZ;
2055 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2056 WriteRqst.
Write = OldPageRqstPtr;
2057 WriteRqst.
Flush = 0;
2061 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2074 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2092 MemSet((
char *) NewPage, 0, XLOG_BLCKSZ);
2100 NewPage->xlp_tli = tli;
2101 NewPage->xlp_pageaddr = NewPageBeginPtr;
2118 if (
Insert->runningBackups == 0)
2149 if (XLOG_DEBUG && npages > 0)
2151 elog(
DEBUG1,
"initialized %d pages, up to %X/%X",
2209 GUC_check_errdetail(
"The WAL segment size must be a power of two between 1 MB and 1 GB.");
2228 "max_slot_wal_keep_size");
2269 recycleSegNo = (
XLogSegNo) ceil(((
double) lastredoptr + distance) /
2272 if (recycleSegNo < minSegNo)
2273 recycleSegNo = minSegNo;
2274 if (recycleSegNo > maxSegNo)
2275 recycleSegNo = maxSegNo;
2277 return recycleSegNo;
2317 bool last_iteration;
2362 elog(
PANIC,
"xlog write request %X/%X is past end of log %X/%X",
2417 finishing_seg = !ispartialpage &&
2420 if (last_iteration ||
2432 nbytes = npages * (
Size) XLOG_BLCKSZ;
2476 errmsg(
"could not write to log file \"%s\" at offset %u, length %zu: %m",
2477 xlogfname, startoffset, nleft)));
2481 startoffset += written;
2482 }
while (nleft > 0);
2539 if (flexible && npages == 0)
2604 #ifdef USE_ASSERT_CHECKING
2649 if (asyncXactLSN <= prevAsyncXactLSN)
2772 if (!force && newMinRecoveryPoint < lsn)
2774 "xlog min recovery request %X/%X is past current point %X/%X",
2787 (
errmsg_internal(
"updated min recovery point to %X/%X on timeline %u",
2789 newMinRecoveryPointTLI)));
2827 elog(
LOG,
"xlog flush request %X/%X; write %X/%X; flush %X/%X",
2844 WriteRqstPtr = record;
2864 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2922 WriteRqst.
Write = insertpos;
2923 WriteRqst.
Flush = insertpos;
2960 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
2993 bool flexible =
true;
3015 WriteRqst.
Write -= WriteRqst.
Write % XLOG_BLCKSZ;
3082 WriteRqst.
Flush = 0;
3087 elog(
LOG,
"xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
3103 XLogWrite(WriteRqst, insertTLI, flexible);
3211 bool *added,
char *path)
3218 int open_flags = O_RDWR | O_CREAT | O_EXCL |
PG_BINARY;
3232 if (errno != ENOENT)
3235 errmsg(
"could not open file \"%s\": %m", path)));
3246 elog(
DEBUG2,
"creating and filling new WAL file");
3260 errmsg(
"could not create file \"%s\": %m", tmppath)));
3292 save_errno = errno ? errno : ENOSPC;
3310 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3321 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3328 errmsg(
"could not close file \"%s\": %m", tmppath)));
3335 installed_segno = logsegno;
3351 elog(
DEBUG2,
"done creating and filling new WAL file");
3398 errmsg(
"could not open file \"%s\": %m", path)));
3437 errmsg(
"could not open file \"%s\": %m", path)));
3451 errmsg(
"could not create file \"%s\": %m", tmppath)));
3460 nread = upto - nbytes;
3466 if (nread <
sizeof(buffer))
3467 memset(buffer.
data, 0,
sizeof(buffer));
3473 if (nread >
sizeof(buffer))
3474 nread =
sizeof(buffer);
3476 r =
read(srcfd, buffer.
data, nread);
3482 errmsg(
"could not read file \"%s\": %m",
3487 errmsg(
"could not read file \"%s\": read %d of %zu",
3488 path, r, (
Size) nread)));
3494 if ((
int)
write(
fd, buffer.
data,
sizeof(buffer)) != (
int)
sizeof(buffer))
3496 int save_errno = errno;
3503 errno = save_errno ? save_errno : ENOSPC;
3507 errmsg(
"could not write to file \"%s\": %m", tmppath)));
3516 errmsg(
"could not fsync file \"%s\": %m", tmppath)));
3522 errmsg(
"could not close file \"%s\": %m", tmppath)));
3527 errmsg(
"could not close file \"%s\": %m", path)));
3533 elog(
ERROR,
"InstallXLogFileSegment should not have failed");
3567 struct stat stat_buf;
3588 while (
stat(path, &stat_buf) == 0)
3590 if ((*segno) >= max_segno)
3630 errmsg(
"could not open file \"%s\": %m", path)));
3649 #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3651 (void) posix_fadvise(
openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3657 int save_errno = errno;
3663 errmsg(
"could not close file \"%s\": %m", xlogfname)));
3729 int save_errno = errno;
3736 if (segno <= lastRemovedSegNo)
3744 errmsg(
"requested WAL segment %s has already been removed",
3766 return lastRemovedSegNo;
3795 if (tli != file_tli)
3799 if (oldest_segno == 0 || file_segno < oldest_segno)
3800 oldest_segno = file_segno;
3804 return oldest_segno;
3837 elog(
DEBUG2,
"removing all temporary WAL segments");
3844 if (strncmp(xlde->
d_name,
"xlogtemp.", 9) != 0)
3849 elog(
DEBUG2,
"removed temporary WAL segment \"%s\"", path);
3885 elog(
DEBUG2,
"attempting to remove WAL segments older than log file %s",
3908 if (strcmp(xlde->
d_name + 8, lastoff + 8) <= 0)
3955 recycleSegNo = endLogSegNo + 10;
3962 elog(
DEBUG2,
"attempting to remove WAL segments newer than log file %s",
3978 if (strncmp(xlde->
d_name, switchseg, 8) < 0 &&
3979 strcmp(xlde->
d_name + 8, switchseg + 8) > 0)
4017 const char *segname = segment_de->
d_name;
4027 *endlogSegNo <= recycleSegNo &&
4031 true, recycleSegNo, insertTLI))
4062 if (rename(path, newpath) != 0)
4066 errmsg(
"could not rename file \"%s\": %m",
4102 struct stat stat_buf;
4109 errmsg(
"required WAL directory \"%s\" does not exist",
4114 if (
stat(path, &stat_buf) == 0)
4120 errmsg(
"required WAL directory \"%s\" does not exist",
4126 (
errmsg(
"creating missing WAL directory \"%s\"", path)));
4130 errmsg(
"could not create missing directory \"%s\": %m",
4136 if (
stat(path, &stat_buf) == 0)
4141 (
errmsg(
"required WAL directory \"%s\" does not exist",
4147 (
errmsg(
"creating missing WAL directory \"%s\"", path)));
4150 (
errmsg(
"could not create missing directory \"%s\": %m",
4175 elog(
DEBUG2,
"removing WAL backup history file \"%s\"",
4216 (
errcode(ERRCODE_INTERNAL_ERROR),
4217 errmsg(
"could not generate secret authorization token")));
4288 errmsg(
"could not create file \"%s\": %m",
4300 errmsg(
"could not write to file \"%s\": %m",
4309 errmsg(
"could not fsync file \"%s\": %m",
4316 errmsg(
"could not close file \"%s\": %m",
4325 char wal_segsz_str[20];
4336 errmsg(
"could not open file \"%s\": %m",
4346 errmsg(
"could not read file \"%s\": %m",
4351 errmsg(
"could not read file \"%s\": read %d of %zu",
4367 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4368 errmsg(
"database files are incompatible with server"),
4369 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4370 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4373 errhint(
"This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4377 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4378 errmsg(
"database files are incompatible with server"),
4379 errdetail(
"The database cluster was initialized with PG_CONTROL_VERSION %d,"
4380 " but the server was compiled with PG_CONTROL_VERSION %d.",
4382 errhint(
"It looks like you need to initdb.")));
4393 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4394 errmsg(
"incorrect checksum in control file")));
4403 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4404 errmsg(
"database files are incompatible with server"),
4406 errdetail(
"The database cluster was initialized with %s %d,"
4407 " but the server was compiled with %s %d.",
4410 errhint(
"It looks like you need to initdb.")));
4413 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4414 errmsg(
"database files are incompatible with server"),
4416 errdetail(
"The database cluster was initialized with %s %d,"
4417 " but the server was compiled with %s %d.",
4419 "MAXALIGN", MAXIMUM_ALIGNOF),
4420 errhint(
"It looks like you need to initdb.")));
4423 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4424 errmsg(
"database files are incompatible with server"),
4425 errdetail(
"The database cluster appears to use a different floating-point number format than the server executable."),
4426 errhint(
"It looks like you need to initdb.")));
4429 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4430 errmsg(
"database files are incompatible with server"),
4432 errdetail(
"The database cluster was initialized with %s %d,"
4433 " but the server was compiled with %s %d.",
4436 errhint(
"It looks like you need to recompile or initdb.")));
4439 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4440 errmsg(
"database files are incompatible with server"),
4442 errdetail(
"The database cluster was initialized with %s %d,"
4443 " but the server was compiled with %s %d.",
4445 "RELSEG_SIZE", RELSEG_SIZE),
4446 errhint(
"It looks like you need to recompile or initdb.")));
4449 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4450 errmsg(
"database files are incompatible with server"),
4452 errdetail(
"The database cluster was initialized with %s %d,"
4453 " but the server was compiled with %s %d.",
4455 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4456 errhint(
"It looks like you need to recompile or initdb.")));
4459 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4460 errmsg(
"database files are incompatible with server"),
4462 errdetail(
"The database cluster was initialized with %s %d,"
4463 " but the server was compiled with %s %d.",
4466 errhint(
"It looks like you need to recompile or initdb.")));
4469 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4470 errmsg(
"database files are incompatible with server"),
4472 errdetail(
"The database cluster was initialized with %s %d,"
4473 " but the server was compiled with %s %d.",
4476 errhint(
"It looks like you need to recompile or initdb.")));
4479 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4480 errmsg(
"database files are incompatible with server"),
4482 errdetail(
"The database cluster was initialized with %s %d,"
4483 " but the server was compiled with %s %d.",
4486 errhint(
"It looks like you need to recompile or initdb.")));
4489 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4490 errmsg(
"database files are incompatible with server"),
4492 errdetail(
"The database cluster was initialized with %s %d,"
4493 " but the server was compiled with %s %d.",
4496 errhint(
"It looks like you need to recompile or initdb.")));
4498 #ifdef USE_FLOAT8_BYVAL
4501 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4502 errmsg(
"database files are incompatible with server"),
4503 errdetail(
"The database cluster was initialized without USE_FLOAT8_BYVAL"
4504 " but the server was compiled with USE_FLOAT8_BYVAL."),
4505 errhint(
"It looks like you need to recompile or initdb.")));
4509 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4510 errmsg(
"database files are incompatible with server"),
4511 errdetail(
"The database cluster was initialized with USE_FLOAT8_BYVAL"
4512 " but the server was compiled without USE_FLOAT8_BYVAL."),
4513 errhint(
"It looks like you need to recompile or initdb.")));
4520 errmsg_plural(
"invalid WAL segment size in control file (%d byte)",
4521 "invalid WAL segment size in control file (%d bytes)",
4524 errdetail(
"The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4534 errmsg(
"\"%s\" must be at least twice \"%s\"",
4535 "min_wal_size",
"wal_segment_size")));
4540 errmsg(
"\"%s\" must be at least twice \"%s\"",
4541 "max_wal_size",
"wal_segment_size")));
4695 foreach(l, elemlist)
4697 char *tok = (
char *)
lfirst(l);
4703 for (rmid = 0; rmid <=
RM_MAX_ID; rmid++)
4705 newwalconsistency[rmid] =
true;
4712 for (rmid = 0; rmid <=
RM_MAX_ID; rmid++)
4717 newwalconsistency[rmid] =
true;
4749 memcpy(*extra, newwalconsistency, (
RM_MAX_ID + 1) *
sizeof(
bool));
4815 return "(disabled)";
4933 if (walDebugCxt == NULL)
4950 if (foundCFile || foundXLog)
4953 Assert(foundCFile && foundXLog);
4958 if (localControlFile)
4959 pfree(localControlFile);
4968 if (localControlFile)
4971 pfree(localControlFile);
5007 allocptr = (
char *)
TYPEALIGN(XLOG_BLCKSZ, allocptr);
5041 uint64 sysidentifier;
5061 sysidentifier = ((uint64) tv.tv_sec) << 32;
5062 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5063 sysidentifier |= getpid() & 0xFFF;
5066 buffer = (
char *)
palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5068 memset(page, 0, XLOG_BLCKSZ);
5126 *(recptr++) =
sizeof(checkPoint);
5127 memcpy(recptr, &checkPoint,
sizeof(checkPoint));
5128 recptr +=
sizeof(checkPoint);
5156 errmsg(
"could not write bootstrap write-ahead log file: %m")));
5164 errmsg(
"could not fsync bootstrap write-ahead log file: %m")));
5170 errmsg(
"could not close bootstrap write-ahead log file: %m")));
5204 "%Y-%m-%d %H:%M:%S %Z",
5221 Assert(endTLI != newTLI);
5243 if (endLogSegNo == startLogSegNo)
5267 int save_errno = errno;
5273 errmsg(
"could not close file \"%s\": %m", xlogfname)));
5297 "recovery_end_command",
5299 WAIT_EVENT_RECOVERY_END_COMMAND);
5394 (
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5395 errmsg(
"WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5396 errdetail(
"This happens if you temporarily set \"wal_level=minimal\" on the server."),
5397 errhint(
"Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5436 bool haveBackupLabel;
5440 bool performedWalRecovery;
5445 bool promoted =
false;
5462 errmsg(
"control file contains invalid checkpoint location")));
5473 (
errmsg(
"database system was shut down at %s",
5479 (
errmsg(
"database system was shut down in recovery at %s",
5485 (
errmsg(
"database system shutdown was interrupted; last known up at %s",
5491 (
errmsg(
"database system was interrupted while in recovery at %s",
5493 errhint(
"This probably means that some data is corrupted and"
5494 " you will have to use the last backup for recovery.")));
5499 (
errmsg(
"database system was interrupted while in recovery at log time %s",
5501 errhint(
"If this has occurred more than once some data might be corrupted"
5502 " and you might need to choose an earlier recovery target.")));
5507 (
errmsg(
"database system was interrupted; last known up at %s",
5514 errmsg(
"control file contains invalid database cluster state")));
5518 #ifdef XLOG_REPLAY_DELAY
5568 &haveBackupLabel, &haveTblspcMap);