PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
backup_manifest.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * backup_manifest.c
 *	  code for generating and sending a backup manifest
 *
 * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/backup/backup_manifest.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/timeline.h"
#include "access/xlog.h"
#include "backup/backup_manifest.h"
#include "backup/basebackup_sink.h"
#include "common/relpath.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/json.h"
23
24static void AppendStringToManifest(backup_manifest_info *manifest, const char *s);
25
26/*
27 * Does the user want a backup manifest?
28 *
29 * It's simplest to always have a manifest_info object, so that we don't need
30 * checks for NULL pointers in too many places. However, if the user doesn't
31 * want a manifest, we set manifest->buffile to NULL.
32 */
33static inline bool
35{
36 return (manifest->buffile != NULL);
37}
38
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then pfree'd.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement. A bare { ... } block would
 * leave a stray empty statement behind and break an unbraced if/else.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
48
49/*
50 * Initialize state so that we can construct a backup manifest.
51 *
52 * NB: Although the checksum type for the data files is configurable, the
53 * checksum for the manifest itself always uses SHA-256. See comments in
54 * SendBackupManifest.
55 */
56void
58 backup_manifest_option want_manifest,
59 pg_checksum_type manifest_checksum_type)
60{
61 memset(manifest, 0, sizeof(backup_manifest_info));
62 manifest->checksum_type = manifest_checksum_type;
63
64 if (want_manifest == MANIFEST_OPTION_NO)
65 manifest->buffile = NULL;
66 else
67 {
68 manifest->buffile = BufFileCreateTemp(false);
70 if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
71 elog(ERROR, "failed to initialize checksum of backup manifest: %s",
72 pg_cryptohash_error(manifest->manifest_ctx));
73 }
74
75 manifest->manifest_size = UINT64CONST(0);
76 manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
77 manifest->first_file = true;
78 manifest->still_checksumming = true;
79
80 if (want_manifest != MANIFEST_OPTION_NO)
82 "{ \"PostgreSQL-Backup-Manifest-Version\": 2,\n"
83 "\"System-Identifier\": " UINT64_FORMAT ",\n"
84 "\"Files\": [",
86}
87
88/*
89 * Free resources assigned to a backup manifest constructed.
90 */
91void
93{
94 pg_cryptohash_free(manifest->manifest_ctx);
95 manifest->manifest_ctx = NULL;
96}
97
98/*
99 * Add an entry to the backup manifest for a file.
100 */
101void
103 const char *pathname, size_t size, pg_time_t mtime,
104 pg_checksum_context *checksum_ctx)
105{
106 char pathbuf[MAXPGPATH];
107 int pathlen;
109
111 return;
112
113 /*
114 * If this file is part of a tablespace, the pathname passed to this
115 * function will be relative to the tar file that contains it. We want the
116 * pathname relative to the data directory (ignoring the intermediate
117 * symlink traversal).
118 */
119 if (OidIsValid(spcoid))
120 {
121 snprintf(pathbuf, sizeof(pathbuf), "%s/%u/%s", PG_TBLSPC_DIR, spcoid,
122 pathname);
123 pathname = pathbuf;
124 }
125
126 /*
127 * Each file's entry needs to be separated from any entry that follows by
128 * a comma, but there's no comma before the first one or after the last
129 * one. To make that work, adding a file to the manifest starts by
130 * terminating the most recently added line, with a comma if appropriate,
131 * but does not terminate the line inserted for this file.
132 */
134 if (manifest->first_file)
135 {
137 manifest->first_file = false;
138 }
139 else
141
142 /*
143 * Write the relative pathname to this file out to the manifest. The
144 * manifest is always stored in UTF-8, so we have to encode paths that are
145 * not valid in that encoding.
146 */
147 pathlen = strlen(pathname);
148 if (!manifest->force_encode &&
149 pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
150 {
151 appendStringInfoString(&buf, "{ \"Path\": ");
152 escape_json_with_len(&buf, pathname, pathlen);
154 }
155 else
156 {
157 appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
158 enlargeStringInfo(&buf, 2 * pathlen);
159 buf.len += hex_encode(pathname, pathlen,
160 &buf.data[buf.len]);
161 appendStringInfoString(&buf, "\", ");
162 }
163
164 appendStringInfo(&buf, "\"Size\": %zu, ", size);
165
166 /*
167 * Convert last modification time to a string and append it to the
168 * manifest. Since it's not clear what time zone to use and since time
169 * zone definitions can change, possibly causing confusion, use GMT
170 * always.
171 */
172 appendStringInfoString(&buf, "\"Last-Modified\": \"");
173 enlargeStringInfo(&buf, 128);
174 buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
175 pg_gmtime(&mtime));
177
178 /* Add checksum information. */
179 if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
180 {
181 uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
182 int checksumlen;
183
184 checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
185 if (checksumlen < 0)
186 elog(ERROR, "could not finalize checksum of file \"%s\"",
187 pathname);
188
190 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
191 pg_checksum_type_name(checksum_ctx->type));
192 enlargeStringInfo(&buf, 2 * checksumlen);
193 buf.len += hex_encode((char *) checksumbuf, checksumlen,
194 &buf.data[buf.len]);
196 }
197
198 /* Close out the object. */
200
201 /* OK, add it to the manifest. */
203
204 /* Avoid leaking memory. */
205 pfree(buf.data);
206}
207
208/*
209 * Add information about the WAL that will need to be replayed when restoring
210 * this backup to the manifest.
211 */
212void
214 TimeLineID starttli, XLogRecPtr endptr,
215 TimeLineID endtli)
216{
217 List *timelines;
218 ListCell *lc;
219 bool first_wal_range = true;
220 bool found_start_timeline = false;
221
223 return;
224
225 /* Terminate the list of files. */
227
228 /* Read the timeline history for the ending timeline. */
229 timelines = readTimeLineHistory(endtli);
230
231 /* Start a list of LSN ranges. */
232 AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
233
234 foreach(lc, timelines)
235 {
236 TimeLineHistoryEntry *entry = lfirst(lc);
237 XLogRecPtr tl_beginptr;
238
239 /*
240 * We only care about timelines that were active during the backup.
241 * Skip any that ended before the backup started. (Note that if
242 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
243 * yet ended.)
244 */
245 if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
246 continue;
247
248 /*
249 * Because the timeline history file lists newer timelines before
250 * older ones, the first timeline we encounter that is new enough to
251 * matter ought to match the ending timeline of the backup.
252 */
253 if (first_wal_range && endtli != entry->tli)
255 errmsg("expected end timeline %u but found timeline %u",
256 starttli, entry->tli));
257
258 /*
259 * If this timeline entry matches with the timeline on which the
260 * backup started, WAL needs to be checked from the start LSN of the
261 * backup. If this entry refers to a newer timeline, WAL needs to be
262 * checked since the beginning of this timeline, so use the LSN where
263 * the timeline began.
264 */
265 if (starttli == entry->tli)
266 tl_beginptr = startptr;
267 else
268 {
269 tl_beginptr = entry->begin;
270
271 /*
272 * If we reach a TLI that has no valid beginning LSN, there can't
273 * be any more timelines in the history after this point, so we'd
274 * better have arrived at the expected starting TLI. If not,
275 * something's gone horribly wrong.
276 */
277 if (XLogRecPtrIsInvalid(entry->begin))
279 errmsg("expected start timeline %u but found timeline %u",
280 starttli, entry->tli));
281 }
282
284 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
285 first_wal_range ? "" : ",\n",
286 entry->tli,
287 LSN_FORMAT_ARGS(tl_beginptr),
288 LSN_FORMAT_ARGS(endptr));
289
290 if (starttli == entry->tli)
291 {
292 found_start_timeline = true;
293 break;
294 }
295
296 endptr = entry->begin;
297 first_wal_range = false;
298 }
299
300 /*
301 * The last entry in the timeline history for the ending timeline should
302 * be the ending timeline itself. Verify that this is what we observed.
303 */
304 if (!found_start_timeline)
306 errmsg("start timeline %u not found in history of timeline %u",
307 starttli, endtli));
308
309 /* Terminate the list of WAL ranges. */
311}
312
313/*
314 * Finalize the backup manifest, and send it to the client.
315 */
316void
318{
319 uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
320 char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
321 size_t manifest_bytes_done = 0;
322
324 return;
325
326 /*
327 * Append manifest checksum, so that the problems with the manifest itself
328 * can be detected.
329 *
330 * We always use SHA-256 for this, regardless of what algorithm is chosen
331 * for checksumming the files. If we ever want to make the checksum
332 * algorithm used for the manifest file variable, the client will need a
333 * way to figure out which algorithm to use as close to the beginning of
334 * the manifest file as possible, to avoid having to read the whole thing
335 * twice.
336 */
337 manifest->still_checksumming = false;
338 if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
339 sizeof(checksumbuf)) < 0)
340 elog(ERROR, "failed to finalize checksum of backup manifest: %s",
341 pg_cryptohash_error(manifest->manifest_ctx));
342 AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
343
344 hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
345 checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
346
347 AppendStringToManifest(manifest, checksumstringbuf);
349
350 /*
351 * We've written all the data to the manifest file. Rewind the file so
352 * that we can read it all back.
353 */
354 if (BufFileSeek(manifest->buffile, 0, 0, SEEK_SET))
357 errmsg("could not rewind temporary file")));
358
359
360 /*
361 * Send the backup manifest.
362 */
364 while (manifest_bytes_done < manifest->manifest_size)
365 {
366 size_t bytes_to_read;
367
368 bytes_to_read = Min(sink->bbs_buffer_length,
369 manifest->manifest_size - manifest_bytes_done);
370 BufFileReadExact(manifest->buffile, sink->bbs_buffer, bytes_to_read);
371 bbsink_manifest_contents(sink, bytes_to_read);
372 manifest_bytes_done += bytes_to_read;
373 }
375
376 /* Release resources */
377 BufFileClose(manifest->buffile);
378}
379
380/*
381 * Append a cstring to the manifest.
382 */
383static void
385{
386 int len = strlen(s);
387
388 Assert(manifest != NULL);
389 if (manifest->still_checksumming)
390 {
391 if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
392 elog(ERROR, "failed to update checksum of backup manifest: %s",
393 pg_cryptohash_error(manifest->manifest_ctx));
394 }
395 BufFileWrite(manifest->buffile, s, len);
396 manifest->manifest_size += len;
397}
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
void AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
void SendBackupManifest(backup_manifest_info *manifest, bbsink *sink)
void FreeBackupManifest(backup_manifest_info *manifest)
#define AppendToManifest(manifest,...)
static void AppendStringToManifest(backup_manifest_info *manifest, const char *s)
static bool IsManifestEnabled(backup_manifest_info *manifest)
@ MANIFEST_OPTION_NO
@ MANIFEST_OPTION_FORCE_ENCODE
enum manifest_option backup_manifest_option
static void bbsink_begin_manifest(bbsink *sink)
static void bbsink_end_manifest(bbsink *sink)
static void bbsink_manifest_contents(bbsink *sink, size_t len)
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
void BufFileClose(BufFile *file)
Definition: buffile.c:412
#define Min(x, y)
Definition: c.h:958
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
#define UINT64_FORMAT
Definition: c.h:504
#define UINT64CONST(x)
Definition: c.h:500
#define OidIsValid(objectId)
Definition: c.h:729
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
char * pg_checksum_type_name(pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
pg_checksum_type
@ CHECKSUM_TYPE_NONE
const char * pg_cryptohash_error(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:254
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:136
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:74
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:100
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:238
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Definition: cryptohash.c:172
@ PG_SHA256
Definition: cryptohash.h:24
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:162
void escape_json_with_len(StringInfo buf, const char *str, int len)
Definition: json.c:1631
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1566
void pfree(void *pointer)
Definition: mcxt.c:1521
static bool manifest
#define MAXPGPATH
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
int64 pg_time_t
Definition: pgtime.h:23
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1389
#define snprintf
Definition: port.h:238
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_TBLSPC_DIR
Definition: relpath.h:41
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:23
#define PG_SHA256_DIGEST_STRING_LENGTH
Definition: sha2.h:24
static pg_noinline void Size size
Definition: slab.c:607
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:286
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:191
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
Definition: pg_list.h:54
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
char * bbs_buffer
size_t bbs_buffer_length
pg_checksum_type type
uint64 GetSystemIdentifier(void)
Definition: xlog.c:4568
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59