PostgreSQL Source Code  git master
backup_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  * code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/backup/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "access/xlog.h"
17 #include "backup/backup_manifest.h"
18 #include "backup/basebackup_sink.h"
19 #include "common/relpath.h"
20 #include "mb/pg_wchar.h"
21 #include "utils/builtins.h"
22 #include "utils/json.h"
23 
24 static void AppendStringToManifest(backup_manifest_info *manifest, const char *s);
25 
26 /*
27  * Does the user want a backup manifest?
28  *
29  * It's simplest to always have a manifest_info object, so that we don't need
30  * checks for NULL pointers in too many places. However, if the user doesn't
31  * want a manifest, we set manifest->buffile to NULL.
32  */
33 static inline bool
35 {
36  return (manifest->buffile != NULL);
37 }
38 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are handed to psprintf(); the resulting
 * string is appended with AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement.  A bare { ... } block would
 * leave a stray empty statement behind, which breaks constructs such as
 * "if (x) AppendToManifest(...); else ...".
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
48 
49 /*
50  * Initialize state so that we can construct a backup manifest.
51  *
52  * NB: Although the checksum type for the data files is configurable, the
53  * checksum for the manifest itself always uses SHA-256. See comments in
54  * SendBackupManifest.
55  */
56 void
58  backup_manifest_option want_manifest,
59  pg_checksum_type manifest_checksum_type)
60 {
61  memset(manifest, 0, sizeof(backup_manifest_info));
62  manifest->checksum_type = manifest_checksum_type;
63 
64  if (want_manifest == MANIFEST_OPTION_NO)
65  manifest->buffile = NULL;
66  else
67  {
68  manifest->buffile = BufFileCreateTemp(false);
69  manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
70  if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
71  elog(ERROR, "failed to initialize checksum of backup manifest: %s",
72  pg_cryptohash_error(manifest->manifest_ctx));
73  }
74 
75  manifest->manifest_size = UINT64CONST(0);
76  manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
77  manifest->first_file = true;
78  manifest->still_checksumming = true;
79 
80  if (want_manifest != MANIFEST_OPTION_NO)
82  "{ \"PostgreSQL-Backup-Manifest-Version\": 2,\n"
83  "\"System-Identifier\": " UINT64_FORMAT ",\n"
84  "\"Files\": [",
86 }
87 
88 /*
89  * Free resources assigned to a backup manifest constructed.
90  */
91 void
93 {
94  pg_cryptohash_free(manifest->manifest_ctx);
95  manifest->manifest_ctx = NULL;
96 }
97 
98 /*
99  * Add an entry to the backup manifest for a file.
100  */
101 void
103  const char *pathname, size_t size, pg_time_t mtime,
104  pg_checksum_context *checksum_ctx)
105 {
106  char pathbuf[MAXPGPATH];
107  int pathlen;
109 
111  return;
112 
113  /*
114  * If this file is part of a tablespace, the pathname passed to this
115  * function will be relative to the tar file that contains it. We want the
116  * pathname relative to the data directory (ignoring the intermediate
117  * symlink traversal).
118  */
119  if (OidIsValid(spcoid))
120  {
121  snprintf(pathbuf, sizeof(pathbuf), "%s/%u/%s", PG_TBLSPC_DIR, spcoid,
122  pathname);
123  pathname = pathbuf;
124  }
125 
126  /*
127  * Each file's entry needs to be separated from any entry that follows by
128  * a comma, but there's no comma before the first one or after the last
129  * one. To make that work, adding a file to the manifest starts by
130  * terminating the most recently added line, with a comma if appropriate,
131  * but does not terminate the line inserted for this file.
132  */
134  if (manifest->first_file)
135  {
136  appendStringInfoChar(&buf, '\n');
137  manifest->first_file = false;
138  }
139  else
140  appendStringInfoString(&buf, ",\n");
141 
142  /*
143  * Write the relative pathname to this file out to the manifest. The
144  * manifest is always stored in UTF-8, so we have to encode paths that are
145  * not valid in that encoding.
146  */
147  pathlen = strlen(pathname);
148  if (!manifest->force_encode &&
149  pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
150  {
151  appendStringInfoString(&buf, "{ \"Path\": ");
152  escape_json_with_len(&buf, pathname, pathlen);
153  appendStringInfoString(&buf, ", ");
154  }
155  else
156  {
157  appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
158  enlargeStringInfo(&buf, 2 * pathlen);
159  buf.len += hex_encode(pathname, pathlen,
160  &buf.data[buf.len]);
161  appendStringInfoString(&buf, "\", ");
162  }
163 
164  appendStringInfo(&buf, "\"Size\": %zu, ", size);
165 
166  /*
167  * Convert last modification time to a string and append it to the
168  * manifest. Since it's not clear what time zone to use and since time
169  * zone definitions can change, possibly causing confusion, use GMT
170  * always.
171  */
172  appendStringInfoString(&buf, "\"Last-Modified\": \"");
173  enlargeStringInfo(&buf, 128);
174  buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
175  pg_gmtime(&mtime));
176  appendStringInfoChar(&buf, '"');
177 
178  /* Add checksum information. */
179  if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
180  {
181  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
182  int checksumlen;
183 
184  checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
185  if (checksumlen < 0)
186  elog(ERROR, "could not finalize checksum of file \"%s\"",
187  pathname);
188 
190  ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
191  pg_checksum_type_name(checksum_ctx->type));
192  enlargeStringInfo(&buf, 2 * checksumlen);
193  buf.len += hex_encode((char *) checksumbuf, checksumlen,
194  &buf.data[buf.len]);
195  appendStringInfoChar(&buf, '"');
196  }
197 
198  /* Close out the object. */
199  appendStringInfoString(&buf, " }");
200 
201  /* OK, add it to the manifest. */
203 
204  /* Avoid leaking memory. */
205  pfree(buf.data);
206 }
207 
208 /*
209  * Add information about the WAL that will need to be replayed when restoring
210  * this backup to the manifest.
211  */
212 void
214  TimeLineID starttli, XLogRecPtr endptr,
215  TimeLineID endtli)
216 {
217  List *timelines;
218  ListCell *lc;
219  bool first_wal_range = true;
220  bool found_start_timeline = false;
221 
223  return;
224 
225  /* Terminate the list of files. */
226  AppendStringToManifest(manifest, "\n],\n");
227 
228  /* Read the timeline history for the ending timeline. */
229  timelines = readTimeLineHistory(endtli);
230 
231  /* Start a list of LSN ranges. */
232  AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
233 
234  foreach(lc, timelines)
235  {
236  TimeLineHistoryEntry *entry = lfirst(lc);
237  XLogRecPtr tl_beginptr;
238 
239  /*
240  * We only care about timelines that were active during the backup.
241  * Skip any that ended before the backup started. (Note that if
242  * entry->end is InvalidXLogRecPtr, it means that the timeline has not
243  * yet ended.)
244  */
245  if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
246  continue;
247 
248  /*
249  * Because the timeline history file lists newer timelines before
250  * older ones, the first timeline we encounter that is new enough to
251  * matter ought to match the ending timeline of the backup.
252  */
253  if (first_wal_range && endtli != entry->tli)
254  ereport(ERROR,
255  errmsg("expected end timeline %u but found timeline %u",
256  starttli, entry->tli));
257 
258  /*
259  * If this timeline entry matches with the timeline on which the
260  * backup started, WAL needs to be checked from the start LSN of the
261  * backup. If this entry refers to a newer timeline, WAL needs to be
262  * checked since the beginning of this timeline, so use the LSN where
263  * the timeline began.
264  */
265  if (starttli == entry->tli)
266  tl_beginptr = startptr;
267  else
268  {
269  tl_beginptr = entry->begin;
270 
271  /*
272  * If we reach a TLI that has no valid beginning LSN, there can't
273  * be any more timelines in the history after this point, so we'd
274  * better have arrived at the expected starting TLI. If not,
275  * something's gone horribly wrong.
276  */
277  if (XLogRecPtrIsInvalid(entry->begin))
278  ereport(ERROR,
279  errmsg("expected start timeline %u but found timeline %u",
280  starttli, entry->tli));
281  }
282 
284  "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
285  first_wal_range ? "" : ",\n",
286  entry->tli,
287  LSN_FORMAT_ARGS(tl_beginptr),
288  LSN_FORMAT_ARGS(endptr));
289 
290  if (starttli == entry->tli)
291  {
292  found_start_timeline = true;
293  break;
294  }
295 
296  endptr = entry->begin;
297  first_wal_range = false;
298  }
299 
300  /*
301  * The last entry in the timeline history for the ending timeline should
302  * be the ending timeline itself. Verify that this is what we observed.
303  */
304  if (!found_start_timeline)
305  ereport(ERROR,
306  errmsg("start timeline %u not found in history of timeline %u",
307  starttli, endtli));
308 
309  /* Terminate the list of WAL ranges. */
310  AppendStringToManifest(manifest, "\n],\n");
311 }
312 
313 /*
314  * Finalize the backup manifest, and send it to the client.
315  */
316 void
318 {
319  uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
320  char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
321  size_t manifest_bytes_done = 0;
322 
324  return;
325 
326  /*
327  * Append manifest checksum, so that the problems with the manifest itself
328  * can be detected.
329  *
330  * We always use SHA-256 for this, regardless of what algorithm is chosen
331  * for checksumming the files. If we ever want to make the checksum
332  * algorithm used for the manifest file variable, the client will need a
333  * way to figure out which algorithm to use as close to the beginning of
334  * the manifest file as possible, to avoid having to read the whole thing
335  * twice.
336  */
337  manifest->still_checksumming = false;
338  if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
339  sizeof(checksumbuf)) < 0)
340  elog(ERROR, "failed to finalize checksum of backup manifest: %s",
341  pg_cryptohash_error(manifest->manifest_ctx));
342  AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
343 
344  hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
345  checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
346 
347  AppendStringToManifest(manifest, checksumstringbuf);
349 
350  /*
351  * We've written all the data to the manifest file. Rewind the file so
352  * that we can read it all back.
353  */
354  if (BufFileSeek(manifest->buffile, 0, 0, SEEK_SET))
355  ereport(ERROR,
357  errmsg("could not rewind temporary file")));
358 
359 
360  /*
361  * Send the backup manifest.
362  */
363  bbsink_begin_manifest(sink);
364  while (manifest_bytes_done < manifest->manifest_size)
365  {
366  size_t bytes_to_read;
367 
368  bytes_to_read = Min(sink->bbs_buffer_length,
369  manifest->manifest_size - manifest_bytes_done);
370  BufFileReadExact(manifest->buffile, sink->bbs_buffer, bytes_to_read);
371  bbsink_manifest_contents(sink, bytes_to_read);
372  manifest_bytes_done += bytes_to_read;
373  }
374  bbsink_end_manifest(sink);
375 
376  /* Release resources */
377  BufFileClose(manifest->buffile);
378 }
379 
380 /*
381  * Append a cstring to the manifest.
382  */
383 static void
385 {
386  int len = strlen(s);
387 
388  Assert(manifest != NULL);
389  if (manifest->still_checksumming)
390  {
391  if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
392  elog(ERROR, "failed to update checksum of backup manifest: %s",
393  pg_cryptohash_error(manifest->manifest_ctx));
394  }
395  BufFileWrite(manifest->buffile, s, len);
396  manifest->manifest_size += len;
397 }
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
void AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
void SendBackupManifest(backup_manifest_info *manifest, bbsink *sink)
void FreeBackupManifest(backup_manifest_info *manifest)
#define AppendToManifest(manifest,...)
static void AppendStringToManifest(backup_manifest_info *manifest, const char *s)
static bool IsManifestEnabled(backup_manifest_info *manifest)
@ MANIFEST_OPTION_NO
@ MANIFEST_OPTION_FORCE_ENCODE
enum manifest_option backup_manifest_option
static void bbsink_begin_manifest(bbsink *sink)
static void bbsink_end_manifest(bbsink *sink)
static void bbsink_manifest_contents(bbsink *sink, size_t len)
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
void BufFileClose(BufFile *file)
Definition: buffile.c:412
#define Min(x, y)
Definition: c.h:958
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
#define UINT64_FORMAT
Definition: c.h:504
#define UINT64CONST(x)
Definition: c.h:500
#define OidIsValid(objectId)
Definition: c.h:729
char * pg_checksum_type_name(pg_checksum_type type)
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
#define PG_CHECKSUM_MAX_LENGTH
pg_checksum_type
@ CHECKSUM_TYPE_NONE
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:136
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:100
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:238
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:74
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Definition: cryptohash.c:172
const char * pg_cryptohash_error(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:254
@ PG_SHA256
Definition: cryptohash.h:24
int errcode_for_file_access(void)
Definition: elog.c:876
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:162
void escape_json_with_len(StringInfo buf, const char *str, int len)
Definition: json.c:1631
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1566
void pfree(void *pointer)
Definition: mcxt.c:1521
static bool manifest
#define MAXPGPATH
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1389
int64 pg_time_t
Definition: pgtime.h:23
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
#define snprintf
Definition: port.h:238
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_TBLSPC_DIR
Definition: relpath.h:41
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:23
#define PG_SHA256_DIGEST_STRING_LENGTH
Definition: sha2.h:24
static pg_noinline void Size size
Definition: slab.c:607
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:286
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:191
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
Definition: pg_list.h:54
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
char * bbs_buffer
size_t bbs_buffer_length
pg_checksum_type type
uint64 GetSystemIdentifier(void)
Definition: xlog.c:4568
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59