PostgreSQL Source Code  git master
backup_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  * code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/backup/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "backup/backup_manifest.h"
17 #include "backup/basebackup_sink.h"
18 #include "libpq/libpq.h"
19 #include "libpq/pqformat.h"
20 #include "mb/pg_wchar.h"
21 #include "utils/builtins.h"
22 #include "utils/json.h"
23 
24 static void AppendStringToManifest(backup_manifest_info *manifest, const char *s);
25 
26 /*
27  * Does the user want a backup manifest?
28  *
29  * It's simplest to always have a manifest_info object, so that we don't need
30  * checks for NULL pointers in too many places. However, if the user doesn't
31  * want a manifest, we set manifest->buffile to NULL.
32  */
33 static inline bool
35 {
36  return (manifest->buffile != NULL);
37 }
38 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments following "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement, which keeps it safe to use as
 * the unbraced body of an if/else.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
48 
49 /*
50  * Initialize state so that we can construct a backup manifest.
51  *
52  * NB: Although the checksum type for the data files is configurable, the
53  * checksum for the manifest itself always uses SHA-256. See comments in
54  * SendBackupManifest.
55  */
56 void
58  backup_manifest_option want_manifest,
59  pg_checksum_type manifest_checksum_type)
60 {
61  memset(manifest, 0, sizeof(backup_manifest_info));
62  manifest->checksum_type = manifest_checksum_type;
63 
64  if (want_manifest == MANIFEST_OPTION_NO)
65  manifest->buffile = NULL;
66  else
67  {
68  manifest->buffile = BufFileCreateTemp(false);
69  manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
70  if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
71  elog(ERROR, "failed to initialize checksum of backup manifest: %s",
72  pg_cryptohash_error(manifest->manifest_ctx));
73  }
74 
75  manifest->manifest_size = UINT64CONST(0);
76  manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
77  manifest->first_file = true;
78  manifest->still_checksumming = true;
79 
80  if (want_manifest != MANIFEST_OPTION_NO)
82  "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
83  "\"Files\": [");
84 }
85 
86 /*
87  * Free resources assigned to a backup manifest constructed.
88  */
89 void
91 {
92  pg_cryptohash_free(manifest->manifest_ctx);
93  manifest->manifest_ctx = NULL;
94 }
95 
96 /*
97  * Add an entry to the backup manifest for a file.
98  */
99 void
101  const char *pathname, size_t size, pg_time_t mtime,
102  pg_checksum_context *checksum_ctx)
103 {
104  char pathbuf[MAXPGPATH];
105  int pathlen;
107 
109  return;
110 
111  /*
112  * If this file is part of a tablespace, the pathname passed to this
113  * function will be relative to the tar file that contains it. We want the
114  * pathname relative to the data directory (ignoring the intermediate
115  * symlink traversal).
116  */
117  if (OidIsValid(spcoid))
118  {
119  snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%u/%s", spcoid,
120  pathname);
121  pathname = pathbuf;
122  }
123 
124  /*
125  * Each file's entry needs to be separated from any entry that follows by
126  * a comma, but there's no comma before the first one or after the last
127  * one. To make that work, adding a file to the manifest starts by
128  * terminating the most recently added line, with a comma if appropriate,
129  * but does not terminate the line inserted for this file.
130  */
132  if (manifest->first_file)
133  {
134  appendStringInfoChar(&buf, '\n');
135  manifest->first_file = false;
136  }
137  else
138  appendStringInfoString(&buf, ",\n");
139 
140  /*
141  * Write the relative pathname to this file out to the manifest. The
142  * manifest is always stored in UTF-8, so we have to encode paths that are
143  * not valid in that encoding.
144  */
145  pathlen = strlen(pathname);
146  if (!manifest->force_encode &&
147  pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
148  {
149  appendStringInfoString(&buf, "{ \"Path\": ");
150  escape_json(&buf, pathname);
151  appendStringInfoString(&buf, ", ");
152  }
153  else
154  {
155  appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
156  enlargeStringInfo(&buf, 2 * pathlen);
157  buf.len += hex_encode(pathname, pathlen,
158  &buf.data[buf.len]);
159  appendStringInfoString(&buf, "\", ");
160  }
161 
162  appendStringInfo(&buf, "\"Size\": %zu, ", size);
163 
164  /*
165  * Convert last modification time to a string and append it to the
166  * manifest. Since it's not clear what time zone to use and since time
167  * zone definitions can change, possibly causing confusion, use GMT
168  * always.
169  */
170  appendStringInfoString(&buf, "\"Last-Modified\": \"");
171  enlargeStringInfo(&buf, 128);
172  buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
173  pg_gmtime(&mtime));
174  appendStringInfoChar(&buf, '"');
175 
176  /* Add checksum information. */
177  if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
178  {
179  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
180  int checksumlen;
181 
182  checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
183  if (checksumlen < 0)
184  elog(ERROR, "could not finalize checksum of file \"%s\"",
185  pathname);
186 
188  ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
189  pg_checksum_type_name(checksum_ctx->type));
190  enlargeStringInfo(&buf, 2 * checksumlen);
191  buf.len += hex_encode((char *) checksumbuf, checksumlen,
192  &buf.data[buf.len]);
193  appendStringInfoChar(&buf, '"');
194  }
195 
196  /* Close out the object. */
197  appendStringInfoString(&buf, " }");
198 
199  /* OK, add it to the manifest. */
201 
202  /* Avoid leaking memory. */
203  pfree(buf.data);
204 }
205 
206 /*
207  * Add information about the WAL that will need to be replayed when restoring
208  * this backup to the manifest.
209  */
210 void
212  TimeLineID starttli, XLogRecPtr endptr,
213  TimeLineID endtli)
214 {
215  List *timelines;
216  ListCell *lc;
217  bool first_wal_range = true;
218  bool found_start_timeline = false;
219 
221  return;
222 
223  /* Terminate the list of files. */
224  AppendStringToManifest(manifest, "\n],\n");
225 
226  /* Read the timeline history for the ending timeline. */
227  timelines = readTimeLineHistory(endtli);
228 
229  /* Start a list of LSN ranges. */
230  AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
231 
232  foreach(lc, timelines)
233  {
234  TimeLineHistoryEntry *entry = lfirst(lc);
235  XLogRecPtr tl_beginptr;
236 
237  /*
238  * We only care about timelines that were active during the backup.
239  * Skip any that ended before the backup started. (Note that if
240  * entry->end is InvalidXLogRecPtr, it means that the timeline has not
241  * yet ended.)
242  */
243  if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
244  continue;
245 
246  /*
247  * Because the timeline history file lists newer timelines before
248  * older ones, the first timeline we encounter that is new enough to
249  * matter ought to match the ending timeline of the backup.
250  */
251  if (first_wal_range && endtli != entry->tli)
252  ereport(ERROR,
253  errmsg("expected end timeline %u but found timeline %u",
254  starttli, entry->tli));
255 
256  /*
257  * If this timeline entry matches with the timeline on which the
258  * backup started, WAL needs to be checked from the start LSN of the
259  * backup. If this entry refers to a newer timeline, WAL needs to be
260  * checked since the beginning of this timeline, so use the LSN where
261  * the timeline began.
262  */
263  if (starttli == entry->tli)
264  tl_beginptr = startptr;
265  else
266  {
267  tl_beginptr = entry->begin;
268 
269  /*
270  * If we reach a TLI that has no valid beginning LSN, there can't
271  * be any more timelines in the history after this point, so we'd
272  * better have arrived at the expected starting TLI. If not,
273  * something's gone horribly wrong.
274  */
275  if (XLogRecPtrIsInvalid(entry->begin))
276  ereport(ERROR,
277  errmsg("expected start timeline %u but found timeline %u",
278  starttli, entry->tli));
279  }
280 
282  "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
283  first_wal_range ? "" : ",\n",
284  entry->tli,
285  LSN_FORMAT_ARGS(tl_beginptr),
286  LSN_FORMAT_ARGS(endptr));
287 
288  if (starttli == entry->tli)
289  {
290  found_start_timeline = true;
291  break;
292  }
293 
294  endptr = entry->begin;
295  first_wal_range = false;
296  }
297 
298  /*
299  * The last entry in the timeline history for the ending timeline should
300  * be the ending timeline itself. Verify that this is what we observed.
301  */
302  if (!found_start_timeline)
303  ereport(ERROR,
304  errmsg("start timeline %u not found in history of timeline %u",
305  starttli, endtli));
306 
307  /* Terminate the list of WAL ranges. */
308  AppendStringToManifest(manifest, "\n],\n");
309 }
310 
311 /*
312  * Finalize the backup manifest, and send it to the client.
313  */
314 void
316 {
317  uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
318  char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
319  size_t manifest_bytes_done = 0;
320 
322  return;
323 
324  /*
325  * Append manifest checksum, so that the problems with the manifest itself
326  * can be detected.
327  *
328  * We always use SHA-256 for this, regardless of what algorithm is chosen
329  * for checksumming the files. If we ever want to make the checksum
330  * algorithm used for the manifest file variable, the client will need a
331  * way to figure out which algorithm to use as close to the beginning of
332  * the manifest file as possible, to avoid having to read the whole thing
333  * twice.
334  */
335  manifest->still_checksumming = false;
336  if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
337  sizeof(checksumbuf)) < 0)
338  elog(ERROR, "failed to finalize checksum of backup manifest: %s",
339  pg_cryptohash_error(manifest->manifest_ctx));
340  AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
341 
342  hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
343  checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
344 
345  AppendStringToManifest(manifest, checksumstringbuf);
347 
348  /*
349  * We've written all the data to the manifest file. Rewind the file so
350  * that we can read it all back.
351  */
352  if (BufFileSeek(manifest->buffile, 0, 0, SEEK_SET))
353  ereport(ERROR,
355  errmsg("could not rewind temporary file")));
356 
357 
358  /*
359  * Send the backup manifest.
360  */
361  bbsink_begin_manifest(sink);
362  while (manifest_bytes_done < manifest->manifest_size)
363  {
364  size_t bytes_to_read;
365 
366  bytes_to_read = Min(sink->bbs_buffer_length,
367  manifest->manifest_size - manifest_bytes_done);
368  BufFileReadExact(manifest->buffile, sink->bbs_buffer, bytes_to_read);
369  bbsink_manifest_contents(sink, bytes_to_read);
370  manifest_bytes_done += bytes_to_read;
371  }
372  bbsink_end_manifest(sink);
373 
374  /* Release resources */
375  BufFileClose(manifest->buffile);
376 }
377 
378 /*
379  * Append a cstring to the manifest.
380  */
381 static void
383 {
384  int len = strlen(s);
385 
386  Assert(manifest != NULL);
387  if (manifest->still_checksumming)
388  {
389  if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
390  elog(ERROR, "failed to update checksum of backup manifest: %s",
391  pg_cryptohash_error(manifest->manifest_ctx));
392  }
393  BufFileWrite(manifest->buffile, s, len);
394  manifest->manifest_size += len;
395 }
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
void AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
void SendBackupManifest(backup_manifest_info *manifest, bbsink *sink)
void FreeBackupManifest(backup_manifest_info *manifest)
#define AppendToManifest(manifest,...)
static void AppendStringToManifest(backup_manifest_info *manifest, const char *s)
static bool IsManifestEnabled(backup_manifest_info *manifest)
@ MANIFEST_OPTION_NO
@ MANIFEST_OPTION_FORCE_ENCODE
enum manifest_option backup_manifest_option
static void bbsink_begin_manifest(bbsink *sink)
static void bbsink_end_manifest(bbsink *sink)
static void bbsink_manifest_contents(bbsink *sink, size_t len)
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:654
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:193
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition: buffile.c:676
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:740
void BufFileClose(BufFile *file)
Definition: buffile.c:412
#define Min(x, y)
Definition: c.h:993
unsigned char uint8
Definition: c.h:493
#define OidIsValid(objectId)
Definition: c.h:764
char * pg_checksum_type_name(pg_checksum_type type)
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
#define PG_CHECKSUM_MAX_LENGTH
pg_checksum_type
@ CHECKSUM_TYPE_NONE
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:136
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:100
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:238
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:74
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Definition: cryptohash.c:172
const char * pg_cryptohash_error(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:254
@ PG_SHA256
Definition: cryptohash.h:24
int errcode_for_file_access(void)
Definition: elog.c:881
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:162
void escape_json(StringInfo buf, const char *str)
Definition: json.c:1526
Assert(fmt[strlen(fmt) - 1] !='\n')
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1573
void pfree(void *pointer)
Definition: mcxt.c:1456
static bool manifest
#define MAXPGPATH
const void size_t len
#define lfirst(lc)
Definition: pg_list.h:172
static char * buf
Definition: pg_test_fsync.c:73
@ PG_UTF8
Definition: pg_wchar.h:235
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1389
int64 pg_time_t
Definition: pgtime.h:23
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
#define snprintf
Definition: port.h:238
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:23
#define PG_SHA256_DIGEST_STRING_LENGTH
Definition: sha2.h:24
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:289
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Definition: pg_list.h:54
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
char * bbs_buffer
size_t bbs_buffer_length
pg_checksum_type type
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59