PostgreSQL Source Code  git master
backup_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  * code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "libpq/libpq.h"
17 #include "libpq/pqformat.h"
18 #include "mb/pg_wchar.h"
20 #include "utils/builtins.h"
21 #include "utils/json.h"
22 
24 
25 /*
26  * Does the user want a backup manifest?
27  *
28  * It's simplest to always have a manifest_info object, so that we don't need
29  * checks for NULL pointers in too many places. However, if the user doesn't
30  * want a manifest, we set manifest->buffile to NULL.
31  */
32 static inline bool
34 {
35  return (manifest->buffile != NULL);
36 }
37 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are passed to psprintf(); the resulting
 * string is handed to AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement and remains correct when used as the unbraced body
 * of an if that is followed by an else.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
47 
48 /*
49  * Initialize state so that we can construct a backup manifest.
50  *
51  * NB: Although the checksum type for the data files is configurable, the
52  * checksum for the manifest itself always uses SHA-256. See comments in
53  * SendBackupManifest.
54  */
55 void
57  backup_manifest_option want_manifest,
58  pg_checksum_type manifest_checksum_type)
59 {
60  memset(manifest, 0, sizeof(backup_manifest_info));
61  manifest->checksum_type = manifest_checksum_type;
62 
63  if (want_manifest == MANIFEST_OPTION_NO)
64  manifest->buffile = NULL;
65  else
66  {
67  manifest->buffile = BufFileCreateTemp(false);
69  if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
70  elog(ERROR, "failed to initialize checksum of backup manifest");
71  }
72 
73  manifest->manifest_size = UINT64CONST(0);
74  manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
75  manifest->first_file = true;
76  manifest->still_checksumming = true;
77 
78  if (want_manifest != MANIFEST_OPTION_NO)
79  AppendToManifest(manifest,
80  "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
81  "\"Files\": [");
82 }
83 
84 /*
85  * Free resources assigned to a backup manifest constructed.
86  */
87 void
89 {
91  manifest->manifest_ctx = NULL;
92 }
93 
94 /*
95  * Add an entry to the backup manifest for a file.
96  */
97 void
99  const char *pathname, size_t size, pg_time_t mtime,
100  pg_checksum_context *checksum_ctx)
101 {
102  char pathbuf[MAXPGPATH];
103  int pathlen;
105 
106  if (!IsManifestEnabled(manifest))
107  return;
108 
109  /*
110  * If this file is part of a tablespace, the pathname passed to this
111  * function will be relative to the tar file that contains it. We want the
112  * pathname relative to the data directory (ignoring the intermediate
113  * symlink traversal).
114  */
115  if (spcoid != NULL)
116  {
117  snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
118  pathname);
119  pathname = pathbuf;
120  }
121 
122  /*
123  * Each file's entry needs to be separated from any entry that follows by
124  * a comma, but there's no comma before the first one or after the last
125  * one. To make that work, adding a file to the manifest starts by
126  * terminating the most recently added line, with a comma if appropriate,
127  * but does not terminate the line inserted for this file.
128  */
129  initStringInfo(&buf);
130  if (manifest->first_file)
131  {
132  appendStringInfoChar(&buf, '\n');
133  manifest->first_file = false;
134  }
135  else
136  appendStringInfoString(&buf, ",\n");
137 
138  /*
139  * Write the relative pathname to this file out to the manifest. The
140  * manifest is always stored in UTF-8, so we have to encode paths that are
141  * not valid in that encoding.
142  */
143  pathlen = strlen(pathname);
144  if (!manifest->force_encode &&
145  pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
146  {
147  appendStringInfoString(&buf, "{ \"Path\": ");
148  escape_json(&buf, pathname);
149  appendStringInfoString(&buf, ", ");
150  }
151  else
152  {
153  appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
154  enlargeStringInfo(&buf, 2 * pathlen);
155  buf.len += hex_encode(pathname, pathlen,
156  &buf.data[buf.len]);
157  appendStringInfoString(&buf, "\", ");
158  }
159 
160  appendStringInfo(&buf, "\"Size\": %zu, ", size);
161 
162  /*
163  * Convert last modification time to a string and append it to the
164  * manifest. Since it's not clear what time zone to use and since time
165  * zone definitions can change, possibly causing confusion, use GMT
166  * always.
167  */
168  appendStringInfoString(&buf, "\"Last-Modified\": \"");
169  enlargeStringInfo(&buf, 128);
170  buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
171  pg_gmtime(&mtime));
172  appendStringInfoChar(&buf, '"');
173 
174  /* Add checksum information. */
175  if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
176  {
177  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
178  int checksumlen;
179 
180  checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
181  if (checksumlen < 0)
182  elog(ERROR, "could not finalize checksum of file \"%s\"",
183  pathname);
184 
185  appendStringInfo(&buf,
186  ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
187  pg_checksum_type_name(checksum_ctx->type));
188  enlargeStringInfo(&buf, 2 * checksumlen);
189  buf.len += hex_encode((char *) checksumbuf, checksumlen,
190  &buf.data[buf.len]);
191  appendStringInfoChar(&buf, '"');
192  }
193 
194  /* Close out the object. */
195  appendStringInfoString(&buf, " }");
196 
197  /* OK, add it to the manifest. */
198  AppendStringToManifest(manifest, buf.data);
199 
200  /* Avoid leaking memory. */
201  pfree(buf.data);
202 }
203 
204 /*
205  * Add information about the WAL that will need to be replayed when restoring
206  * this backup to the manifest.
207  */
208 void
210  TimeLineID starttli, XLogRecPtr endptr,
211  TimeLineID endtli)
212 {
213  List *timelines;
214  ListCell *lc;
215  bool first_wal_range = true;
216  bool found_start_timeline = false;
217 
218  if (!IsManifestEnabled(manifest))
219  return;
220 
221  /* Terminate the list of files. */
222  AppendStringToManifest(manifest, "\n],\n");
223 
224  /* Read the timeline history for the ending timeline. */
225  timelines = readTimeLineHistory(endtli);
226 
227  /* Start a list of LSN ranges. */
228  AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
229 
230  foreach(lc, timelines)
231  {
232  TimeLineHistoryEntry *entry = lfirst(lc);
233  XLogRecPtr tl_beginptr;
234 
235  /*
236  * We only care about timelines that were active during the backup.
237  * Skip any that ended before the backup started. (Note that if
238  * entry->end is InvalidXLogRecPtr, it means that the timeline has not
239  * yet ended.)
240  */
241  if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
242  continue;
243 
244  /*
245  * Because the timeline history file lists newer timelines before
246  * older ones, the first timeline we encounter that is new enough to
247  * matter ought to match the ending timeline of the backup.
248  */
249  if (first_wal_range && endtli != entry->tli)
250  ereport(ERROR,
251  errmsg("expected end timeline %u but found timeline %u",
252  starttli, entry->tli));
253 
254  /*
255  * If this timeline entry matches with the timeline on which the
256  * backup started, WAL needs to be checked from the start LSN of the
257  * backup. If this entry refers to a newer timeline, WAL needs to be
258  * checked since the beginning of this timeline, so use the LSN where
259  * the timeline began.
260  */
261  if (starttli == entry->tli)
262  tl_beginptr = startptr;
263  else
264  {
265  tl_beginptr = entry->begin;
266 
267  /*
268  * If we reach a TLI that has no valid beginning LSN, there can't
269  * be any more timelines in the history after this point, so we'd
270  * better have arrived at the expected starting TLI. If not,
271  * something's gone horribly wrong.
272  */
273  if (XLogRecPtrIsInvalid(entry->begin))
274  ereport(ERROR,
275  errmsg("expected start timeline %u but found timeline %u",
276  starttli, entry->tli));
277  }
278 
279  AppendToManifest(manifest,
280  "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
281  first_wal_range ? "" : ",\n",
282  entry->tli,
283  LSN_FORMAT_ARGS(tl_beginptr),
284  LSN_FORMAT_ARGS(endptr));
285 
286  if (starttli == entry->tli)
287  {
288  found_start_timeline = true;
289  break;
290  }
291 
292  endptr = entry->begin;
293  first_wal_range = false;
294  }
295 
296  /*
297  * The last entry in the timeline history for the ending timeline should
298  * be the ending timeline itself. Verify that this is what we observed.
299  */
300  if (!found_start_timeline)
301  ereport(ERROR,
302  errmsg("start timeline %u not found in history of timeline %u",
303  starttli, endtli));
304 
305  /* Terminate the list of WAL ranges. */
306  AppendStringToManifest(manifest, "\n],\n");
307 }
308 
309 /*
310  * Finalize the backup manifest, and send it to the client.
311  */
312 void
314 {
315  StringInfoData protobuf;
316  uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
317  char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
318  size_t manifest_bytes_done = 0;
319 
320  if (!IsManifestEnabled(manifest))
321  return;
322 
323  /*
324  * Append manifest checksum, so that the problems with the manifest itself
325  * can be detected.
326  *
327  * We always use SHA-256 for this, regardless of what algorithm is chosen
328  * for checksumming the files. If we ever want to make the checksum
329  * algorithm used for the manifest file variable, the client will need a
330  * way to figure out which algorithm to use as close to the beginning of
331  * the manifest file as possible, to avoid having to read the whole thing
332  * twice.
333  */
334  manifest->still_checksumming = false;
335  if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
336  sizeof(checksumbuf)) < 0)
337  elog(ERROR, "failed to finalize checksum of backup manifest");
338  AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
339 
340  hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
341  checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
342 
343  AppendStringToManifest(manifest, checksumstringbuf);
344  AppendStringToManifest(manifest, "\"}\n");
345 
346  /*
347  * We've written all the data to the manifest file. Rewind the file so
348  * that we can read it all back.
349  */
350  if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
351  ereport(ERROR,
353  errmsg("could not rewind temporary file")));
354 
355  /* Send CopyOutResponse message */
356  pq_beginmessage(&protobuf, 'H');
357  pq_sendbyte(&protobuf, 0); /* overall format */
358  pq_sendint16(&protobuf, 0); /* natts */
359  pq_endmessage(&protobuf);
360 
361  /*
362  * Send CopyData messages.
363  *
364  * We choose to read back the data from the temporary file in chunks of
365  * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
366  * size, so it seems to make sense to match that value here.
367  */
368  while (manifest_bytes_done < manifest->manifest_size)
369  {
370  char manifestbuf[BLCKSZ];
371  size_t bytes_to_read;
372  size_t rc;
373 
374  bytes_to_read = Min(sizeof(manifestbuf),
375  manifest->manifest_size - manifest_bytes_done);
376  rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
377  if (rc != bytes_to_read)
378  ereport(ERROR,
380  errmsg("could not read from temporary file: %m")));
381  pq_putmessage('d', manifestbuf, bytes_to_read);
382  manifest_bytes_done += bytes_to_read;
383  }
384 
385  /* No more data, so send CopyDone message */
386  pq_putemptymessage('c');
387 
388  /* Release resources */
389  BufFileClose(manifest->buffile);
390 }
391 
392 /*
393  * Append a cstring to the manifest.
394  */
395 static void
397 {
398  int len = strlen(s);
399 
400  Assert(manifest != NULL);
401  if (manifest->still_checksumming)
402  {
403  if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
404  elog(ERROR, "failed to update checksum of backup manifest");
405  }
406  BufFileWrite(manifest->buffile, s, len);
407  manifest->manifest_size += len;
408 }
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Definition: cryptohash.c:168
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:128
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
char * pg_checksum_type_name(pg_checksum_type type)
uint32 TimeLineID
Definition: xlogdefs.h:59
int64 pg_time_t
Definition: pgtime.h:23
void escape_json(StringInfo buf, const char *str)
Definition: json.c:1279
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1387
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:662
#define Min(x, y)
Definition: c.h:986
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:23
unsigned char uint8
Definition: c.h:439
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
TimeLineID tli
Definition: timeline.h:27
#define PG_CHECKSUM_MAX_LENGTH
void BufFileClose(BufFile *file)
Definition: buffile.c:407
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1515
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define AppendToManifest(manifest,...)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
void pfree(void *pointer)
Definition: mcxt.c:1169
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
#define ERROR
Definition: elog.h:46
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:188
#define MAXPGPATH
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
static char * buf
Definition: pg_test_fsync.c:68
void SendBackupManifest(backup_manifest_info *manifest)
int errcode_for_file_access(void)
Definition: elog.c:721
enum manifest_option backup_manifest_option
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
static void AppendStringToManifest(backup_manifest_info *manifest, char *s)
void FreeBackupManifest(backup_manifest_info *manifest)
#define PG_SHA256_DIGEST_STRING_LENGTH
Definition: sha2.h:24
pg_checksum_type
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:161
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
static bool IsManifestEnabled(backup_manifest_info *manifest)
pg_checksum_type checksum_type
#define ereport(elevel,...)
Definition: elog.h:157
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:66
XLogRecPtr end
Definition: timeline.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
static bool manifest
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:92
pg_cryptohash_ctx * manifest_ctx
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:130
int errmsg(const char *fmt,...)
Definition: elog.c:909
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
XLogRecPtr begin
Definition: timeline.h:28
#define elog(elevel,...)
Definition: elog.h:232
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:555
void AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:49
void BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:598
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:216
pg_checksum_type type
Definition: pg_list.h:50
#define snprintf
Definition: port.h:217
static XLogRecPtr startptr
Definition: basebackup.c:116