PostgreSQL Source Code  git master
backup_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  * code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "common/hex.h"
17 #include "libpq/libpq.h"
18 #include "libpq/pqformat.h"
19 #include "mb/pg_wchar.h"
21 #include "utils/json.h"
22 
24 
25 /*
26  * Does the user want a backup manifest?
27  *
28  * It's simplest to always have a manifest_info object, so that we don't need
29  * checks for NULL pointers in too many places. However, if the user doesn't
30  * want a manifest, we set manifest->buffile to NULL.
31  */
32 static inline bool
34 {
35  return (manifest->buffile != NULL);
36 }
37 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The formatted string is built with psprintf, handed to
 * AppendStringToManifest, and then freed.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion behaves
 * as a single statement: the caller supplies the trailing semicolon, and the
 * macro can be used as the body of an unbraced if/else without surprises.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
47 
48 /*
49  * Initialize state so that we can construct a backup manifest.
50  *
51  * NB: Although the checksum type for the data files is configurable, the
52  * checksum for the manifest itself always uses SHA-256. See comments in
53  * SendBackupManifest.
54  */
55 void
57  backup_manifest_option want_manifest,
58  pg_checksum_type manifest_checksum_type)
59 {
60  memset(manifest, 0, sizeof(backup_manifest_info));
61  manifest->checksum_type = manifest_checksum_type;
62 
63  if (want_manifest == MANIFEST_OPTION_NO)
64  manifest->buffile = NULL;
65  else
66  {
67  manifest->buffile = BufFileCreateTemp(false);
69  if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
70  elog(ERROR, "failed to initialize checksum of backup manifest");
71  }
72 
73  manifest->manifest_size = UINT64CONST(0);
74  manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
75  manifest->first_file = true;
76  manifest->still_checksumming = true;
77 
78  if (want_manifest != MANIFEST_OPTION_NO)
79  AppendToManifest(manifest,
80  "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
81  "\"Files\": [");
82 }
83 
84 /*
85  * Free resources assigned to a backup manifest constructed.
86  */
87 void
89 {
91  manifest->manifest_ctx = NULL;
92 }
93 
94 /*
95  * Add an entry to the backup manifest for a file.
96  */
97 void
99  const char *pathname, size_t size, pg_time_t mtime,
100  pg_checksum_context *checksum_ctx)
101 {
102  char pathbuf[MAXPGPATH];
103  int pathlen;
105 
106  if (!IsManifestEnabled(manifest))
107  return;
108 
109  /*
110  * If this file is part of a tablespace, the pathname passed to this
111  * function will be relative to the tar file that contains it. We want the
112  * pathname relative to the data directory (ignoring the intermediate
113  * symlink traversal).
114  */
115  if (spcoid != NULL)
116  {
117  snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
118  pathname);
119  pathname = pathbuf;
120  }
121 
122  /*
123  * Each file's entry needs to be separated from any entry that follows by
124  * a comma, but there's no comma before the first one or after the last
125  * one. To make that work, adding a file to the manifest starts by
126  * terminating the most recently added line, with a comma if appropriate,
127  * but does not terminate the line inserted for this file.
128  */
129  initStringInfo(&buf);
130  if (manifest->first_file)
131  {
132  appendStringInfoChar(&buf, '\n');
133  manifest->first_file = false;
134  }
135  else
136  appendStringInfoString(&buf, ",\n");
137 
138  /*
139  * Write the relative pathname to this file out to the manifest. The
140  * manifest is always stored in UTF-8, so we have to encode paths that are
141  * not valid in that encoding.
142  */
143  pathlen = strlen(pathname);
144  if (!manifest->force_encode &&
145  pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
146  {
147  appendStringInfoString(&buf, "{ \"Path\": ");
148  escape_json(&buf, pathname);
149  appendStringInfoString(&buf, ", ");
150  }
151  else
152  {
153  uint64 dstlen = pg_hex_enc_len(pathlen);
154 
155  appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
156  enlargeStringInfo(&buf, dstlen);
157  buf.len += pg_hex_encode(pathname, pathlen,
158  &buf.data[buf.len], dstlen);
159  appendStringInfoString(&buf, "\", ");
160  }
161 
162  appendStringInfo(&buf, "\"Size\": %zu, ", size);
163 
164  /*
165  * Convert last modification time to a string and append it to the
166  * manifest. Since it's not clear what time zone to use and since time
167  * zone definitions can change, possibly causing confusion, use GMT
168  * always.
169  */
170  appendStringInfoString(&buf, "\"Last-Modified\": \"");
171  enlargeStringInfo(&buf, 128);
172  buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
173  pg_gmtime(&mtime));
174  appendStringInfoChar(&buf, '"');
175 
176  /* Add checksum information. */
177  if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
178  {
179  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
180  int checksumlen;
181  uint64 dstlen;
182 
183  checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
184  if (checksumlen < 0)
185  elog(ERROR, "could not finalize checksum of file \"%s\"",
186  pathname);
187 
188  appendStringInfo(&buf,
189  ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
190  pg_checksum_type_name(checksum_ctx->type));
191  dstlen = pg_hex_enc_len(checksumlen);
192  enlargeStringInfo(&buf, dstlen);
193  buf.len += pg_hex_encode((char *) checksumbuf, checksumlen,
194  &buf.data[buf.len], dstlen);
195  appendStringInfoChar(&buf, '"');
196  }
197 
198  /* Close out the object. */
199  appendStringInfoString(&buf, " }");
200 
201  /* OK, add it to the manifest. */
202  AppendStringToManifest(manifest, buf.data);
203 
204  /* Avoid leaking memory. */
205  pfree(buf.data);
206 }
207 
208 /*
209  * Add information about the WAL that will need to be replayed when restoring
210  * this backup to the manifest.
211  */
212 void
214  TimeLineID starttli, XLogRecPtr endptr,
215  TimeLineID endtli)
216 {
217  List *timelines;
218  ListCell *lc;
219  bool first_wal_range = true;
220  bool found_start_timeline = false;
221 
222  if (!IsManifestEnabled(manifest))
223  return;
224 
225  /* Terminate the list of files. */
226  AppendStringToManifest(manifest, "\n],\n");
227 
228  /* Read the timeline history for the ending timeline. */
229  timelines = readTimeLineHistory(endtli);
230 
231  /* Start a list of LSN ranges. */
232  AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
233 
234  foreach(lc, timelines)
235  {
236  TimeLineHistoryEntry *entry = lfirst(lc);
237  XLogRecPtr tl_beginptr;
238 
239  /*
240  * We only care about timelines that were active during the backup.
241  * Skip any that ended before the backup started. (Note that if
242  * entry->end is InvalidXLogRecPtr, it means that the timeline has not
243  * yet ended.)
244  */
245  if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
246  continue;
247 
248  /*
249  * Because the timeline history file lists newer timelines before
250  * older ones, the first timeline we encounter that is new enough to
251  * matter ought to match the ending timeline of the backup.
252  */
253  if (first_wal_range && endtli != entry->tli)
254  ereport(ERROR,
255  errmsg("expected end timeline %u but found timeline %u",
256  starttli, entry->tli));
257 
258  if (!XLogRecPtrIsInvalid(entry->begin))
259  tl_beginptr = entry->begin;
260  else
261  {
262  tl_beginptr = startptr;
263 
264  /*
265  * If we reach a TLI that has no valid beginning LSN, there can't
266  * be any more timelines in the history after this point, so we'd
267  * better have arrived at the expected starting TLI. If not,
268  * something's gone horribly wrong.
269  */
270  if (starttli != entry->tli)
271  ereport(ERROR,
272  errmsg("expected start timeline %u but found timeline %u",
273  starttli, entry->tli));
274  }
275 
276  AppendToManifest(manifest,
277  "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
278  first_wal_range ? "" : ",\n",
279  entry->tli,
280  (uint32) (tl_beginptr >> 32), (uint32) tl_beginptr,
281  (uint32) (endptr >> 32), (uint32) endptr);
282 
283  if (starttli == entry->tli)
284  {
285  found_start_timeline = true;
286  break;
287  }
288 
289  endptr = entry->begin;
290  first_wal_range = false;
291  }
292 
293  /*
294  * The last entry in the timeline history for the ending timeline should
295  * be the ending timeline itself. Verify that this is what we observed.
296  */
297  if (!found_start_timeline)
298  ereport(ERROR,
299  errmsg("start timeline %u not found in history of timeline %u",
300  starttli, endtli));
301 
302  /* Terminate the list of WAL ranges. */
303  AppendStringToManifest(manifest, "\n],\n");
304 }
305 
306 /*
307  * Finalize the backup manifest, and send it to the client.
308  */
309 void
311 {
312  StringInfoData protobuf;
313  uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
314  char *checksumstringbuf;
315  size_t manifest_bytes_done = 0;
316  uint64 dstlen;
317 
318  if (!IsManifestEnabled(manifest))
319  return;
320 
321  /*
322  * Append manifest checksum, so that the problems with the manifest itself
323  * can be detected.
324  *
325  * We always use SHA-256 for this, regardless of what algorithm is chosen
326  * for checksumming the files. If we ever want to make the checksum
327  * algorithm used for the manifest file variable, the client will need a
328  * way to figure out which algorithm to use as close to the beginning of
329  * the manifest file as possible, to avoid having to read the whole thing
330  * twice.
331  */
332  manifest->still_checksumming = false;
333  if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0)
334  elog(ERROR, "failed to finalize checksum of backup manifest");
335  AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
337  checksumstringbuf = palloc0(dstlen + 1); /* includes \0 */
338  pg_hex_encode((char *) checksumbuf, sizeof checksumbuf,
339  checksumstringbuf, dstlen);
340  checksumstringbuf[dstlen] = '\0';
341  AppendStringToManifest(manifest, checksumstringbuf);
342  AppendStringToManifest(manifest, "\"}\n");
343 
344  /*
345  * We've written all the data to the manifest file. Rewind the file so
346  * that we can read it all back.
347  */
348  if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
349  ereport(ERROR,
351  errmsg("could not rewind temporary file")));
352 
353  /* Send CopyOutResponse message */
354  pq_beginmessage(&protobuf, 'H');
355  pq_sendbyte(&protobuf, 0); /* overall format */
356  pq_sendint16(&protobuf, 0); /* natts */
357  pq_endmessage(&protobuf);
358 
359  /*
360  * Send CopyData messages.
361  *
362  * We choose to read back the data from the temporary file in chunks of
363  * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
364  * size, so it seems to make sense to match that value here.
365  */
366  while (manifest_bytes_done < manifest->manifest_size)
367  {
368  char manifestbuf[BLCKSZ];
369  size_t bytes_to_read;
370  size_t rc;
371 
372  bytes_to_read = Min(sizeof(manifestbuf),
373  manifest->manifest_size - manifest_bytes_done);
374  rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
375  if (rc != bytes_to_read)
376  ereport(ERROR,
378  errmsg("could not read from temporary file: %m")));
379  pq_putmessage('d', manifestbuf, bytes_to_read);
380  manifest_bytes_done += bytes_to_read;
381  }
382 
383  /* No more data, so send CopyDone message */
384  pq_putemptymessage('c');
385 
386  /* Release resources */
387  BufFileClose(manifest->buffile);
388 }
389 
390 /*
391  * Append a cstring to the manifest.
392  */
393 static void
395 {
396  int len = strlen(s);
397 
398  Assert(manifest != NULL);
399  if (manifest->still_checksumming)
400  {
401  if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
402  elog(ERROR, "failed to update checksum of backup manifest");
403  }
404  BufFileWrite(manifest->buffile, s, len);
405  manifest->manifest_size += len;
406 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:128
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
char * pg_checksum_type_name(pg_checksum_type type)
uint32 TimeLineID
Definition: xlogdefs.h:52
int64 pg_time_t
Definition: pgtime.h:23
void escape_json(StringInfo buf, const char *str)
Definition: json.c:1279
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1387
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:650
uint64 pg_hex_enc_len(size_t srclen)
Definition: hex.c:175
#define Min(x, y)
Definition: c.h:974
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:22
unsigned char uint8
Definition: c.h:427
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
TimeLineID tli
Definition: timeline.h:27
#define PG_CHECKSUM_MAX_LENGTH
void BufFileClose(BufFile *file)
Definition: buffile.c:395
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1457
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define AppendToManifest(manifest,...)
void pfree(void *pointer)
Definition: mcxt.c:1057
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
#define ERROR
Definition: elog.h:45
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:188
#define MAXPGPATH
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
static char * buf
Definition: pg_test_fsync.c:68
void SendBackupManifest(backup_manifest_info *manifest)
int errcode_for_file_access(void)
Definition: elog.c:727
enum manifest_option backup_manifest_option
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
unsigned int uint32
Definition: c.h:429
static void AppendStringToManifest(backup_manifest_info *manifest, char *s)
void FreeBackupManifest(backup_manifest_info *manifest)
pg_checksum_type
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest)
Definition: cryptohash.c:160
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 pg_hex_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
Definition: hex.c:74
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
void * palloc0(Size size)
Definition: mcxt.c:981
static bool IsManifestEnabled(backup_manifest_info *manifest)
pg_checksum_type checksum_type
#define ereport(elevel,...)
Definition: elog.h:155
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:64
XLogRecPtr end
Definition: timeline.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:792
#define lfirst(lc)
Definition: pg_list.h:169
static bool manifest
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:90
pg_cryptohash_ctx * manifest_ctx
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:125
int errmsg(const char *fmt,...)
Definition: elog.c:915
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
XLogRecPtr begin
Definition: timeline.h:28
#define elog(elevel,...)
Definition: elog.h:228
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:543
void AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
void BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:586
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:193
pg_checksum_type type
Definition: pg_list.h:50
#define snprintf
Definition: port.h:215
static XLogRecPtr startptr
Definition: basebackup.c:116