PostgreSQL Source Code  git master
backup_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  * code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/replication/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "libpq/libpq.h"
17 #include "libpq/pqformat.h"
18 #include "mb/pg_wchar.h"
20 #include "utils/builtins.h"
21 #include "utils/json.h"
22 
24 
25 /*
26  * Does the user want a backup manifest?
27  *
28  * It's simplest to always have a manifest_info object, so that we don't need
29  * checks for NULL pointers in too many places. However, if the user doesn't
30  * want a manifest, we set manifest->buffile to NULL.
31  */
32 static inline bool
34 {
35  return (manifest->buffile != NULL);
36 }
37 
38 /*
39  * Convenience macro for appending data to the backup manifest.
40  */
/*
 * AppendToManifest(manifest, fmt, ...)
 *
 * Format the arguments with psprintf(), append the resulting string to the
 * manifest via AppendStringToManifest(), and free the temporary string.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement; "if (x) AppendToManifest(...); else ..." then parses as
 * intended instead of leaving a stray ';' in front of the else.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char	   *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
47 
48 /*
49  * Initialize state so that we can construct a backup manifest.
50  *
51  * NB: Although the checksum type for the data files is configurable, the
52  * checksum for the manifest itself always uses SHA-256. See comments in
53  * SendBackupManifest.
54  */
55 void
57  backup_manifest_option want_manifest,
58  pg_checksum_type manifest_checksum_type)
59 {
60  if (want_manifest == MANIFEST_OPTION_NO)
61  manifest->buffile = NULL;
62  else
63  manifest->buffile = BufFileCreateTemp(false);
64  manifest->checksum_type = manifest_checksum_type;
65  pg_sha256_init(&manifest->manifest_ctx);
66  manifest->manifest_size = UINT64CONST(0);
67  manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
68  manifest->first_file = true;
69  manifest->still_checksumming = true;
70 
71  if (want_manifest != MANIFEST_OPTION_NO)
72  AppendToManifest(manifest,
73  "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
74  "\"Files\": [");
75 }
76 
77 /*
78  * Add an entry to the backup manifest for a file.
79  */
80 void
82  const char *pathname, size_t size, pg_time_t mtime,
83  pg_checksum_context *checksum_ctx)
84 {
85  char pathbuf[MAXPGPATH];
86  int pathlen;
88 
89  if (!IsManifestEnabled(manifest))
90  return;
91 
92  /*
93  * If this file is part of a tablespace, the pathname passed to this
94  * function will be relative to the tar file that contains it. We want the
95  * pathname relative to the data directory (ignoring the intermediate
96  * symlink traversal).
97  */
98  if (spcoid != NULL)
99  {
100  snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
101  pathname);
102  pathname = pathbuf;
103  }
104 
105  /*
106  * Each file's entry needs to be separated from any entry that follows by
107  * a comma, but there's no comma before the first one or after the last
108  * one. To make that work, adding a file to the manifest starts by
109  * terminating the most recently added line, with a comma if appropriate,
110  * but does not terminate the line inserted for this file.
111  */
112  initStringInfo(&buf);
113  if (manifest->first_file)
114  {
115  appendStringInfoChar(&buf, '\n');
116  manifest->first_file = false;
117  }
118  else
119  appendStringInfoString(&buf, ",\n");
120 
121  /*
122  * Write the relative pathname to this file out to the manifest. The
123  * manifest is always stored in UTF-8, so we have to encode paths that are
124  * not valid in that encoding.
125  */
126  pathlen = strlen(pathname);
127  if (!manifest->force_encode &&
128  pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
129  {
130  appendStringInfoString(&buf, "{ \"Path\": ");
131  escape_json(&buf, pathname);
132  appendStringInfoString(&buf, ", ");
133  }
134  else
135  {
136  appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
137  enlargeStringInfo(&buf, 2 * pathlen);
138  buf.len += hex_encode(pathname, pathlen,
139  &buf.data[buf.len]);
140  appendStringInfoString(&buf, "\", ");
141  }
142 
143  appendStringInfo(&buf, "\"Size\": %zu, ", size);
144 
145  /*
146  * Convert last modification time to a string and append it to the
147  * manifest. Since it's not clear what time zone to use and since time
148  * zone definitions can change, possibly causing confusion, use GMT
149  * always.
150  */
151  appendStringInfoString(&buf, "\"Last-Modified\": \"");
152  enlargeStringInfo(&buf, 128);
153  buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
154  pg_gmtime(&mtime));
155  appendStringInfoChar(&buf, '"');
156 
157  /* Add checksum information. */
158  if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
159  {
160  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
161  int checksumlen;
162 
163  checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
164 
165  appendStringInfo(&buf,
166  ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
167  pg_checksum_type_name(checksum_ctx->type));
168  enlargeStringInfo(&buf, 2 * checksumlen);
169  buf.len += hex_encode((char *) checksumbuf, checksumlen,
170  &buf.data[buf.len]);
171  appendStringInfoChar(&buf, '"');
172  }
173 
174  /* Close out the object. */
175  appendStringInfoString(&buf, " }");
176 
177  /* OK, add it to the manifest. */
178  AppendStringToManifest(manifest, buf.data);
179 
180  /* Avoid leaking memory. */
181  pfree(buf.data);
182 }
183 
184 /*
185  * Add information about the WAL that will need to be replayed when restoring
186  * this backup to the manifest.
187  */
188 void
190  TimeLineID starttli, XLogRecPtr endptr,
191  TimeLineID endtli)
192 {
193  List *timelines;
194  ListCell *lc;
195  bool first_wal_range = true;
196  bool found_start_timeline = false;
197 
198  if (!IsManifestEnabled(manifest))
199  return;
200 
201  /* Terminate the list of files. */
202  AppendStringToManifest(manifest, "\n],\n");
203 
204  /* Read the timeline history for the ending timeline. */
205  timelines = readTimeLineHistory(endtli);
206 
207  /* Start a list of LSN ranges. */
208  AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
209 
210  foreach(lc, timelines)
211  {
212  TimeLineHistoryEntry *entry = lfirst(lc);
213  XLogRecPtr tl_beginptr;
214 
215  /*
216  * We only care about timelines that were active during the backup.
217  * Skip any that ended before the backup started. (Note that if
218  * entry->end is InvalidXLogRecPtr, it means that the timeline has not
219  * yet ended.)
220  */
221  if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
222  continue;
223 
224  /*
225  * Because the timeline history file lists newer timelines before
226  * older ones, the first timeline we encounter that is new enough to
227  * matter ought to match the ending timeline of the backup.
228  */
229  if (first_wal_range && endtli != entry->tli)
230  ereport(ERROR,
231  errmsg("expected end timeline %u but found timeline %u",
232  starttli, entry->tli));
233 
234  if (!XLogRecPtrIsInvalid(entry->begin))
235  tl_beginptr = entry->begin;
236  else
237  {
238  tl_beginptr = startptr;
239 
240  /*
241  * If we reach a TLI that has no valid beginning LSN, there can't
242  * be any more timelines in the history after this point, so we'd
243  * better have arrived at the expected starting TLI. If not,
244  * something's gone horribly wrong.
245  */
246  if (starttli != entry->tli)
247  ereport(ERROR,
248  errmsg("expected start timeline %u but found timeline %u",
249  starttli, entry->tli));
250  }
251 
252  AppendToManifest(manifest,
253  "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
254  first_wal_range ? "" : ",\n",
255  entry->tli,
256  (uint32) (tl_beginptr >> 32), (uint32) tl_beginptr,
257  (uint32) (endptr >> 32), (uint32) endptr);
258 
259  if (starttli == entry->tli)
260  {
261  found_start_timeline = true;
262  break;
263  }
264 
265  endptr = entry->begin;
266  first_wal_range = false;
267  }
268 
269  /*
270  * The last entry in the timeline history for the ending timeline should
271  * be the ending timeline itself. Verify that this is what we observed.
272  */
273  if (!found_start_timeline)
274  ereport(ERROR,
275  errmsg("start timeline %u not found in history of timeline %u",
276  starttli, endtli));
277 
278  /* Terminate the list of WAL ranges. */
279  AppendStringToManifest(manifest, "\n],\n");
280 }
281 
282 /*
283  * Finalize the backup manifest, and send it to the client.
284  */
285 void
287 {
288  StringInfoData protobuf;
289  uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
290  char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
291  size_t manifest_bytes_done = 0;
292 
293  if (!IsManifestEnabled(manifest))
294  return;
295 
296  /*
297  * Append manifest checksum, so that the problems with the manifest itself
298  * can be detected.
299  *
300  * We always use SHA-256 for this, regardless of what algorithm is chosen
301  * for checksumming the files. If we ever want to make the checksum
302  * algorithm used for the manifest file variable, the client will need a
303  * way to figure out which algorithm to use as close to the beginning of
304  * the manifest file as possible, to avoid having to read the whole thing
305  * twice.
306  */
307  manifest->still_checksumming = false;
308  pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
309  AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
310  hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
311  checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
312  AppendStringToManifest(manifest, checksumstringbuf);
313  AppendStringToManifest(manifest, "\"}\n");
314 
315  /*
316  * We've written all the data to the manifest file. Rewind the file so
317  * that we can read it all back.
318  */
319  if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
320  ereport(ERROR,
322  errmsg("could not rewind temporary file")));
323 
324  /* Send CopyOutResponse message */
325  pq_beginmessage(&protobuf, 'H');
326  pq_sendbyte(&protobuf, 0); /* overall format */
327  pq_sendint16(&protobuf, 0); /* natts */
328  pq_endmessage(&protobuf);
329 
330  /*
331  * Send CopyData messages.
332  *
333  * We choose to read back the data from the temporary file in chunks of
334  * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
335  * size, so it seems to make sense to match that value here.
336  */
337  while (manifest_bytes_done < manifest->manifest_size)
338  {
339  char manifestbuf[BLCKSZ];
340  size_t bytes_to_read;
341  size_t rc;
342 
343  bytes_to_read = Min(sizeof(manifestbuf),
344  manifest->manifest_size - manifest_bytes_done);
345  rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
346  if (rc != bytes_to_read)
347  ereport(ERROR,
349  errmsg("could not read from temporary file: %m")));
350  pq_putmessage('d', manifestbuf, bytes_to_read);
351  manifest_bytes_done += bytes_to_read;
352  }
353 
354  /* No more data, so send CopyDone message */
355  pq_putemptymessage('c');
356 
357  /* Release resources */
358  BufFileClose(manifest->buffile);
359 }
360 
361 /*
362  * Append a cstring to the manifest.
363  */
364 static void
366 {
367  int len = strlen(s);
368 
369  Assert(manifest != NULL);
370  if (manifest->still_checksumming)
371  pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
372  BufFileWrite(manifest->buffile, s, len);
373  manifest->manifest_size += len;
374 }
size_t pg_strftime(char *s, size_t max, const char *format, const struct pg_tm *tm)
Definition: strftime.c:128
static void pq_sendint16(StringInfo buf, uint16 i)
Definition: pqformat.h:137
void InitializeBackupManifest(backup_manifest_info *manifest, backup_manifest_option want_manifest, pg_checksum_type manifest_checksum_type)
char * pg_checksum_type_name(pg_checksum_type type)
uint32 TimeLineID
Definition: xlogdefs.h:52
int64 pg_time_t
Definition: pgtime.h:23
void escape_json(StringInfo buf, const char *str)
Definition: json.c:1279
struct pg_tm * pg_gmtime(const pg_time_t *timep)
Definition: localtime.c:1387
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:650
#define Min(x, y)
Definition: c.h:928
List * readTimeLineHistory(TimeLineID targetTLI)
Definition: timeline.c:76
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:62
unsigned char uint8
Definition: c.h:373
void pq_putemptymessage(char msgtype)
Definition: pqformat.c:390
TimeLineID tli
Definition: timeline.h:27
#define PG_CHECKSUM_MAX_LENGTH
void pg_sha256_init(pg_sha256_ctx *context)
Definition: sha2.c:268
void BufFileClose(BufFile *file)
Definition: buffile.c:395
void pq_beginmessage(StringInfo buf, char msgtype)
Definition: pqformat.c:87
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1457
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:161
#define AppendToManifest(manifest,...)
void pfree(void *pointer)
Definition: mcxt.c:1057
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
#define ERROR
Definition: elog.h:43
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:188
#define MAXPGPATH
pg_sha256_ctx manifest_ctx
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
static char * buf
Definition: pg_test_fsync.c:68
void SendBackupManifest(backup_manifest_info *manifest)
int errcode_for_file_access(void)
Definition: elog.c:633
enum manifest_option backup_manifest_option
void enlargeStringInfo(StringInfo str, int needed)
Definition: stringinfo.c:283
unsigned int uint32
Definition: c.h:375
static void AppendStringToManifest(backup_manifest_info *manifest, char *s)
#define PG_SHA256_DIGEST_STRING_LENGTH
Definition: sha2.h:63
pg_checksum_type
void AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:161
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
void pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len)
Definition: sha2.c:465
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
static bool IsManifestEnabled(backup_manifest_info *manifest)
pg_checksum_type checksum_type
#define ereport(elevel,...)
Definition: elog.h:144
XLogRecPtr end
Definition: timeline.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:746
#define lfirst(lc)
Definition: pg_list.h:169
static bool manifest
int errmsg(const char *fmt,...)
Definition: elog.c:821
void pg_sha256_final(pg_sha256_ctx *context, uint8 *digest)
Definition: sha2.c:566
void pq_endmessage(StringInfo buf)
Definition: pqformat.c:298
XLogRecPtr begin
Definition: timeline.h:28
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:543
void AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, const char *pathname, size_t size, pg_time_t mtime, pg_checksum_context *checksum_ctx)
#define pq_putmessage(msgtype, s, len)
Definition: libpq.h:42
void BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:586
pg_checksum_type type
Definition: pg_list.h:50
#define snprintf
Definition: port.h:215
static XLogRecPtr startptr
Definition: basebackup.c:116