PostgreSQL Source Code  git master
bbstreamer_tar.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * bbstreamer_tar.c
4  *
5  * This module implements three types of tar processing. A tar parser
6  * expects unlabelled chunks of data (i.e. BBSTREAMER_UNKNOWN) and splits
7  * it into labelled chunks (any other value of bbstreamer_archive_context).
8  * A tar archiver does the reverse: it takes a bunch of labelled chunks
9  * and produces a tarfile, optionally replacing member headers and trailers
10  * so that upstream bbstreamer objects can perform surgery on the tarfile
11  * contents without knowing the details of the tar format. A tar terminator
12  * just adds two blocks of NUL bytes to the end of the file, since older
13  * server versions produce files with this terminator omitted.
14  *
15  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16  *
17  * IDENTIFICATION
18  * src/bin/pg_basebackup/bbstreamer_tar.c
19  *-------------------------------------------------------------------------
20  */
21 
22 #include "postgres_fe.h"
23 
24 #include <time.h>
25 
26 #include "bbstreamer.h"
27 #include "common/logging.h"
28 #include "pgtar.h"
29 
30 typedef struct bbstreamer_tar_parser
31 {
38 
40 {
44 
45 static void bbstreamer_tar_parser_content(bbstreamer *streamer,
46  bbstreamer_member *member,
47  const char *data, int len,
49 static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
50 static void bbstreamer_tar_parser_free(bbstreamer *streamer);
51 static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
52 
57 };
58 
59 static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
60  bbstreamer_member *member,
61  const char *data, int len,
63 static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
64 static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
65 
70 };
71 
72 static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
73  bbstreamer_member *member,
74  const char *data, int len,
76 static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
77 static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
78 
83 };
84 
85 /*
86  * Create a bbstreamer that can parse a stream of content as tar data.
87  *
88  * The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
89  * specified by 'next' will receive a series of typed chunks, as per the
90  * conventions described in bbstreamer.h.
91  */
92 extern bbstreamer *
94 {
95  bbstreamer_tar_parser *streamer;
96 
97  streamer = palloc0(sizeof(bbstreamer_tar_parser));
98  *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
100  streamer->base.bbs_next = next;
101  initStringInfo(&streamer->base.bbs_buffer);
103 
104  return &streamer->base;
105 }
106 
107 /*
108  * Parse unknown content as tar data.
109  */
110 static void
112  const char *data, int len,
114 {
115  bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
116  size_t nbytes;
117 
118  /* Expect unparsed input. */
119  Assert(member == NULL);
120  Assert(context == BBSTREAMER_UNKNOWN);
121 
122  while (len > 0)
123  {
124  switch (mystreamer->next_context)
125  {
127 
128  /*
129  * If we're expecting an archive member header, accumulate a
130  * full block of data before doing anything further.
131  */
132  if (!bbstreamer_buffer_until(streamer, &data, &len,
134  return;
135 
136  /*
137  * Now we can process the header and get ready to process the
138  * file contents; however, we might find out that what we
139  * thought was the next file header is actually the start of
140  * the archive trailer. Switch modes accordingly.
141  */
142  if (bbstreamer_tar_header(mystreamer))
143  {
144  if (mystreamer->member.size == 0)
145  {
146  /* No content; trailer is zero-length. */
147  bbstreamer_content(mystreamer->base.bbs_next,
148  &mystreamer->member,
149  NULL, 0,
151 
152  /* Expect next header. */
154  }
155  else
156  {
157  /* Expect contents. */
159  }
160  mystreamer->base.bbs_buffer.len = 0;
161  mystreamer->file_bytes_sent = 0;
162  }
163  else
165  break;
166 
168 
169  /*
170  * Send as much content as we have, but not more than the
171  * remaining file length.
172  */
173  Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174  nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175  nbytes = Min(nbytes, len);
176  Assert(nbytes > 0);
177  bbstreamer_content(mystreamer->base.bbs_next,
178  &mystreamer->member,
179  data, nbytes,
181  mystreamer->file_bytes_sent += nbytes;
182  data += nbytes;
183  len -= nbytes;
184 
185  /*
186  * If we've not yet sent the whole file, then there's more
187  * content to come; otherwise, it's time to expect the file
188  * trailer.
189  */
190  Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191  if (mystreamer->file_bytes_sent == mystreamer->member.size)
192  {
193  if (mystreamer->pad_bytes_expected == 0)
194  {
195  /* Trailer is zero-length. */
196  bbstreamer_content(mystreamer->base.bbs_next,
197  &mystreamer->member,
198  NULL, 0,
200 
201  /* Expect next header. */
203  }
204  else
205  {
206  /* Trailer is not zero-length. */
208  }
209  mystreamer->base.bbs_buffer.len = 0;
210  }
211  break;
212 
214 
215  /*
216  * If we're expecting an archive member trailer, accumulate
217  * the expected number of padding bytes before sending
218  * anything onward.
219  */
220  if (!bbstreamer_buffer_until(streamer, &data, &len,
221  mystreamer->pad_bytes_expected))
222  return;
223 
224  /* OK, now we can send it. */
225  bbstreamer_content(mystreamer->base.bbs_next,
226  &mystreamer->member,
227  data, mystreamer->pad_bytes_expected,
229 
230  /* Expect next file header. */
232  mystreamer->base.bbs_buffer.len = 0;
233  break;
234 
236 
237  /*
238  * We've seen an end-of-archive indicator, so anything more is
239  * buffered and sent as part of the archive trailer. But we
240  * don't expect more than 2 blocks.
241  */
242  bbstreamer_buffer_bytes(streamer, &data, &len, len);
243  if (len > 2 * TAR_BLOCK_SIZE)
244  pg_fatal("tar file trailer exceeds 2 blocks");
245  return;
246 
247  default:
248  /* Shouldn't happen. */
249  pg_fatal("unexpected state while parsing tar archive");
250  }
251  }
252 }
253 
254 /*
255  * Parse a file header within a tar stream.
256  *
257  * The return value is true if we found a file header and passed it on to the
258  * next bbstreamer; it is false if we have reached the archive trailer.
259  */
260 static bool
262 {
263  bool has_nonzero_byte = false;
264  int i;
265  bbstreamer_member *member = &mystreamer->member;
266  char *buffer = mystreamer->base.bbs_buffer.data;
267 
268  Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269 
270  /* Check whether we've got a block of all zero bytes. */
271  for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272  {
273  if (buffer[i] != '\0')
274  {
275  has_nonzero_byte = true;
276  break;
277  }
278  }
279 
280  /*
281  * If the entire block was zeros, this is the end of the archive, not the
282  * start of the next file.
283  */
284  if (!has_nonzero_byte)
285  return false;
286 
287  /*
288  * Parse key fields out of the header.
289  *
290  * FIXME: It's terrible that we use hard-coded values here instead of some
291  * more principled approach. It's been like this for a long time, but we
292  * ought to do better.
293  */
294  strlcpy(member->pathname, &buffer[0], MAXPGPATH);
295  if (member->pathname[0] == '\0')
296  pg_fatal("tar member has empty name");
297  member->size = read_tar_number(&buffer[124], 12);
298  member->mode = read_tar_number(&buffer[100], 8);
299  member->uid = read_tar_number(&buffer[108], 8);
300  member->gid = read_tar_number(&buffer[116], 8);
301  member->is_directory = (buffer[156] == '5');
302  member->is_link = (buffer[156] == '2');
303  if (member->is_link)
304  strlcpy(member->linktarget, &buffer[157], 100);
305 
306  /* Compute number of padding bytes. */
307  mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
308 
309  /* Forward the entire header to the next bbstreamer. */
310  bbstreamer_content(mystreamer->base.bbs_next, member,
311  buffer, TAR_BLOCK_SIZE,
313 
314  return true;
315 }
316 
317 /*
318  * End-of-stream processing for a tar parser.
319  */
320 static void
322 {
323  bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
324 
325  if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
326  (mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
327  mystreamer->base.bbs_buffer.len > 0))
328  pg_fatal("COPY stream ended before last file was finished");
329 
330  /* Send the archive trailer, even if empty. */
331  bbstreamer_content(streamer->bbs_next, NULL,
332  streamer->bbs_buffer.data, streamer->bbs_buffer.len,
334 
335  /* Now finalize successor. */
336  bbstreamer_finalize(streamer->bbs_next);
337 }
338 
339 /*
340  * Free memory associated with a tar parser.
341  */
342 static void
344 {
345  pfree(streamer->bbs_buffer.data);
346  bbstreamer_free(streamer->bbs_next);
347 }
348 
349 /*
350  * Create an bbstreamer that can generate a tar archive.
351  *
352  * This is intended to be usable either for generating a brand-new tar archive
353  * or for modifying one on the fly. The input should be a series of typed
354  * chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
355  * bbstreamer_tar_parser_content.
356  */
357 extern bbstreamer *
359 {
360  bbstreamer_tar_archiver *streamer;
361 
362  streamer = palloc0(sizeof(bbstreamer_tar_archiver));
363  *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
365  streamer->base.bbs_next = next;
366 
367  return &streamer->base;
368 }
369 
370 /*
371  * Fix up the stream of input chunks to create a valid tar file.
372  *
373  * If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
374  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
375  * passed through without change. Any other size is a fatal error (and
376  * indicates a bug).
377  *
378  * Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
379  * corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
380  * scratch. Specifically, we construct a block of zero bytes sufficient to
381  * pad out to a block boundary, as required by the tar format. Other
382  * BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
383  *
384  * Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
385  *
386  * The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
387  * blocks of zero bytes. Not all tar programs require this, but apparently
388  * some do. The server does not supply this trailer. If no archive trailer is
389  * present, one will be added by bbstreamer_tar_parser_finalize.
390  */
391 static void
393  bbstreamer_member *member,
394  const char *data, int len,
396 {
397  bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
398  char buffer[2 * TAR_BLOCK_SIZE];
399 
400  Assert(context != BBSTREAMER_UNKNOWN);
401 
402  if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
403  {
404  Assert(len == 0);
405 
406  /* Replace zero-length tar header with a newly constructed one. */
407  tarCreateHeader(buffer, member->pathname, NULL,
408  member->size, member->mode, member->uid, member->gid,
409  time(NULL));
410  data = buffer;
412 
413  /* Also make a note to replace padding, in case size changed. */
414  mystreamer->rearchive_member = true;
415  }
416  else if (context == BBSTREAMER_MEMBER_TRAILER &&
417  mystreamer->rearchive_member)
418  {
419  int pad_bytes = tarPaddingBytesRequired(member->size);
420 
421  /* Also replace padding, if we regenerated the header. */
422  memset(buffer, 0, pad_bytes);
423  data = buffer;
424  len = pad_bytes;
425 
426  /* Don't do this again unless we replace another header. */
427  mystreamer->rearchive_member = false;
428  }
429  else if (context == BBSTREAMER_ARCHIVE_TRAILER)
430  {
431  /* Trailer should always be two blocks of zero bytes. */
432  memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
433  data = buffer;
434  len = 2 * TAR_BLOCK_SIZE;
435  }
436 
437  bbstreamer_content(streamer->bbs_next, member, data, len, context);
438 }
439 
440 /*
441  * End-of-stream processing for a tar archiver.
442  */
443 static void
445 {
446  bbstreamer_finalize(streamer->bbs_next);
447 }
448 
449 /*
450  * Free memory associated with a tar archiver.
451  */
452 static void
454 {
455  bbstreamer_free(streamer->bbs_next);
456  pfree(streamer);
457 }
458 
459 /*
460  * Create a bbstreamer that blindly adds two blocks of NUL bytes to the
461  * end of an incomplete tarfile that the server might send us.
462  */
463 bbstreamer *
465 {
466  bbstreamer *streamer;
467 
468  streamer = palloc0(sizeof(bbstreamer));
469  *((const bbstreamer_ops **) &streamer->bbs_ops) =
471  streamer->bbs_next = next;
472 
473  return streamer;
474 }
475 
476 /*
477  * Pass all the content through without change.
478  */
479 static void
481  bbstreamer_member *member,
482  const char *data, int len,
484 {
485  /* Expect unparsed input. */
486  Assert(member == NULL);
487  Assert(context == BBSTREAMER_UNKNOWN);
488 
489  /* Just forward it. */
490  bbstreamer_content(streamer->bbs_next, member, data, len, context);
491 }
492 
493 /*
494  * At the end, blindly add the two blocks of NUL bytes which the server fails
495  * to supply.
496  */
497 static void
499 {
500  char buffer[2 * TAR_BLOCK_SIZE];
501 
502  memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
503  bbstreamer_content(streamer->bbs_next, NULL, buffer,
505  bbstreamer_finalize(streamer->bbs_next);
506 }
507 
508 /*
509  * Free memory associated with a tar terminator.
510  */
511 static void
513 {
514  bbstreamer_free(streamer->bbs_next);
515  pfree(streamer);
516 }
static void bbstreamer_content(bbstreamer *streamer, bbstreamer_member *member, const char *data, int len, bbstreamer_archive_context context)
Definition: bbstreamer.h:126
static void bbstreamer_finalize(bbstreamer *streamer)
Definition: bbstreamer.h:136
static void bbstreamer_buffer_bytes(bbstreamer *streamer, const char **data, int *len, int nbytes)
Definition: bbstreamer.h:157
static bool bbstreamer_buffer_until(bbstreamer *streamer, const char **data, int *len, int target_bytes)
Definition: bbstreamer.h:175
bbstreamer_archive_context
Definition: bbstreamer.h:54
@ BBSTREAMER_ARCHIVE_TRAILER
Definition: bbstreamer.h:59
@ BBSTREAMER_MEMBER_HEADER
Definition: bbstreamer.h:56
@ BBSTREAMER_MEMBER_TRAILER
Definition: bbstreamer.h:58
@ BBSTREAMER_UNKNOWN
Definition: bbstreamer.h:55
@ BBSTREAMER_MEMBER_CONTENTS
Definition: bbstreamer.h:57
static void bbstreamer_free(bbstreamer *streamer)
Definition: bbstreamer.h:144
static void bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member, const char *data, int len, bbstreamer_archive_context context)
const bbstreamer_ops bbstreamer_tar_terminator_ops
struct bbstreamer_tar_archiver bbstreamer_tar_archiver
struct bbstreamer_tar_parser bbstreamer_tar_parser
static void bbstreamer_tar_parser_finalize(bbstreamer *streamer)
static void bbstreamer_tar_terminator_free(bbstreamer *streamer)
bbstreamer * bbstreamer_tar_terminator_new(bbstreamer *next)
static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
static void bbstreamer_tar_parser_free(bbstreamer *streamer)
bbstreamer * bbstreamer_tar_parser_new(bbstreamer *next)
const bbstreamer_ops bbstreamer_tar_parser_ops
static void bbstreamer_tar_archiver_free(bbstreamer *streamer)
static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
static void bbstreamer_tar_archiver_content(bbstreamer *streamer, bbstreamer_member *member, const char *data, int len, bbstreamer_archive_context context)
static void bbstreamer_tar_terminator_content(bbstreamer *streamer, bbstreamer_member *member, const char *data, int len, bbstreamer_archive_context context)
bbstreamer * bbstreamer_tar_archiver_new(bbstreamer *next)
const bbstreamer_ops bbstreamer_tar_archiver_ops
static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
static int32 next
Definition: blutils.c:219
#define Min(x, y)
Definition: c.h:988
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc0(Size size)
Definition: mcxt.c:1257
#define pg_fatal(...)
#define MAXPGPATH
const void size_t len
const void * data
uint64 read_tar_number(const char *s, int len)
Definition: tar.c:58
static size_t tarPaddingBytesRequired(size_t len)
Definition: pgtar.h:40
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:114
#define TAR_BLOCK_SIZE
Definition: pgtar.h:17
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
char pathname[MAXPGPATH]
Definition: bbstreamer.h:72
char linktarget[MAXPGPATH]
Definition: bbstreamer.h:79
void(* content)(bbstreamer *streamer, bbstreamer_member *member, const char *data, int len, bbstreamer_archive_context context)
Definition: bbstreamer.h:117
bbstreamer_archive_context next_context
bbstreamer_member member
const bbstreamer_ops * bbs_ops
Definition: bbstreamer.h:100
StringInfoData bbs_buffer
Definition: bbstreamer.h:102
bbstreamer * bbs_next
Definition: bbstreamer.h:101