PostgreSQL Source Code  git master
astreamer_tar.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * astreamer_tar.c
4  *
5  * This module implements three types of tar processing. A tar parser
6  * expects unlabelled chunks of data (e.g. ASTREAMER_UNKNOWN) and splits
7  * it into labelled chunks (any other value of astreamer_archive_context).
8  * A tar archiver does the reverse: it takes a bunch of labelled chunks
9  * and produces a tarfile, optionally replacing member headers and trailers
10  * so that upstream astreamer objects can perform surgery on the tarfile
11  * contents without knowing the details of the tar format. A tar terminator
12  * just adds two blocks of NUL bytes to the end of the file, since older
13  * server versions produce files with this terminator omitted.
14  *
15  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16  *
17  * IDENTIFICATION
18  * src/bin/pg_basebackup/astreamer_tar.c
19  *-------------------------------------------------------------------------
20  */
21 
22 #include "postgres_fe.h"
23 
24 #include <time.h>
25 
26 #include "common/logging.h"
27 #include "fe_utils/astreamer.h"
28 #include "pgtar.h"
29 
30 typedef struct astreamer_tar_parser
31 {
38 
39 typedef struct astreamer_tar_archiver
40 {
44 
45 static void astreamer_tar_parser_content(astreamer *streamer,
46  astreamer_member *member,
47  const char *data, int len,
49 static void astreamer_tar_parser_finalize(astreamer *streamer);
50 static void astreamer_tar_parser_free(astreamer *streamer);
51 static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);
52 
57 };
58 
59 static void astreamer_tar_archiver_content(astreamer *streamer,
60  astreamer_member *member,
61  const char *data, int len,
63 static void astreamer_tar_archiver_finalize(astreamer *streamer);
64 static void astreamer_tar_archiver_free(astreamer *streamer);
65 
70 };
71 
72 static void astreamer_tar_terminator_content(astreamer *streamer,
73  astreamer_member *member,
74  const char *data, int len,
76 static void astreamer_tar_terminator_finalize(astreamer *streamer);
77 static void astreamer_tar_terminator_free(astreamer *streamer);
78 
83 };
84 
85 /*
86  * Create a astreamer that can parse a stream of content as tar data.
87  *
88  * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89  * specified by 'next' will receive a series of typed chunks, as per the
90  * conventions described in astreamer.h.
91  */
92 astreamer *
94 {
95  astreamer_tar_parser *streamer;
96 
97  streamer = palloc0(sizeof(astreamer_tar_parser));
98  *((const astreamer_ops **) &streamer->base.bbs_ops) =
100  streamer->base.bbs_next = next;
101  initStringInfo(&streamer->base.bbs_buffer);
103 
104  return &streamer->base;
105 }
106 
107 /*
108  * Parse unknown content as tar data.
109  */
110 static void
112  const char *data, int len,
114 {
115  astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
116  size_t nbytes;
117 
118  /* Expect unparsed input. */
119  Assert(member == NULL);
121 
122  while (len > 0)
123  {
124  switch (mystreamer->next_context)
125  {
127 
128  /*
129  * If we're expecting an archive member header, accumulate a
130  * full block of data before doing anything further.
131  */
132  if (!astreamer_buffer_until(streamer, &data, &len,
134  return;
135 
136  /*
137  * Now we can process the header and get ready to process the
138  * file contents; however, we might find out that what we
139  * thought was the next file header is actually the start of
140  * the archive trailer. Switch modes accordingly.
141  */
142  if (astreamer_tar_header(mystreamer))
143  {
144  if (mystreamer->member.size == 0)
145  {
146  /* No content; trailer is zero-length. */
147  astreamer_content(mystreamer->base.bbs_next,
148  &mystreamer->member,
149  NULL, 0,
151 
152  /* Expect next header. */
154  }
155  else
156  {
157  /* Expect contents. */
159  }
160  mystreamer->base.bbs_buffer.len = 0;
161  mystreamer->file_bytes_sent = 0;
162  }
163  else
165  break;
166 
168 
169  /*
170  * Send as much content as we have, but not more than the
171  * remaining file length.
172  */
173  Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174  nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175  nbytes = Min(nbytes, len);
176  Assert(nbytes > 0);
177  astreamer_content(mystreamer->base.bbs_next,
178  &mystreamer->member,
179  data, nbytes,
181  mystreamer->file_bytes_sent += nbytes;
182  data += nbytes;
183  len -= nbytes;
184 
185  /*
186  * If we've not yet sent the whole file, then there's more
187  * content to come; otherwise, it's time to expect the file
188  * trailer.
189  */
190  Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191  if (mystreamer->file_bytes_sent == mystreamer->member.size)
192  {
193  if (mystreamer->pad_bytes_expected == 0)
194  {
195  /* Trailer is zero-length. */
196  astreamer_content(mystreamer->base.bbs_next,
197  &mystreamer->member,
198  NULL, 0,
200 
201  /* Expect next header. */
203  }
204  else
205  {
206  /* Trailer is not zero-length. */
208  }
209  mystreamer->base.bbs_buffer.len = 0;
210  }
211  break;
212 
214 
215  /*
216  * If we're expecting an archive member trailer, accumulate
217  * the expected number of padding bytes before sending
218  * anything onward.
219  */
220  if (!astreamer_buffer_until(streamer, &data, &len,
221  mystreamer->pad_bytes_expected))
222  return;
223 
224  /* OK, now we can send it. */
225  astreamer_content(mystreamer->base.bbs_next,
226  &mystreamer->member,
227  data, mystreamer->pad_bytes_expected,
229 
230  /* Expect next file header. */
232  mystreamer->base.bbs_buffer.len = 0;
233  break;
234 
236 
237  /*
238  * We've seen an end-of-archive indicator, so anything more is
239  * buffered and sent as part of the archive trailer. But we
240  * don't expect more than 2 blocks.
241  */
242  astreamer_buffer_bytes(streamer, &data, &len, len);
243  if (len > 2 * TAR_BLOCK_SIZE)
244  pg_fatal("tar file trailer exceeds 2 blocks");
245  return;
246 
247  default:
248  /* Shouldn't happen. */
249  pg_fatal("unexpected state while parsing tar archive");
250  }
251  }
252 }
253 
254 /*
255  * Parse a file header within a tar stream.
256  *
257  * The return value is true if we found a file header and passed it on to the
258  * next astreamer; it is false if we have reached the archive trailer.
259  */
260 static bool
262 {
263  bool has_nonzero_byte = false;
264  int i;
265  astreamer_member *member = &mystreamer->member;
266  char *buffer = mystreamer->base.bbs_buffer.data;
267 
268  Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269 
270  /* Check whether we've got a block of all zero bytes. */
271  for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272  {
273  if (buffer[i] != '\0')
274  {
275  has_nonzero_byte = true;
276  break;
277  }
278  }
279 
280  /*
281  * If the entire block was zeros, this is the end of the archive, not the
282  * start of the next file.
283  */
284  if (!has_nonzero_byte)
285  return false;
286 
287  /*
288  * Parse key fields out of the header.
289  */
290  strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
291  if (member->pathname[0] == '\0')
292  pg_fatal("tar member has empty name");
293  member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
294  member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
295  member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
296  member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
297  member->is_directory =
299  member->is_link =
301  if (member->is_link)
302  strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
303 
304  /* Compute number of padding bytes. */
305  mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
306 
307  /* Forward the entire header to the next astreamer. */
308  astreamer_content(mystreamer->base.bbs_next, member,
309  buffer, TAR_BLOCK_SIZE,
311 
312  return true;
313 }
314 
315 /*
316  * End-of-stream processing for a tar parser.
317  */
318 static void
320 {
321  astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
322 
323  if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
324  (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
325  mystreamer->base.bbs_buffer.len > 0))
326  pg_fatal("COPY stream ended before last file was finished");
327 
328  /* Send the archive trailer, even if empty. */
329  astreamer_content(streamer->bbs_next, NULL,
330  streamer->bbs_buffer.data, streamer->bbs_buffer.len,
332 
333  /* Now finalize successor. */
334  astreamer_finalize(streamer->bbs_next);
335 }
336 
337 /*
338  * Free memory associated with a tar parser.
339  */
340 static void
342 {
343  pfree(streamer->bbs_buffer.data);
344  astreamer_free(streamer->bbs_next);
345 }
346 
347 /*
348  * Create a astreamer that can generate a tar archive.
349  *
350  * This is intended to be usable either for generating a brand-new tar archive
351  * or for modifying one on the fly. The input should be a series of typed
352  * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
353  * astreamer_tar_parser_content.
354  */
355 astreamer *
357 {
358  astreamer_tar_archiver *streamer;
359 
360  streamer = palloc0(sizeof(astreamer_tar_archiver));
361  *((const astreamer_ops **) &streamer->base.bbs_ops) =
363  streamer->base.bbs_next = next;
364 
365  return &streamer->base;
366 }
367 
368 /*
369  * Fix up the stream of input chunks to create a valid tar file.
370  *
371  * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
372  * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
373  * passed through without change. Any other size is a fatal error (and
374  * indicates a bug).
375  *
376  * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
377  * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
378  * scratch. Specifically, we construct a block of zero bytes sufficient to
379  * pad out to a block boundary, as required by the tar format. Other
380  * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
381  *
382  * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
383  *
384  * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
385  * blocks of zero bytes. Not all tar programs require this, but apparently
386  * some do. The server does not supply this trailer. If no archive trailer is
387  * present, one will be added by astreamer_tar_parser_finalize.
388  */
389 static void
391  astreamer_member *member,
392  const char *data, int len,
394 {
395  astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
396  char buffer[2 * TAR_BLOCK_SIZE];
397 
399 
401  {
402  Assert(len == 0);
403 
404  /* Replace zero-length tar header with a newly constructed one. */
405  tarCreateHeader(buffer, member->pathname, NULL,
406  member->size, member->mode, member->uid, member->gid,
407  time(NULL));
408  data = buffer;
410 
411  /* Also make a note to replace padding, in case size changed. */
412  mystreamer->rearchive_member = true;
413  }
414  else if (context == ASTREAMER_MEMBER_TRAILER &&
415  mystreamer->rearchive_member)
416  {
417  int pad_bytes = tarPaddingBytesRequired(member->size);
418 
419  /* Also replace padding, if we regenerated the header. */
420  memset(buffer, 0, pad_bytes);
421  data = buffer;
422  len = pad_bytes;
423 
424  /* Don't do this again unless we replace another header. */
425  mystreamer->rearchive_member = false;
426  }
428  {
429  /* Trailer should always be two blocks of zero bytes. */
430  memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
431  data = buffer;
432  len = 2 * TAR_BLOCK_SIZE;
433  }
434 
435  astreamer_content(streamer->bbs_next, member, data, len, context);
436 }
437 
438 /*
439  * End-of-stream processing for a tar archiver.
440  */
441 static void
443 {
444  astreamer_finalize(streamer->bbs_next);
445 }
446 
447 /*
448  * Free memory associated with a tar archiver.
449  */
450 static void
452 {
453  astreamer_free(streamer->bbs_next);
454  pfree(streamer);
455 }
456 
457 /*
458  * Create a astreamer that blindly adds two blocks of NUL bytes to the
459  * end of an incomplete tarfile that the server might send us.
460  */
461 astreamer *
463 {
464  astreamer *streamer;
465 
466  streamer = palloc0(sizeof(astreamer));
467  *((const astreamer_ops **) &streamer->bbs_ops) =
469  streamer->bbs_next = next;
470 
471  return streamer;
472 }
473 
474 /*
475  * Pass all the content through without change.
476  */
477 static void
479  astreamer_member *member,
480  const char *data, int len,
482 {
483  /* Expect unparsed input. */
484  Assert(member == NULL);
486 
487  /* Just forward it. */
488  astreamer_content(streamer->bbs_next, member, data, len, context);
489 }
490 
491 /*
492  * At the end, blindly add the two blocks of NUL bytes which the server fails
493  * to supply.
494  */
495 static void
497 {
498  char buffer[2 * TAR_BLOCK_SIZE];
499 
500  memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
501  astreamer_content(streamer->bbs_next, NULL, buffer,
503  astreamer_finalize(streamer->bbs_next);
504 }
505 
506 /*
507  * Free memory associated with a tar terminator.
508  */
509 static void
511 {
512  astreamer_free(streamer->bbs_next);
513  pfree(streamer);
514 }
static void astreamer_free(astreamer *streamer)
Definition: astreamer.h:153
static void astreamer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:135
static bool astreamer_buffer_until(astreamer *streamer, const char **data, int *len, int target_bytes)
Definition: astreamer.h:184
static void astreamer_finalize(astreamer *streamer)
Definition: astreamer.h:145
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len, int nbytes)
Definition: astreamer.h:166
static const astreamer_ops astreamer_tar_terminator_ops
Definition: astreamer_tar.c:79
static const astreamer_ops astreamer_tar_parser_ops
Definition: astreamer_tar.c:53
struct astreamer_tar_parser astreamer_tar_parser
struct astreamer_tar_archiver astreamer_tar_archiver
static void astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
astreamer * astreamer_tar_parser_new(astreamer *next)
Definition: astreamer_tar.c:93
static void astreamer_tar_terminator_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_archiver_free(astreamer *streamer)
astreamer * astreamer_tar_archiver_new(astreamer *next)
astreamer * astreamer_tar_terminator_new(astreamer *next)
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer)
static void astreamer_tar_parser_finalize(astreamer *streamer)
static void astreamer_tar_archiver_finalize(astreamer *streamer)
static const astreamer_ops astreamer_tar_archiver_ops
Definition: astreamer_tar.c:66
static void astreamer_tar_terminator_free(astreamer *streamer)
static void astreamer_tar_terminator_finalize(astreamer *streamer)
static void astreamer_tar_parser_free(astreamer *streamer)
static void astreamer_tar_archiver_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static int32 next
Definition: blutils.c:221
#define Min(x, y)
Definition: c.h:1004
#define Assert(condition)
Definition: c.h:858
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
#define pg_fatal(...)
#define MAXPGPATH
const void size_t len
const void * data
uint64 read_tar_number(const char *s, int len)
Definition: tar.c:58
static size_t tarPaddingBytesRequired(size_t len)
Definition: pgtar.h:79
@ TAR_FILETYPE_SYMLINK
Definition: pgtar.h:61
@ TAR_FILETYPE_DIRECTORY
Definition: pgtar.h:62
@ TAR_OFFSET_MODE
Definition: pgtar.h:40
@ TAR_OFFSET_UID
Definition: pgtar.h:41
@ TAR_OFFSET_TYPEFLAG
Definition: pgtar.h:46
@ TAR_OFFSET_NAME
Definition: pgtar.h:39
@ TAR_OFFSET_SIZE
Definition: pgtar.h:43
@ TAR_OFFSET_GID
Definition: pgtar.h:42
@ TAR_OFFSET_LINKNAME
Definition: pgtar.h:47
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:114
#define TAR_BLOCK_SIZE
Definition: pgtar.h:17
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
tree context
Definition: radixtree.h:1835
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
char linktarget[MAXPGPATH]
Definition: astreamer.h:88
char pathname[MAXPGPATH]
Definition: astreamer.h:81
pgoff_t size
Definition: astreamer.h:82
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
astreamer_archive_context next_context
Definition: astreamer_tar.c:33
astreamer_member member
Definition: astreamer_tar.c:34
StringInfoData bbs_buffer
Definition: astreamer.h:111
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110