PostgreSQL Source Code git master
Loading...
Searching...
No Matches
astreamer_tar.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * astreamer_tar.c
4 *
5 * This module implements three types of tar processing. A tar parser
6 * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
7 * it into labelled chunks (any other value of astreamer_archive_context).
8 * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 * and produces a tarfile, optionally replacing member headers and trailers
10 * so that upstream astreamer objects can perform surgery on the tarfile
11 * contents without knowing the details of the tar format. A tar terminator
12 * just adds two blocks of NUL bytes to the end of the file, since older
13 * server versions produce files with this terminator omitted.
14 *
15 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 *
17 * IDENTIFICATION
18 * src/fe_utils/astreamer_tar.c
19 *-------------------------------------------------------------------------
20 */
21
22#include "postgres_fe.h"
23
24#include <time.h>
25
26#include "common/logging.h"
27#include "fe_utils/astreamer.h"
28#include "pgtar.h"
29
38
44
45static void astreamer_tar_parser_content(astreamer *streamer,
46 astreamer_member *member,
47 const char *data, int len,
49static void astreamer_tar_parser_finalize(astreamer *streamer);
50static void astreamer_tar_parser_free(astreamer *streamer);
52
58
59static void astreamer_tar_archiver_content(astreamer *streamer,
60 astreamer_member *member,
61 const char *data, int len,
63static void astreamer_tar_archiver_finalize(astreamer *streamer);
64static void astreamer_tar_archiver_free(astreamer *streamer);
65
71
73 astreamer_member *member,
74 const char *data, int len,
76static void astreamer_tar_terminator_finalize(astreamer *streamer);
77static void astreamer_tar_terminator_free(astreamer *streamer);
78
84
85/*
86 * Create an astreamer that can parse a stream of content as tar data.
87 *
88 * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 * specified by 'next' will receive a series of typed chunks, as per the
90 * conventions described in astreamer.h.
91 */
94{
95 astreamer_tar_parser *streamer;
96
98 *((const astreamer_ops **) &streamer->base.bbs_ops) =
100 streamer->base.bbs_next = next;
101 initStringInfo(&streamer->base.bbs_buffer);
103
104 return &streamer->base;
105}
106
107/*
108 * Parse unknown content as tar data.
109 */
110static void
112 const char *data, int len,
114{
116 size_t nbytes;
117
118 /* Expect unparsed input. */
119 Assert(member == NULL);
120 Assert(context == ASTREAMER_UNKNOWN);
121
122 while (len > 0)
123 {
124 switch (mystreamer->next_context)
125 {
127
128 /*
129 * If we're expecting an archive member header, accumulate a
130 * full block of data before doing anything further.
131 */
132 if (!astreamer_buffer_until(streamer, &data, &len,
134 return;
135
136 /*
137 * Now we can process the header and get ready to process the
138 * file contents; however, we might find out that what we
139 * thought was the next file header is actually the start of
140 * the archive trailer. Switch modes accordingly.
141 */
143 {
144 if (mystreamer->member.size == 0)
145 {
146 /* No content; trailer is zero-length. */
147 astreamer_content(mystreamer->base.bbs_next,
148 &mystreamer->member,
149 NULL, 0,
151
152 /* Expect next header. */
153 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
154 }
155 else
156 {
157 /* Expect contents. */
159 }
160 mystreamer->base.bbs_buffer.len = 0;
161 mystreamer->file_bytes_sent = 0;
162 }
163 else
165 break;
166
168
169 /*
170 * Send as much content as we have, but not more than the
171 * remaining file length.
172 */
173 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 nbytes = Min(nbytes, len);
176 Assert(nbytes > 0);
177 astreamer_content(mystreamer->base.bbs_next,
178 &mystreamer->member,
179 data, nbytes,
181 mystreamer->file_bytes_sent += nbytes;
182 data += nbytes;
183 len -= nbytes;
184
185 /*
186 * If we've not yet sent the whole file, then there's more
187 * content to come; otherwise, it's time to expect the file
188 * trailer.
189 */
190 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 {
193 if (mystreamer->pad_bytes_expected == 0)
194 {
195 /* Trailer is zero-length. */
196 astreamer_content(mystreamer->base.bbs_next,
197 &mystreamer->member,
198 NULL, 0,
200
201 /* Expect next header. */
202 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
203 }
204 else
205 {
206 /* Trailer is not zero-length. */
207 mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
208 }
209 mystreamer->base.bbs_buffer.len = 0;
210 }
211 break;
212
214
215 /*
216 * If we're expecting an archive member trailer, accumulate
217 * the expected number of padding bytes before sending
218 * anything onward.
219 */
220 if (!astreamer_buffer_until(streamer, &data, &len,
221 mystreamer->pad_bytes_expected))
222 return;
223
224 /* OK, now we can send it. */
225 astreamer_content(mystreamer->base.bbs_next,
226 &mystreamer->member,
227 mystreamer->base.bbs_buffer.data,
228 mystreamer->pad_bytes_expected,
230
231 /* Expect next file header. */
232 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
233 mystreamer->base.bbs_buffer.len = 0;
234 break;
235
237
238 /*
239 * We've seen an end-of-archive indicator, so anything more is
240 * buffered and sent as part of the archive trailer.
241 *
242 * Per POSIX, the last physical block of a tar archive is
243 * always full-sized, so there may be undefined data after the
244 * two zero blocks that mark end-of-archive. GNU tar, for
245 * example, zero-pads to a 10kB boundary by default. We just
246 * buffer whatever we receive and pass it along at finalize
247 * time.
248 */
249 astreamer_buffer_bytes(streamer, &data, &len, len);
250 return;
251
252 default:
253 /* Shouldn't happen. */
254 pg_fatal("unexpected state while parsing tar archive");
255 }
256 }
257}
258
259/*
260 * Parse a file header within a tar stream.
261 *
262 * The return value is true if we found a file header and passed it on to the
263 * next astreamer; it is false if we have found the archive trailer.
264 * We throw an error if we see invalid data.
265 */
266static bool
268{
269 bool has_nonzero_byte = false;
270 int i;
271 astreamer_member *member = &mystreamer->member;
272 char *buffer = mystreamer->base.bbs_buffer.data;
273
274 Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
275
276 /* Zero out fields of *member, just for consistency. */
277 memset(member, 0, sizeof(astreamer_member));
278
279 /* Check whether we've got a block of all zero bytes. */
280 for (i = 0; i < TAR_BLOCK_SIZE; ++i)
281 {
282 if (buffer[i] != '\0')
283 {
284 has_nonzero_byte = true;
285 break;
286 }
287 }
288
289 /*
290 * If the entire block was zeros, this is the end of the archive, not the
291 * start of the next file.
292 */
293 if (!has_nonzero_byte)
294 return false;
295
296 /*
297 * Verify that we have a reasonable-looking header.
298 */
299 if (!isValidTarHeader(buffer))
300 pg_fatal("input file does not appear to be a valid tar archive");
301
302 /*
303 * Parse key fields out of the header.
304 */
305 strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
306 if (member->pathname[0] == '\0')
307 pg_fatal("tar member has empty name");
308 member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
309 member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
310 member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
311 member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
312
313 switch (buffer[TAR_OFFSET_TYPEFLAG])
314 {
317 member->is_regular = true;
318 break;
320 member->is_directory = true;
321 break;
323 member->is_symlink = true;
324 strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
325 break;
328 pg_fatal("pax extensions to tar format are not supported");
329 break;
330 default:
331 /* For special filetypes, set none of the three is_xxx flags */
332 break;
333 }
334
335 /* Compute number of padding bytes. */
336 mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
337
338 /* Forward the entire header to the next astreamer. */
339 astreamer_content(mystreamer->base.bbs_next, member,
340 buffer, TAR_BLOCK_SIZE,
342
343 return true;
344}
345
346/*
347 * End-of-stream processing for a tar parser.
348 */
349static void
351{
353
354 if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
355 (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
356 mystreamer->base.bbs_buffer.len > 0))
357 pg_fatal("COPY stream ended before last file was finished");
358
359 /* Send the archive trailer, even if empty. */
361 streamer->bbs_buffer.data, streamer->bbs_buffer.len,
363
364 /* Now finalize successor. */
365 astreamer_finalize(streamer->bbs_next);
366}
367
368/*
369 * Free memory associated with a tar parser.
370 */
371static void
373{
374 pfree(streamer->bbs_buffer.data);
375 astreamer_free(streamer->bbs_next);
376 pfree(streamer);
377}
378
379/*
380 * Create an astreamer that can generate a tar archive.
381 *
382 * This is intended to be usable either for generating a brand-new tar archive
383 * or for modifying one on the fly. The input should be a series of typed
384 * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
385 * astreamer_tar_archiver_content.
386 */
387astreamer *
389{
390 astreamer_tar_archiver *streamer;
391
393 *((const astreamer_ops **) &streamer->base.bbs_ops) =
395 streamer->base.bbs_next = next;
396
397 return &streamer->base;
398}
399
400/*
401 * Fix up the stream of input chunks to create a valid tar file.
402 *
403 * If an ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
404 * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
405 * passed through without change. Any other size is a fatal error (and
406 * indicates a bug).
407 *
408 * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
409 * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
410 * scratch. Specifically, we construct a block of zero bytes sufficient to
411 * pad out to a block boundary, as required by the tar format. Other
412 * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
413 *
414 * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
415 *
416 * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
417 * blocks of zero bytes. Not all tar programs require this, but apparently
418 * some do. The server does not supply this trailer. If no archive trailer is
419 * present, one will be added by astreamer_tar_parser_finalize.
420 */
421static void
423 astreamer_member *member,
424 const char *data, int len,
426{
428 char buffer[2 * TAR_BLOCK_SIZE];
429
430 Assert(context != ASTREAMER_UNKNOWN);
431
432 if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
433 {
434 Assert(len == 0);
435
436 /* Replace zero-length tar header with a newly constructed one. */
437 tarCreateHeader(buffer, member->pathname, NULL,
438 member->size, member->mode, member->uid, member->gid,
439 time(NULL));
440 data = buffer;
442
443 /* Also make a note to replace padding, in case size changed. */
444 mystreamer->rearchive_member = true;
445 }
446 else if (context == ASTREAMER_MEMBER_TRAILER &&
447 mystreamer->rearchive_member)
448 {
450
451 /* Also replace padding, if we regenerated the header. */
452 memset(buffer, 0, pad_bytes);
453 data = buffer;
454 len = pad_bytes;
455
456 /* Don't do this again unless we replace another header. */
457 mystreamer->rearchive_member = false;
458 }
459 else if (context == ASTREAMER_ARCHIVE_TRAILER)
460 {
461 /* Trailer should always be two blocks of zero bytes. */
462 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
463 data = buffer;
464 len = 2 * TAR_BLOCK_SIZE;
465 }
466
467 astreamer_content(streamer->bbs_next, member, data, len, context);
468}
469
470/*
471 * End-of-stream processing for a tar archiver.
472 */
473static void
478
479/*
480 * Free memory associated with a tar archiver.
481 */
482static void
484{
485 astreamer_free(streamer->bbs_next);
486 pfree(streamer);
487}
488
489/*
490 * Create an astreamer that blindly adds two blocks of NUL bytes to the
491 * end of an incomplete tarfile that the server might send us.
492 */
493astreamer *
495{
496 astreamer *streamer;
497
498 streamer = palloc0_object(astreamer);
499 *((const astreamer_ops **) &streamer->bbs_ops) =
501 streamer->bbs_next = next;
502
503 return streamer;
504}
505
506/*
507 * Pass all the content through without change.
508 */
509static void
511 astreamer_member *member,
512 const char *data, int len,
514{
515 /* Expect unparsed input. */
516 Assert(member == NULL);
517 Assert(context == ASTREAMER_UNKNOWN);
518
519 /* Just forward it. */
520 astreamer_content(streamer->bbs_next, member, data, len, context);
521}
522
523/*
524 * At the end, blindly add the two blocks of NUL bytes which the server fails
525 * to supply.
526 */
527static void
529{
530 char buffer[2 * TAR_BLOCK_SIZE];
531
532 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
533 astreamer_content(streamer->bbs_next, NULL, buffer,
535 astreamer_finalize(streamer->bbs_next);
536}
537
538/*
539 * Free memory associated with a tar terminator.
540 */
541static void
543{
544 astreamer_free(streamer->bbs_next);
545 pfree(streamer);
546}
static void astreamer_free(astreamer *streamer)
Definition astreamer.h:155
static void astreamer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition astreamer.h:137
static bool astreamer_buffer_until(astreamer *streamer, const char **data, int *len, int target_bytes)
Definition astreamer.h:186
static void astreamer_finalize(astreamer *streamer)
Definition astreamer.h:147
astreamer_archive_context
Definition astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition astreamer.h:64
static void astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len, int nbytes)
Definition astreamer.h:168
static const astreamer_ops astreamer_tar_terminator_ops
astreamer * astreamer_tar_parser_new(astreamer *next)
static const astreamer_ops astreamer_tar_parser_ops
static void astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_terminator_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_archiver_free(astreamer *streamer)
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer)
astreamer * astreamer_tar_terminator_new(astreamer *next)
static void astreamer_tar_parser_finalize(astreamer *streamer)
static void astreamer_tar_archiver_finalize(astreamer *streamer)
static const astreamer_ops astreamer_tar_archiver_ops
astreamer * astreamer_tar_archiver_new(astreamer *next)
static void astreamer_tar_terminator_free(astreamer *streamer)
static void astreamer_tar_terminator_finalize(astreamer *streamer)
static void astreamer_tar_parser_free(astreamer *streamer)
static void astreamer_tar_archiver_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static int32 next
Definition blutils.c:225
#define Min(x, y)
Definition c.h:1091
#define Assert(condition)
Definition c.h:943
#define palloc0_object(type)
Definition fe_memutils.h:75
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
#define pg_fatal(...)
#define MAXPGPATH
const void size_t len
const void * data
uint64 read_tar_number(const char *s, int len)
Definition tar.c:58
static size_t tarPaddingBytesRequired(size_t len)
Definition pgtar.h:84
@ TAR_FILETYPE_PAX_EXTENDED_GLOBAL
Definition pgtar.h:66
@ TAR_FILETYPE_SYMLINK
Definition pgtar.h:63
@ TAR_FILETYPE_PLAIN_OLD
Definition pgtar.h:62
@ TAR_FILETYPE_DIRECTORY
Definition pgtar.h:64
@ TAR_FILETYPE_PLAIN
Definition pgtar.h:61
@ TAR_FILETYPE_PAX_EXTENDED
Definition pgtar.h:65
@ TAR_OFFSET_MODE
Definition pgtar.h:40
@ TAR_OFFSET_UID
Definition pgtar.h:41
@ TAR_OFFSET_TYPEFLAG
Definition pgtar.h:46
@ TAR_OFFSET_NAME
Definition pgtar.h:39
@ TAR_OFFSET_SIZE
Definition pgtar.h:43
@ TAR_OFFSET_GID
Definition pgtar.h:42
@ TAR_OFFSET_LINKNAME
Definition pgtar.h:47
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition tar.c:143
bool isValidTarHeader(const char *header)
Definition tar.c:112
#define TAR_BLOCK_SIZE
Definition pgtar.h:17
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static int fb(int x)
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
char linktarget[MAXPGPATH]
Definition astreamer.h:90
char pathname[MAXPGPATH]
Definition astreamer.h:81
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition astreamer.h:128
astreamer_archive_context next_context
astreamer_member member
StringInfoData bbs_buffer
Definition astreamer.h:113
const astreamer_ops * bbs_ops
Definition astreamer.h:111
astreamer * bbs_next
Definition astreamer.h:112