PostgreSQL Source Code git master
astreamer_tar.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * astreamer_tar.c
4 *
5 * This module implements three types of tar processing. A tar parser
6 * expects unlabelled chunks of data (e.g. ASTREAMER_UNKNOWN) and splits
7 * it into labelled chunks (any other value of astreamer_archive_context).
8 * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 * and produces a tarfile, optionally replacing member headers and trailers
10 * so that upstream astreamer objects can perform surgery on the tarfile
11 * contents without knowing the details of the tar format. A tar terminator
12 * just adds two blocks of NUL bytes to the end of the file, since older
13 * server versions produce files with this terminator omitted.
14 *
15 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 *
17 * IDENTIFICATION
18 * src/fe_utils/astreamer_tar.c
19 *-------------------------------------------------------------------------
20 */
21
22#include "postgres_fe.h"
23
24#include <time.h>
25
26#include "common/logging.h"
27#include "fe_utils/astreamer.h"
28#include "pgtar.h"
29
31{
38
40{
44
45static void astreamer_tar_parser_content(astreamer *streamer,
46 astreamer_member *member,
47 const char *data, int len,
49static void astreamer_tar_parser_finalize(astreamer *streamer);
50static void astreamer_tar_parser_free(astreamer *streamer);
51static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);
52
57};
58
59static void astreamer_tar_archiver_content(astreamer *streamer,
60 astreamer_member *member,
61 const char *data, int len,
63static void astreamer_tar_archiver_finalize(astreamer *streamer);
64static void astreamer_tar_archiver_free(astreamer *streamer);
65
70};
71
73 astreamer_member *member,
74 const char *data, int len,
76static void astreamer_tar_terminator_finalize(astreamer *streamer);
77static void astreamer_tar_terminator_free(astreamer *streamer);
78
83};
84
85/*
86 * Create a astreamer that can parse a stream of content as tar data.
87 *
88 * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 * specified by 'next' will receive a series of typed chunks, as per the
90 * conventions described in astreamer.h.
91 */
94{
95 astreamer_tar_parser *streamer;
96
97 streamer = palloc0(sizeof(astreamer_tar_parser));
98 *((const astreamer_ops **) &streamer->base.bbs_ops) =
100 streamer->base.bbs_next = next;
101 initStringInfo(&streamer->base.bbs_buffer);
103
104 return &streamer->base;
105}
106
107/*
108 * Parse unknown content as tar data.
109 */
110static void
112 const char *data, int len,
114{
115 astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
116 size_t nbytes;
117
118 /* Expect unparsed input. */
119 Assert(member == NULL);
120 Assert(context == ASTREAMER_UNKNOWN);
121
122 while (len > 0)
123 {
124 switch (mystreamer->next_context)
125 {
127
128 /*
129 * If we're expecting an archive member header, accumulate a
130 * full block of data before doing anything further.
131 */
132 if (!astreamer_buffer_until(streamer, &data, &len,
134 return;
135
136 /*
137 * Now we can process the header and get ready to process the
138 * file contents; however, we might find out that what we
139 * thought was the next file header is actually the start of
140 * the archive trailer. Switch modes accordingly.
141 */
142 if (astreamer_tar_header(mystreamer))
143 {
144 if (mystreamer->member.size == 0)
145 {
146 /* No content; trailer is zero-length. */
147 astreamer_content(mystreamer->base.bbs_next,
148 &mystreamer->member,
149 NULL, 0,
151
152 /* Expect next header. */
154 }
155 else
156 {
157 /* Expect contents. */
159 }
160 mystreamer->base.bbs_buffer.len = 0;
161 mystreamer->file_bytes_sent = 0;
162 }
163 else
165 break;
166
168
169 /*
170 * Send as much content as we have, but not more than the
171 * remaining file length.
172 */
173 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 nbytes = Min(nbytes, len);
176 Assert(nbytes > 0);
177 astreamer_content(mystreamer->base.bbs_next,
178 &mystreamer->member,
179 data, nbytes,
181 mystreamer->file_bytes_sent += nbytes;
182 data += nbytes;
183 len -= nbytes;
184
185 /*
186 * If we've not yet sent the whole file, then there's more
187 * content to come; otherwise, it's time to expect the file
188 * trailer.
189 */
190 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 {
193 if (mystreamer->pad_bytes_expected == 0)
194 {
195 /* Trailer is zero-length. */
196 astreamer_content(mystreamer->base.bbs_next,
197 &mystreamer->member,
198 NULL, 0,
200
201 /* Expect next header. */
203 }
204 else
205 {
206 /* Trailer is not zero-length. */
208 }
209 mystreamer->base.bbs_buffer.len = 0;
210 }
211 break;
212
214
215 /*
216 * If we're expecting an archive member trailer, accumulate
217 * the expected number of padding bytes before sending
218 * anything onward.
219 */
220 if (!astreamer_buffer_until(streamer, &data, &len,
221 mystreamer->pad_bytes_expected))
222 return;
223
224 /* OK, now we can send it. */
225 astreamer_content(mystreamer->base.bbs_next,
226 &mystreamer->member,
227 data, mystreamer->pad_bytes_expected,
229
230 /* Expect next file header. */
232 mystreamer->base.bbs_buffer.len = 0;
233 break;
234
236
237 /*
238 * We've seen an end-of-archive indicator, so anything more is
239 * buffered and sent as part of the archive trailer. But we
240 * don't expect more than 2 blocks.
241 */
242 astreamer_buffer_bytes(streamer, &data, &len, len);
243 if (len > 2 * TAR_BLOCK_SIZE)
244 pg_fatal("tar file trailer exceeds 2 blocks");
245 return;
246
247 default:
248 /* Shouldn't happen. */
249 pg_fatal("unexpected state while parsing tar archive");
250 }
251 }
252}
253
254/*
255 * Parse a file header within a tar stream.
256 *
257 * The return value is true if we found a file header and passed it on to the
258 * next astreamer; it is false if we have reached the archive trailer.
259 */
260static bool
262{
263 bool has_nonzero_byte = false;
264 int i;
265 astreamer_member *member = &mystreamer->member;
266 char *buffer = mystreamer->base.bbs_buffer.data;
267
268 Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269
270 /* Check whether we've got a block of all zero bytes. */
271 for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272 {
273 if (buffer[i] != '\0')
274 {
275 has_nonzero_byte = true;
276 break;
277 }
278 }
279
280 /*
281 * If the entire block was zeros, this is the end of the archive, not the
282 * start of the next file.
283 */
284 if (!has_nonzero_byte)
285 return false;
286
287 /*
288 * Parse key fields out of the header.
289 */
290 strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
291 if (member->pathname[0] == '\0')
292 pg_fatal("tar member has empty name");
293 member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
294 member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
295 member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
296 member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
297 member->is_directory =
299 member->is_link =
301 if (member->is_link)
302 strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
303
304 /* Compute number of padding bytes. */
305 mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
306
307 /* Forward the entire header to the next astreamer. */
308 astreamer_content(mystreamer->base.bbs_next, member,
309 buffer, TAR_BLOCK_SIZE,
311
312 return true;
313}
314
315/*
316 * End-of-stream processing for a tar parser.
317 */
318static void
320{
321 astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
322
323 if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
324 (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
325 mystreamer->base.bbs_buffer.len > 0))
326 pg_fatal("COPY stream ended before last file was finished");
327
328 /* Send the archive trailer, even if empty. */
329 astreamer_content(streamer->bbs_next, NULL,
330 streamer->bbs_buffer.data, streamer->bbs_buffer.len,
332
333 /* Now finalize successor. */
334 astreamer_finalize(streamer->bbs_next);
335}
336
337/*
338 * Free memory associated with a tar parser.
339 */
340static void
342{
343 pfree(streamer->bbs_buffer.data);
344 astreamer_free(streamer->bbs_next);
345}
346
347/*
348 * Create a astreamer that can generate a tar archive.
349 *
350 * This is intended to be usable either for generating a brand-new tar archive
351 * or for modifying one on the fly. The input should be a series of typed
352 * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
353 * astreamer_tar_parser_content.
354 */
355astreamer *
357{
358 astreamer_tar_archiver *streamer;
359
360 streamer = palloc0(sizeof(astreamer_tar_archiver));
361 *((const astreamer_ops **) &streamer->base.bbs_ops) =
363 streamer->base.bbs_next = next;
364
365 return &streamer->base;
366}
367
368/*
369 * Fix up the stream of input chunks to create a valid tar file.
370 *
371 * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
372 * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
373 * passed through without change. Any other size is a fatal error (and
374 * indicates a bug).
375 *
376 * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
377 * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
378 * scratch. Specifically, we construct a block of zero bytes sufficient to
379 * pad out to a block boundary, as required by the tar format. Other
380 * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
381 *
382 * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
383 *
384 * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
385 * blocks of zero bytes. Not all tar programs require this, but apparently
386 * some do. The server does not supply this trailer. If no archive trailer is
387 * present, one will be added by astreamer_tar_parser_finalize.
388 */
389static void
391 astreamer_member *member,
392 const char *data, int len,
394{
395 astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
396 char buffer[2 * TAR_BLOCK_SIZE];
397
398 Assert(context != ASTREAMER_UNKNOWN);
399
400 if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
401 {
402 Assert(len == 0);
403
404 /* Replace zero-length tar header with a newly constructed one. */
405 tarCreateHeader(buffer, member->pathname, NULL,
406 member->size, member->mode, member->uid, member->gid,
407 time(NULL));
408 data = buffer;
410
411 /* Also make a note to replace padding, in case size changed. */
412 mystreamer->rearchive_member = true;
413 }
414 else if (context == ASTREAMER_MEMBER_TRAILER &&
415 mystreamer->rearchive_member)
416 {
417 int pad_bytes = tarPaddingBytesRequired(member->size);
418
419 /* Also replace padding, if we regenerated the header. */
420 memset(buffer, 0, pad_bytes);
421 data = buffer;
422 len = pad_bytes;
423
424 /* Don't do this again unless we replace another header. */
425 mystreamer->rearchive_member = false;
426 }
427 else if (context == ASTREAMER_ARCHIVE_TRAILER)
428 {
429 /* Trailer should always be two blocks of zero bytes. */
430 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
431 data = buffer;
432 len = 2 * TAR_BLOCK_SIZE;
433 }
434
435 astreamer_content(streamer->bbs_next, member, data, len, context);
436}
437
438/*
439 * End-of-stream processing for a tar archiver.
440 */
441static void
443{
444 astreamer_finalize(streamer->bbs_next);
445}
446
447/*
448 * Free memory associated with a tar archiver.
449 */
450static void
452{
453 astreamer_free(streamer->bbs_next);
454 pfree(streamer);
455}
456
457/*
458 * Create a astreamer that blindly adds two blocks of NUL bytes to the
459 * end of an incomplete tarfile that the server might send us.
460 */
461astreamer *
463{
464 astreamer *streamer;
465
466 streamer = palloc0(sizeof(astreamer));
467 *((const astreamer_ops **) &streamer->bbs_ops) =
469 streamer->bbs_next = next;
470
471 return streamer;
472}
473
474/*
475 * Pass all the content through without change.
476 */
477static void
479 astreamer_member *member,
480 const char *data, int len,
482{
483 /* Expect unparsed input. */
484 Assert(member == NULL);
485 Assert(context == ASTREAMER_UNKNOWN);
486
487 /* Just forward it. */
488 astreamer_content(streamer->bbs_next, member, data, len, context);
489}
490
491/*
492 * At the end, blindly add the two blocks of NUL bytes which the server fails
493 * to supply.
494 */
495static void
497{
498 char buffer[2 * TAR_BLOCK_SIZE];
499
500 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
501 astreamer_content(streamer->bbs_next, NULL, buffer,
503 astreamer_finalize(streamer->bbs_next);
504}
505
506/*
507 * Free memory associated with a tar terminator.
508 */
509static void
511{
512 astreamer_free(streamer->bbs_next);
513 pfree(streamer);
514}
static void astreamer_free(astreamer *streamer)
Definition: astreamer.h:153
static void astreamer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:135
static bool astreamer_buffer_until(astreamer *streamer, const char **data, int *len, int target_bytes)
Definition: astreamer.h:184
static void astreamer_finalize(astreamer *streamer)
Definition: astreamer.h:145
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len, int nbytes)
Definition: astreamer.h:166
static const astreamer_ops astreamer_tar_terminator_ops
Definition: astreamer_tar.c:79
astreamer * astreamer_tar_parser_new(astreamer *next)
Definition: astreamer_tar.c:93
static const astreamer_ops astreamer_tar_parser_ops
Definition: astreamer_tar.c:53
struct astreamer_tar_parser astreamer_tar_parser
struct astreamer_tar_archiver astreamer_tar_archiver
static void astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_terminator_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_archiver_free(astreamer *streamer)
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer)
astreamer * astreamer_tar_terminator_new(astreamer *next)
static void astreamer_tar_parser_finalize(astreamer *streamer)
static void astreamer_tar_archiver_finalize(astreamer *streamer)
static const astreamer_ops astreamer_tar_archiver_ops
Definition: astreamer_tar.c:66
astreamer * astreamer_tar_archiver_new(astreamer *next)
static void astreamer_tar_terminator_free(astreamer *streamer)
static void astreamer_tar_terminator_finalize(astreamer *streamer)
static void astreamer_tar_parser_free(astreamer *streamer)
static void astreamer_tar_archiver_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static int32 next
Definition: blutils.c:221
#define Min(x, y)
Definition: c.h:961
#define Assert(condition)
Definition: c.h:815
int i
Definition: isn.c:72
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
#define pg_fatal(...)
#define MAXPGPATH
const void size_t len
const void * data
uint64 read_tar_number(const char *s, int len)
Definition: tar.c:58
static size_t tarPaddingBytesRequired(size_t len)
Definition: pgtar.h:79
@ TAR_FILETYPE_SYMLINK
Definition: pgtar.h:61
@ TAR_FILETYPE_DIRECTORY
Definition: pgtar.h:62
@ TAR_OFFSET_MODE
Definition: pgtar.h:40
@ TAR_OFFSET_UID
Definition: pgtar.h:41
@ TAR_OFFSET_TYPEFLAG
Definition: pgtar.h:46
@ TAR_OFFSET_NAME
Definition: pgtar.h:39
@ TAR_OFFSET_SIZE
Definition: pgtar.h:43
@ TAR_OFFSET_GID
Definition: pgtar.h:42
@ TAR_OFFSET_LINKNAME
Definition: pgtar.h:47
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition: tar.c:114
#define TAR_BLOCK_SIZE
Definition: pgtar.h:17
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
char linktarget[MAXPGPATH]
Definition: astreamer.h:88
char pathname[MAXPGPATH]
Definition: astreamer.h:81
pgoff_t size
Definition: astreamer.h:82
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
astreamer_archive_context next_context
Definition: astreamer_tar.c:33
astreamer_member member
Definition: astreamer_tar.c:34
StringInfoData bbs_buffer
Definition: astreamer.h:111
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110