PostgreSQL Source Code git master
Loading...
Searching...
No Matches
astreamer_tar.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * astreamer_tar.c
4 *
5 * This module implements three types of tar processing. A tar parser
6 * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
7 * them into labelled chunks (any other value of astreamer_archive_context).
8 * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 * and produces a tarfile, optionally replacing member headers and trailers
10 * so that upstream astreamer objects can perform surgery on the tarfile
11 * contents without knowing the details of the tar format. A tar terminator
12 * just adds two blocks of NUL bytes to the end of the file, since older
13 * server versions produce files with this terminator omitted.
14 *
15 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 *
17 * IDENTIFICATION
18 * src/fe_utils/astreamer_tar.c
19 *-------------------------------------------------------------------------
20 */
21
22#include "postgres_fe.h"
23
24#include <time.h>
25
26#include "common/logging.h"
27#include "fe_utils/astreamer.h"
28#include "pgtar.h"
29
38
44
45static void astreamer_tar_parser_content(astreamer *streamer,
46 astreamer_member *member,
47 const char *data, int len,
49static void astreamer_tar_parser_finalize(astreamer *streamer);
50static void astreamer_tar_parser_free(astreamer *streamer);
52
58
59static void astreamer_tar_archiver_content(astreamer *streamer,
60 astreamer_member *member,
61 const char *data, int len,
63static void astreamer_tar_archiver_finalize(astreamer *streamer);
64static void astreamer_tar_archiver_free(astreamer *streamer);
65
71
73 astreamer_member *member,
74 const char *data, int len,
76static void astreamer_tar_terminator_finalize(astreamer *streamer);
77static void astreamer_tar_terminator_free(astreamer *streamer);
78
84
85/*
86 * Create an astreamer that can parse a stream of content as tar data.
87 *
88 * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 * specified by 'next' will receive a series of typed chunks, as per the
90 * conventions described in astreamer.h.
 *
 * The new object is chained to 'next', gets an initialized accumulation
 * buffer, and the address of its embedded base astreamer is returned.
 *
 * NOTE(review): the signature and a few body lines are absent from this
 * listing; the symbol index gives the signature as
 * "astreamer * astreamer_tar_parser_new(astreamer *next)".
91 */
94{
95 astreamer_tar_parser *streamer;
96
    /*
     * The right-hand side of the assignment below is not shown here. The
     * cast suggests bbs_ops cannot be assigned directly -- TODO confirm
     * against astreamer.h.
     */
98 *((const astreamer_ops **) &streamer->base.bbs_ops) =
100 streamer->base.bbs_next = next;
101 initStringInfo(&streamer->base.bbs_buffer);
103
    /* Callers only ever see the embedded base object. */
104 return &streamer->base;
105}
106
107/*
108 * Parse unknown content as tar data.
 *
 * The input is consumed in a loop driven by mystreamer->next_context.
 * Member headers and member padding are accumulated in the base buffer and
 * handled only once complete, so the function returns early while more
 * bytes are still needed. Member contents are forwarded straight from the
 * caller's data as they arrive. Anything received after the end-of-archive
 * indicator is buffered and left for finalize time.
 *
 * NOTE(review): the signature, the case labels and several continuation
 * lines are absent from this listing; the symbol index gives the signature
 * as "static void astreamer_tar_parser_content(astreamer *streamer,
 * astreamer_member *member, const char *data, int len,
 * astreamer_archive_context context)".
109 */
110static void
112 const char *data, int len,
114{
116 size_t nbytes;
117
118 /* Expect unparsed input. */
119 Assert(member == NULL);
120 Assert(context == ASTREAMER_UNKNOWN);
121
    /* Each pass handles whatever chunk type next_context says to expect. */
122 while (len > 0)
123 {
124 switch (mystreamer->next_context)
125 {
127
128 /*
129 * If we're expecting an archive member header, accumulate a
130 * full block of data before doing anything further.
131 */
132 if (!astreamer_buffer_until(streamer, &data, &len,
134 return;
135
136 /*
137 * Now we can process the header and get ready to process the
138 * file contents; however, we might find out that what we
139 * thought was the next file header is actually the start of
140 * the archive trailer. Switch modes accordingly.
141 */
143 {
144 if (mystreamer->member.size == 0)
145 {
146 /* No content; trailer is zero-length. */
147 astreamer_content(mystreamer->base.bbs_next,
148 &mystreamer->member,
149 NULL, 0,
151
152 /* Expect next header. */
153 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
154 }
155 else
156 {
157 /* Expect contents. */
159 }
    /* Header handled: empty the buffer and restart the byte count. */
160 mystreamer->base.bbs_buffer.len = 0;
161 mystreamer->file_bytes_sent = 0;
162 }
163 else
    /*
     * NOTE(review): the statement governed by this else is not shown in
     * this listing; per the comment above it presumably switches to
     * archive-trailer mode. The break below ends the case, not the else.
     */
165 break;
166
168
169 /*
170 * Send as much content as we have, but not more than the
171 * remaining file length.
172 */
173 Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
    /* Bytes still owed for this member, capped by what this call supplied. */
174 nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 nbytes = Min(nbytes, len);
176 Assert(nbytes > 0);
177 astreamer_content(mystreamer->base.bbs_next,
178 &mystreamer->member,
179 data, nbytes,
181 mystreamer->file_bytes_sent += nbytes;
182 data += nbytes;
183 len -= nbytes;
184
185 /*
186 * If we've not yet sent the whole file, then there's more
187 * content to come; otherwise, it's time to expect the file
188 * trailer.
189 */
190 Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 {
193 if (mystreamer->pad_bytes_expected == 0)
194 {
195 /* Trailer is zero-length. */
196 astreamer_content(mystreamer->base.bbs_next,
197 &mystreamer->member,
198 NULL, 0,
200
201 /* Expect next header. */
202 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
203 }
204 else
205 {
206 /* Trailer is not zero-length. */
207 mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
208 }
209 mystreamer->base.bbs_buffer.len = 0;
210 }
211 break;
212
214
215 /*
216 * If we're expecting an archive member trailer, accumulate
217 * the expected number of padding bytes before sending
218 * anything onward.
219 */
220 if (!astreamer_buffer_until(streamer, &data, &len,
221 mystreamer->pad_bytes_expected))
222 return;
223
224 /* OK, now we can send it. */
225 astreamer_content(mystreamer->base.bbs_next,
226 &mystreamer->member,
227 mystreamer->base.bbs_buffer.data,
228 mystreamer->pad_bytes_expected,
230
231 /* Expect next file header. */
232 mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
233 mystreamer->base.bbs_buffer.len = 0;
234 break;
235
237
238 /*
239 * We've seen an end-of-archive indicator, so anything more is
240 * buffered and sent as part of the archive trailer.
241 *
242 * Per POSIX, the last physical block of a tar archive is
243 * always full-sized, so there may be undefined data after the
244 * two zero blocks that mark end-of-archive. GNU tar, for
245 * example, zero-pads to a 10kB boundary by default. We just
246 * buffer whatever we receive and pass it along at finalize
247 * time.
248 */
    /* All remaining input is requested here, so nothing is left to loop over. */
249 astreamer_buffer_bytes(streamer, &data, &len, len);
250 return;
251
252 default:
253 /* Shouldn't happen. */
254 pg_fatal("unexpected state while parsing tar archive");
255 }
256 }
257}
258
259/*
260 * Parse a file header within a tar stream.
261 *
262 * The return value is true if we found a file header and passed it on to the
263 * next astreamer; it is false if we have found the archive trailer.
264 * We throw an error if we see invalid data.
 *
 * The header block is read from the base buffer, which must hold exactly
 * TAR_BLOCK_SIZE bytes. As side effects, mystreamer->member is cleared and,
 * for a real header, refilled from the header fields, and
 * mystreamer->pad_bytes_expected is set from the member size.
 *
 * NOTE(review): the signature, one guard condition and the case labels are
 * absent from this listing; the symbol index gives the signature as
 * "static bool astreamer_tar_header(astreamer_tar_parser *mystreamer)".
265 */
266static bool
268{
269 bool has_nonzero_byte = false;
270 int i;
271 astreamer_member *member = &mystreamer->member;
272 char *buffer = mystreamer->base.bbs_buffer.data;
273
274 Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
275
276 /* Zero out fields of *member, just for consistency. */
277 memset(member, 0, sizeof(astreamer_member));
278
279 /* Check whether we've got a block of all zero bytes. */
280 for (i = 0; i < TAR_BLOCK_SIZE; ++i)
281 {
282 if (buffer[i] != '\0')
283 {
284 has_nonzero_byte = true;
285 break;
286 }
287 }
288
289 /*
290 * If the entire block was zeros, this is the end of the archive, not the
291 * start of the next file.
292 */
293 if (!has_nonzero_byte)
294 return false;
295
296 /*
297 * Verify that we have a reasonable-looking header.
298 */
299 if (!isValidTarHeader(buffer))
300 pg_fatal("input file does not appear to be a valid tar archive");
301
302 /*
303 * Parse key fields out of the header.
304 */
305 strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
306 if (member->pathname[0] == '\0')
307 pg_fatal("tar member has empty name");
    /*
     * NOTE(review): the condition guarding the next error is not shown in
     * this listing; the symbol index lists path_is_safe_for_extraction(),
     * which is presumably what is tested -- confirm.
     */
309 pg_fatal("tar member has unsafe path name: \"%s\"",
310 member->pathname);
311
    /*
     * The 12 and 8 are the lengths handed to read_tar_number; presumably
     * they are the widths of the corresponding header fields -- TODO
     * confirm against pgtar.h.
     */
312 member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
313 member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
314 member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
315 member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
316
317 switch (buffer[TAR_OFFSET_TYPEFLAG])
318 {
    /*
     * NOTE(review): the case labels are not shown in this listing. The
     * symbol index lists TAR_FILETYPE_PLAIN, TAR_FILETYPE_PLAIN_OLD,
     * TAR_FILETYPE_DIRECTORY, TAR_FILETYPE_SYMLINK and the two
     * TAR_FILETYPE_PAX_EXTENDED values, which presumably select the
     * branches below -- confirm.
     */
321 member->is_regular = true;
322 break;
324 member->is_directory = true;
325 break;
327 member->is_symlink = true;
    /*
     * strlcpy stores at most size - 1 bytes plus a terminator, so no more
     * than 99 bytes of link target are kept. The 100 presumably mirrors the
     * width of the tar linkname field -- TODO confirm.
     */
328 strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
329 break;
332 pg_fatal("pax extensions to tar format are not supported");
333 break;
334 default:
335 /* For special filetypes, set none of the three is_xxx flags */
336 break;
337 }
338
339 /* Compute number of padding bytes. */
340 mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
341
342 /* Forward the entire header to the next astreamer. */
343 astreamer_content(mystreamer->base.bbs_next, member,
344 buffer, TAR_BLOCK_SIZE,
346
347 return true;
348}
349
350/*
351 * End-of-stream processing for a tar parser.
 *
 * Fails with pg_fatal if the input stopped partway through a member.
 * Otherwise the bytes held in the buffer are passed on as the archive
 * trailer and the successor is finalized.
 *
 * NOTE(review): the signature and parts of the trailer-forwarding call are
 * absent from this listing; the symbol index gives the signature as
 * "static void astreamer_tar_parser_finalize(astreamer *streamer)".
352 */
353static void
355{
357
    /*
     * Ending is acceptable only in the archive-trailer state, or while
     * expecting a member header with nothing buffered yet.
     */
358 if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
359 (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
360 mystreamer->base.bbs_buffer.len > 0))
361 pg_fatal("COPY stream ended before last file was finished");
362
363 /* Send the archive trailer, even if empty. */
365 streamer->bbs_buffer.data, streamer->bbs_buffer.len,
367
368 /* Now finalize successor. */
369 astreamer_finalize(streamer->bbs_next);
370}
371
372/*
373 * Free memory associated with a tar parser.
 *
 * Releases the buffer's storage, calls astreamer_free on the successor,
 * and then frees the parser object itself.
 *
 * NOTE(review): the signature line is absent from this listing; the symbol
 * index gives it as
 * "static void astreamer_tar_parser_free(astreamer *streamer)".
374 */
375static void
377{
378 pfree(streamer->bbs_buffer.data);
379 astreamer_free(streamer->bbs_next);
    /* The object itself goes last; bbs_next was read from it just above. */
380 pfree(streamer);
381}
382
383/*
384 * Create an astreamer that can generate a tar archive.
385 *
386 * This is intended to be usable either for generating a brand-new tar archive
387 * or for modifying one on the fly. The input should be a series of typed
388 * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
389 * astreamer_tar_parser_content.
 *
 * The new object is chained to 'next' and the address of its embedded base
 * astreamer is returned. No buffer is initialized here.
 *
 * NOTE(review): the signature, the allocation and the right-hand side of
 * the bbs_ops assignment are absent from this listing; the symbol index
 * gives the signature as
 * "astreamer * astreamer_tar_archiver_new(astreamer *next)".
390 */
391astreamer *
393{
394 astreamer_tar_archiver *streamer;
395
397 *((const astreamer_ops **) &streamer->base.bbs_ops) =
399 streamer->base.bbs_next = next;
400
401 return &streamer->base;
402}
403
404/*
405 * Fix up the stream of input chunks to create a valid tar file.
406 *
407 * If an ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
408 * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
409 * passed through without change. Any other size is a fatal error (and
410 * indicates a bug).
411 *
412 * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
413 * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
414 * scratch. Specifically, we construct a block of zero bytes sufficient to
415 * pad out to a block boundary, as required by the tar format. Other
416 * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
417 *
418 * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
419 *
420 * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
421 * blocks of zero bytes. Not all tar programs require this, but apparently
422 * some do. The server does not supply this trailer. If no archive trailer is
423 * present, one will be added by astreamer_tar_parser_finalize.
 *
 * Whatever chunk results is handed to the successor with the original
 * member and context.
 *
 * NOTE(review): the "fatal error" for other header sizes is enforced only
 * through Assert(len == 0). If Assert compiles to nothing, a header chunk
 * of any size other than TAR_BLOCK_SIZE is replaced as though it were
 * empty -- confirm that this is intended.
 *
 * NOTE(review): the signature and a few body lines (including the
 * declaration of pad_bytes) are absent from this listing; the symbol index
 * gives the signature as "static void
 * astreamer_tar_archiver_content(astreamer *streamer,
 * astreamer_member *member, const char *data, int len,
 * astreamer_archive_context context)".
424 */
425static void
427 astreamer_member *member,
428 const char *data, int len,
430{
    /* Scratch space; the largest replacement built below is two blocks. */
432 char buffer[2 * TAR_BLOCK_SIZE];
433
434 Assert(context != ASTREAMER_UNKNOWN);
435
436 if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
437 {
438 Assert(len == 0);
439
440 /* Replace zero-length tar header with a newly constructed one. */
    /*
     * The link target is passed as NULL and the mtime is the current time.
     * The tarError value returned by tarCreateHeader is not examined.
     */
441 tarCreateHeader(buffer, member->pathname, NULL,
442 member->size, member->mode, member->uid, member->gid,
443 time(NULL));
444 data = buffer;
446
447 /* Also make a note to replace padding, in case size changed. */
448 mystreamer->rearchive_member = true;
449 }
450 else if (context == ASTREAMER_MEMBER_TRAILER &&
451 mystreamer->rearchive_member)
452 {
454
455 /* Also replace padding, if we regenerated the header. */
456 memset(buffer, 0, pad_bytes);
457 data = buffer;
458 len = pad_bytes;
459
460 /* Don't do this again unless we replace another header. */
461 mystreamer->rearchive_member = false;
462 }
463 else if (context == ASTREAMER_ARCHIVE_TRAILER)
464 {
465 /* Trailer should always be two blocks of zero bytes. */
466 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
467 data = buffer;
468 len = 2 * TAR_BLOCK_SIZE;
469 }
470
    /* data and len now describe either the input or its replacement. */
471 astreamer_content(streamer->bbs_next, member, data, len, context);
472}
473
474/*
475 * End-of-stream processing for a tar archiver.
476 */
477static void
482
483/*
484 * Free memory associated with a tar archiver.
 *
 * Calls astreamer_free on the successor and then frees the archiver object
 * itself; no buffer is released here.
 *
 * NOTE(review): the signature line is absent from this listing; the symbol
 * index gives it as
 * "static void astreamer_tar_archiver_free(astreamer *streamer)".
485 */
486static void
488{
489 astreamer_free(streamer->bbs_next);
490 pfree(streamer);
491}
492
493/*
494 * Create an astreamer that blindly adds two blocks of NUL bytes to the
495 * end of an incomplete tarfile that the server might send us.
 *
 * The object is a plain, zero-filled astreamer chained to 'next'; it is
 * returned directly.
 *
 * NOTE(review): the signature and the right-hand side of the bbs_ops
 * assignment are absent from this listing; the symbol index gives the
 * signature as "astreamer * astreamer_tar_terminator_new(astreamer *next)".
496 */
497astreamer *
499{
500 astreamer *streamer;
501
502 streamer = palloc0_object(astreamer);
503 *((const astreamer_ops **) &streamer->bbs_ops) =
505 streamer->bbs_next = next;
506
507 return streamer;
508}
509
510/*
511 * Pass all the content through without change.
 *
 * The chunk must be unparsed: 'member' is NULL and 'context' is
 * ASTREAMER_UNKNOWN, both checked by Assert. It is handed to the successor
 * with the same arguments.
 *
 * NOTE(review): parts of the signature are absent from this listing; the
 * symbol index gives it as "static void
 * astreamer_tar_terminator_content(astreamer *streamer,
 * astreamer_member *member, const char *data, int len,
 * astreamer_archive_context context)".
512 */
513static void
515 astreamer_member *member,
516 const char *data, int len,
518{
519 /* Expect unparsed input. */
520 Assert(member == NULL);
521 Assert(context == ASTREAMER_UNKNOWN);
522
523 /* Just forward it. */
524 astreamer_content(streamer->bbs_next, member, data, len, context);
525}
526
527/*
528 * At the end, blindly add the two blocks of NUL bytes which the server fails
529 * to supply.
 *
 * A zero-filled buffer of 2 * TAR_BLOCK_SIZE bytes is passed to the
 * successor with a NULL member, and the successor is then finalized.
 *
 * NOTE(review): the signature and the final arguments of the
 * astreamer_content call are absent from this listing; the symbol index
 * gives the signature as
 * "static void astreamer_tar_terminator_finalize(astreamer *streamer)".
530 */
531static void
533{
534 char buffer[2 * TAR_BLOCK_SIZE];
535
536 memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
537 astreamer_content(streamer->bbs_next, NULL, buffer,
539 astreamer_finalize(streamer->bbs_next);
540}
541
542/*
543 * Free memory associated with a tar terminator.
 *
 * Calls astreamer_free on the successor and then frees the terminator
 * object itself.
 *
 * NOTE(review): the signature line is absent from this listing; the symbol
 * index gives it as
 * "static void astreamer_tar_terminator_free(astreamer *streamer)".
544 */
545static void
547{
548 astreamer_free(streamer->bbs_next);
549 pfree(streamer);
550}
static void astreamer_free(astreamer *streamer)
Definition astreamer.h:155
static void astreamer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition astreamer.h:137
static bool astreamer_buffer_until(astreamer *streamer, const char **data, int *len, int target_bytes)
Definition astreamer.h:186
static void astreamer_finalize(astreamer *streamer)
Definition astreamer.h:147
astreamer_archive_context
Definition astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition astreamer.h:64
static void astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len, int nbytes)
Definition astreamer.h:168
static const astreamer_ops astreamer_tar_terminator_ops
astreamer * astreamer_tar_parser_new(astreamer *next)
static const astreamer_ops astreamer_tar_parser_ops
static void astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_terminator_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_tar_archiver_free(astreamer *streamer)
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer)
astreamer * astreamer_tar_terminator_new(astreamer *next)
static void astreamer_tar_parser_finalize(astreamer *streamer)
static void astreamer_tar_archiver_finalize(astreamer *streamer)
static const astreamer_ops astreamer_tar_archiver_ops
astreamer * astreamer_tar_archiver_new(astreamer *next)
static void astreamer_tar_terminator_free(astreamer *streamer)
static void astreamer_tar_terminator_finalize(astreamer *streamer)
static void astreamer_tar_parser_free(astreamer *streamer)
static void astreamer_tar_archiver_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static int32 next
Definition blutils.c:225
#define Min(x, y)
Definition c.h:1091
#define Assert(condition)
Definition c.h:943
#define palloc0_object(type)
Definition fe_memutils.h:90
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1619
#define pg_fatal(...)
#define MAXPGPATH
const void size_t len
const void * data
uint64 read_tar_number(const char *s, int len)
Definition tar.c:58
static size_t tarPaddingBytesRequired(size_t len)
Definition pgtar.h:84
@ TAR_FILETYPE_PAX_EXTENDED_GLOBAL
Definition pgtar.h:66
@ TAR_FILETYPE_SYMLINK
Definition pgtar.h:63
@ TAR_FILETYPE_PLAIN_OLD
Definition pgtar.h:62
@ TAR_FILETYPE_DIRECTORY
Definition pgtar.h:64
@ TAR_FILETYPE_PLAIN
Definition pgtar.h:61
@ TAR_FILETYPE_PAX_EXTENDED
Definition pgtar.h:65
@ TAR_OFFSET_MODE
Definition pgtar.h:40
@ TAR_OFFSET_UID
Definition pgtar.h:41
@ TAR_OFFSET_TYPEFLAG
Definition pgtar.h:46
@ TAR_OFFSET_NAME
Definition pgtar.h:39
@ TAR_OFFSET_SIZE
Definition pgtar.h:43
@ TAR_OFFSET_GID
Definition pgtar.h:42
@ TAR_OFFSET_LINKNAME
Definition pgtar.h:47
enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime)
Definition tar.c:143
bool isValidTarHeader(const char *header)
Definition tar.c:112
#define TAR_BLOCK_SIZE
Definition pgtar.h:17
bool path_is_safe_for_extraction(const char *path)
Definition path.c:637
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
static int fb(int x)
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
char linktarget[MAXPGPATH]
Definition astreamer.h:90
char pathname[MAXPGPATH]
Definition astreamer.h:81
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition astreamer.h:128
astreamer_archive_context next_context
astreamer_member member
StringInfoData bbs_buffer
Definition astreamer.h:113
const astreamer_ops * bbs_ops
Definition astreamer.h:111
astreamer * bbs_next
Definition astreamer.h:112