PostgreSQL Source Code  git master
astreamer_file.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * astreamer_file.c
4  *
5  * Archive streamers that write to files. astreamer_plain_writer writes
6  * the whole archive to a single file, and astreamer_extractor writes
7  * each archive member to a separate file in a given directory.
8  *
9  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
10  *
11  * IDENTIFICATION
12  * src/bin/pg_basebackup/astreamer_file.c
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres_fe.h"
17 
18 #include <unistd.h>
19 
20 #include "common/file_perm.h"
21 #include "common/logging.h"
22 #include "common/string.h"
23 #include "fe_utils/astreamer.h"
24 
25 typedef struct astreamer_plain_writer
26 {
28  char *pathname;
29  FILE *file;
32 
33 typedef struct astreamer_extractor
34 {
36  char *basepath;
37  const char *(*link_map) (const char *);
38  void (*report_output_file) (const char *);
40  FILE *file;
42 
43 static void astreamer_plain_writer_content(astreamer *streamer,
44  astreamer_member *member,
45  const char *data, int len,
47 static void astreamer_plain_writer_finalize(astreamer *streamer);
48 static void astreamer_plain_writer_free(astreamer *streamer);
49 
54 };
55 
56 static void astreamer_extractor_content(astreamer *streamer,
57  astreamer_member *member,
58  const char *data, int len,
60 static void astreamer_extractor_finalize(astreamer *streamer);
61 static void astreamer_extractor_free(astreamer *streamer);
62 static void extract_directory(const char *filename, mode_t mode);
63 static void extract_link(const char *filename, const char *linktarget);
64 static FILE *create_file_for_extract(const char *filename, mode_t mode);
65 
68  .finalize = astreamer_extractor_finalize,
70 };
71 
72 /*
73  * Create a astreamer that just writes data to a file.
74  *
75  * The caller must specify a pathname and may specify a file. The pathname is
76  * used for error-reporting purposes either way. If file is NULL, the pathname
77  * also identifies the file to which the data should be written: it is opened
78  * for writing and closed when done. If file is not NULL, the data is written
79  * there.
80  */
81 astreamer *
82 astreamer_plain_writer_new(char *pathname, FILE *file)
83 {
84  astreamer_plain_writer *streamer;
85 
86  streamer = palloc0(sizeof(astreamer_plain_writer));
87  *((const astreamer_ops **) &streamer->base.bbs_ops) =
89 
90  streamer->pathname = pstrdup(pathname);
91  streamer->file = file;
92 
93  if (file == NULL)
94  {
95  streamer->file = fopen(pathname, "wb");
96  if (streamer->file == NULL)
97  pg_fatal("could not create file \"%s\": %m", pathname);
98  streamer->should_close_file = true;
99  }
100 
101  return &streamer->base;
102 }
103 
104 /*
105  * Write archive content to file.
106  */
107 static void
109  astreamer_member *member, const char *data,
111 {
112  astreamer_plain_writer *mystreamer;
113 
114  mystreamer = (astreamer_plain_writer *) streamer;
115 
116  if (len == 0)
117  return;
118 
119  errno = 0;
120  if (fwrite(data, len, 1, mystreamer->file) != 1)
121  {
122  /* if write didn't set errno, assume problem is no disk space */
123  if (errno == 0)
124  errno = ENOSPC;
125  pg_fatal("could not write to file \"%s\": %m",
126  mystreamer->pathname);
127  }
128 }
129 
130 /*
131  * End-of-archive processing when writing to a plain file consists of closing
132  * the file if we opened it, but not if the caller provided it.
133  */
134 static void
136 {
137  astreamer_plain_writer *mystreamer;
138 
139  mystreamer = (astreamer_plain_writer *) streamer;
140 
141  if (mystreamer->should_close_file && fclose(mystreamer->file) != 0)
142  pg_fatal("could not close file \"%s\": %m",
143  mystreamer->pathname);
144 
145  mystreamer->file = NULL;
146  mystreamer->should_close_file = false;
147 }
148 
149 /*
150  * Free memory associated with this astreamer.
151  */
152 static void
154 {
155  astreamer_plain_writer *mystreamer;
156 
157  mystreamer = (astreamer_plain_writer *) streamer;
158 
159  Assert(!mystreamer->should_close_file);
160  Assert(mystreamer->base.bbs_next == NULL);
161 
162  pfree(mystreamer->pathname);
163  pfree(mystreamer);
164 }
165 
166 /*
167  * Create a astreamer that extracts an archive.
168  *
169  * All pathnames in the archive are interpreted relative to basepath.
170  *
171  * Unlike e.g. astreamer_plain_writer_new() we can't do anything useful here
172  * with untyped chunks; we need typed chunks which follow the rules described
173  * in astreamer.h. Assuming we have that, we don't need to worry about the
174  * original archive format; it's enough to just look at the member information
175  * provided and write to the corresponding file.
176  *
177  * 'link_map' is a function that will be applied to the target of any
178  * symbolic link, and which should return a replacement pathname to be used
179  * in its place. If NULL, the symbolic link target is used without
180  * modification.
181  *
182  * 'report_output_file' is a function that will be called each time we open a
183  * new output file. The pathname to that file is passed as an argument. If
184  * NULL, the call is skipped.
185  */
186 astreamer *
187 astreamer_extractor_new(const char *basepath,
188  const char *(*link_map) (const char *),
189  void (*report_output_file) (const char *))
190 {
191  astreamer_extractor *streamer;
192 
193  streamer = palloc0(sizeof(astreamer_extractor));
194  *((const astreamer_ops **) &streamer->base.bbs_ops) =
196  streamer->basepath = pstrdup(basepath);
197  streamer->link_map = link_map;
198  streamer->report_output_file = report_output_file;
199 
200  return &streamer->base;
201 }
202 
203 /*
204  * Extract archive contents to the filesystem.
205  */
206 static void
208  const char *data, int len,
210 {
211  astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
212  int fnamelen;
213 
214  Assert(member != NULL || context == ASTREAMER_ARCHIVE_TRAILER);
216 
217  switch (context)
218  {
220  Assert(mystreamer->file == NULL);
221 
222  /* Prepend basepath. */
223  snprintf(mystreamer->filename, sizeof(mystreamer->filename),
224  "%s/%s", mystreamer->basepath, member->pathname);
225 
226  /* Remove any trailing slash. */
227  fnamelen = strlen(mystreamer->filename);
228  if (mystreamer->filename[fnamelen - 1] == '/')
229  mystreamer->filename[fnamelen - 1] = '\0';
230 
231  /* Dispatch based on file type. */
232  if (member->is_directory)
233  extract_directory(mystreamer->filename, member->mode);
234  else if (member->is_link)
235  {
236  const char *linktarget = member->linktarget;
237 
238  if (mystreamer->link_map)
239  linktarget = mystreamer->link_map(linktarget);
240  extract_link(mystreamer->filename, linktarget);
241  }
242  else
243  mystreamer->file =
244  create_file_for_extract(mystreamer->filename,
245  member->mode);
246 
247  /* Report output file change. */
248  if (mystreamer->report_output_file)
249  mystreamer->report_output_file(mystreamer->filename);
250  break;
251 
253  if (mystreamer->file == NULL)
254  break;
255 
256  errno = 0;
257  if (len > 0 && fwrite(data, len, 1, mystreamer->file) != 1)
258  {
259  /* if write didn't set errno, assume problem is no disk space */
260  if (errno == 0)
261  errno = ENOSPC;
262  pg_fatal("could not write to file \"%s\": %m",
263  mystreamer->filename);
264  }
265  break;
266 
268  if (mystreamer->file == NULL)
269  break;
270  fclose(mystreamer->file);
271  mystreamer->file = NULL;
272  break;
273 
275  break;
276 
277  default:
278  /* Shouldn't happen. */
279  pg_fatal("unexpected state while extracting archive");
280  }
281 }
282 
/*
 * Should we tolerate an already-existing directory?
 *
 * When streaming WAL, pg_wal (or pg_xlog for pre-9.6 clusters) will have been
 * created by the wal receiver process. Also, when the WAL directory location
 * was specified, pg_wal (or pg_xlog) has already been created as a symbolic
 * link before starting the actual backup. So just ignore creation failures
 * on related directories.
 *
 * If in-place tablespaces are used, pg_tblspc and subdirectories may already
 * exist when we get here. So tolerate that case, too.
 *
 * Returns true if the last component of 'pathname' is one of the well-known
 * directory names above, or is all digits and sits directly below the first
 * "/pg_tblspc/" found in 'pathname'; false otherwise.
 */
static bool
should_allow_existing_directory(const char *pathname)
{
	const char *separator = last_dir_separator(pathname);
	const char *filename;

	/*
	 * A path with no directory separator is its own last component. Don't do
	 * pointer arithmetic on a NULL result.
	 */
	filename = (separator != NULL) ? separator + 1 : pathname;

	if (strcmp(filename, "pg_wal") == 0 ||
		strcmp(filename, "pg_xlog") == 0 ||
		strcmp(filename, "archive_status") == 0 ||
		strcmp(filename, "summaries") == 0 ||
		strcmp(filename, "pg_tblspc") == 0)
		return true;

	/* An all-digits name is acceptable only directly below pg_tblspc. */
	if (strspn(filename, "0123456789") == strlen(filename))
	{
		const char *pg_tblspc = strstr(pathname, "/pg_tblspc/");

		return pg_tblspc != NULL &&
			pg_tblspc + strlen("/pg_tblspc/") == filename;
	}

	return false;
}
316 
317 /*
318  * Create a directory.
319  */
320 static void
321 extract_directory(const char *filename, mode_t mode)
322 {
323  if (mkdir(filename, pg_dir_create_mode) != 0 &&
324  (errno != EEXIST || !should_allow_existing_directory(filename)))
325  pg_fatal("could not create directory \"%s\": %m",
326  filename);
327 
328 #ifndef WIN32
329  if (chmod(filename, mode))
330  pg_fatal("could not set permissions on directory \"%s\": %m",
331  filename);
332 #endif
333 }
334 
/*
 * Create a symbolic link named 'filename' pointing at 'linktarget'.
 *
 * It's most likely a link in pg_tblspc directory, to the location of a
 * tablespace. Any tablespace mapping given on the command line
 * (--tablespace-mapping) has been applied to 'linktarget' by the caller. (The
 * mapping is applied blindly, without checking that the link really is inside
 * pg_tblspc. We don't expect there to be other symlinks in a data directory,
 * but if there are, you can call it an undocumented feature that you can map
 * them too.)
 *
 * Failure to create the link is fatal.
 */
static void
extract_link(const char *filename, const char *linktarget)
{
	int			rc = symlink(linktarget, filename);

	if (rc == 0)
		return;

	pg_fatal("could not create symbolic link from \"%s\" to \"%s\": %m",
			 filename, linktarget);
}
352 
/*
 * Create a regular file.
 *
 * The file is opened for binary writing, which truncates any existing file
 * of the same name. Except on Windows, its permissions are then set to
 * 'mode'. Any failure is fatal.
 *
 * Return the resulting handle so we can write the content to the file.
 */
static FILE *
create_file_for_extract(const char *filename, mode_t mode)
{
	FILE	   *file;

	file = fopen(filename, "wb");
	if (file == NULL)
		pg_fatal("could not create file \"%s\": %m", filename);

#ifndef WIN32
	if (chmod(filename, mode))
		pg_fatal("could not set permissions on file \"%s\": %m",
				 filename);
#endif

	return file;
}
375 
376 /*
377  * End-of-stream processing for extracting an archive.
378  *
379  * There's nothing to do here but sanity checking.
380  */
381 static void
383 {
385  = (astreamer_extractor *) streamer;
386 
387  Assert(mystreamer->file == NULL);
388 }
389 
390 /*
391  * Free memory.
392  */
393 static void
395 {
396  astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
397 
398  pfree(mystreamer->basepath);
399  pfree(mystreamer);
400 }
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void astreamer_plain_writer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void astreamer_extractor_finalize(astreamer *streamer)
static void extract_directory(const char *filename, mode_t mode)
static FILE * create_file_for_extract(const char *filename, mode_t mode)
static const astreamer_ops astreamer_extractor_ops
astreamer * astreamer_plain_writer_new(char *pathname, FILE *file)
struct astreamer_extractor astreamer_extractor
astreamer * astreamer_extractor_new(const char *basepath, const char *(*link_map)(const char *), void(*report_output_file)(const char *))
struct astreamer_plain_writer astreamer_plain_writer
static void astreamer_plain_writer_finalize(astreamer *streamer)
static void astreamer_extractor_free(astreamer *streamer)
static void astreamer_extractor_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static const astreamer_ops astreamer_plain_writer_ops
static bool should_allow_existing_directory(const char *pathname)
static void extract_link(const char *filename, const char *linktarget)
static void astreamer_plain_writer_free(astreamer *streamer)
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:185
#define Assert(condition)
Definition: c.h:849
int pg_dir_create_mode
Definition: file_perm.c:18
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define MAXPGPATH
const void size_t len
const void * data
static char * filename
Definition: pg_dumpall.c:119
char * last_dir_separator(const char *filename)
Definition: path.c:140
#define snprintf
Definition: port.h:238
tree context
Definition: radixtree.h:1835
void(* report_output_file)(const char *)
char filename[MAXPGPATH]
const char *(* link_map)(const char *)
char linktarget[MAXPGPATH]
Definition: astreamer.h:88
char pathname[MAXPGPATH]
Definition: astreamer.h:81
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110
#define mkdir(a, b)
Definition: win32_port.h:80
#define symlink(oldpath, newpath)
Definition: win32_port.h:235