PostgreSQL Source Code  git master
file_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * File-processing utility routines.
4  *
5  * Assorted utility functions to work on files.
6  *
7  *
8  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/common/file_utils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #ifndef FRONTEND
17 #error "This file is not expected to be compiled for backend code"
18 #endif
19 
20 #include "postgres_fe.h"
21 
22 #include <dirent.h>
23 #include <fcntl.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 
27 #include "common/file_utils.h"
28 #include "common/logging.h"
29 
30 
31 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
32 #if defined(HAVE_SYNC_FILE_RANGE)
33 #define PG_FLUSH_DATA_WORKS 1
34 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
35 #define PG_FLUSH_DATA_WORKS 1
36 #endif
37 
38 /*
39  * pg_xlog has been renamed to pg_wal in version 10.
40  */
41 #define MINIMUM_VERSION_FOR_PG_WAL 100000
42 
43 #ifdef PG_FLUSH_DATA_WORKS
44 static int pre_sync_fname(const char *fname, bool isdir);
45 #endif
46 static void walkdir(const char *path,
47  int (*action) (const char *fname, bool isdir),
48  bool process_symlinks);
49 
50 /*
51  * Issue fsync recursively on PGDATA and all its contents.
52  *
53  * We fsync regular files and directories wherever they are, but we follow
54  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
55  * Other symlinks are presumed to point at files we're not responsible for
56  * fsyncing, and might not have privileges to write at all.
57  *
58  * serverVersion indicates the version of the server to be fsync'd.
59  */
60 void
61 fsync_pgdata(const char *pg_data,
62  int serverVersion)
63 {
64  bool xlog_is_symlink;
65  char pg_wal[MAXPGPATH];
66  char pg_tblspc[MAXPGPATH];
67 
68  /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
69  snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
70  serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
71  snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
72 
73  /*
74  * If pg_wal is a symlink, we'll need to recurse into it separately,
75  * because the first walkdir below will ignore it.
76  */
77  xlog_is_symlink = false;
78 
79 #ifndef WIN32
80  {
81  struct stat st;
82 
83  if (lstat(pg_wal, &st) < 0)
84  pg_log_error("could not stat file \"%s\": %m", pg_wal);
85  else if (S_ISLNK(st.st_mode))
86  xlog_is_symlink = true;
87  }
88 #else
89  if (pgwin32_is_junction(pg_wal))
90  xlog_is_symlink = true;
91 #endif
92 
93  /*
94  * If possible, hint to the kernel that we're soon going to fsync the data
95  * directory and its contents.
96  */
97 #ifdef PG_FLUSH_DATA_WORKS
98  walkdir(pg_data, pre_sync_fname, false);
99  if (xlog_is_symlink)
100  walkdir(pg_wal, pre_sync_fname, false);
101  walkdir(pg_tblspc, pre_sync_fname, true);
102 #endif
103 
104  /*
105  * Now we do the fsync()s in the same order.
106  *
107  * The main call ignores symlinks, so in addition to specially processing
108  * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
109  * process_symlinks = true. Note that if there are any plain directories
110  * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
111  * so we don't worry about optimizing it.
112  */
113  walkdir(pg_data, fsync_fname, false);
114  if (xlog_is_symlink)
115  walkdir(pg_wal, fsync_fname, false);
116  walkdir(pg_tblspc, fsync_fname, true);
117 }
118 
119 /*
120  * Issue fsync recursively on the given directory and all its contents.
121  *
122  * This is a convenient wrapper on top of walkdir().
123  */
124 void
125 fsync_dir_recurse(const char *dir)
126 {
127  /*
128  * If possible, hint to the kernel that we're soon going to fsync the data
129  * directory and its contents.
130  */
131 #ifdef PG_FLUSH_DATA_WORKS
132  walkdir(dir, pre_sync_fname, false);
133 #endif
134 
135  walkdir(dir, fsync_fname, false);
136 }
137 
138 /*
139  * walkdir: recursively walk a directory, applying the action to each
140  * regular file and directory (including the named directory itself).
141  *
142  * If process_symlinks is true, the action and recursion are also applied
143  * to regular files and directories that are pointed to by symlinks in the
144  * given directory; otherwise symlinks are ignored. Symlinks are always
145  * ignored in subdirectories, ie we intentionally don't pass down the
146  * process_symlinks flag to recursive calls.
147  *
148  * Errors are reported but not considered fatal.
149  *
150  * See also walkdir in fd.c, which is a backend version of this logic.
151  */
152 static void
153 walkdir(const char *path,
154  int (*action) (const char *fname, bool isdir),
155  bool process_symlinks)
156 {
157  DIR *dir;
158  struct dirent *de;
159 
160  dir = opendir(path);
161  if (dir == NULL)
162  {
163  pg_log_error("could not open directory \"%s\": %m", path);
164  return;
165  }
166 
167  while (errno = 0, (de = readdir(dir)) != NULL)
168  {
169  char subpath[MAXPGPATH * 2];
170  struct stat fst;
171  int sret;
172 
173  if (strcmp(de->d_name, ".") == 0 ||
174  strcmp(de->d_name, "..") == 0)
175  continue;
176 
177  snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
178 
179  if (process_symlinks)
180  sret = stat(subpath, &fst);
181  else
182  sret = lstat(subpath, &fst);
183 
184  if (sret < 0)
185  {
186  pg_log_error("could not stat file \"%s\": %m", subpath);
187  continue;
188  }
189 
190  if (S_ISREG(fst.st_mode))
191  (*action) (subpath, false);
192  else if (S_ISDIR(fst.st_mode))
193  walkdir(subpath, action, false);
194  }
195 
196  if (errno)
197  pg_log_error("could not read directory \"%s\": %m", path);
198 
199  (void) closedir(dir);
200 
201  /*
202  * It's important to fsync the destination directory itself as individual
203  * file fsyncs don't guarantee that the directory entry for the file is
204  * synced. Recent versions of ext4 have made the window much wider but
205  * it's been an issue for ext3 and other filesystems in the past.
206  */
207  (*action) (path, true);
208 }
209 
/*
 * pre_sync_fname -- hint to the OS that this file will be fsync()ed soon.
 *
 * Returns 0 when the hint was issued, or when the file could not be opened
 * because of EACCES (or EISDIR for a directory).  Any other open failure is
 * logged and -1 is returned; it is never fatal.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir)
{
	int			fd = open(fname, O_RDONLY | PG_BINARY, 0);

	if (fd >= 0)
	{
		/*
		 * Same approach as pg_flush_data() in the backend: use
		 * sync_file_range when available, otherwise posix_fadvise.  The
		 * result is ignored on purpose, since this is only a hint.
		 */
#if defined(HAVE_SYNC_FILE_RANGE)
		(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
		(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

		(void) close(fd);
		return 0;
	}

	/* The open failed: unreadable files are skipped silently. */
	if (errno == EACCES)
		return 0;
	if (isdir && errno == EISDIR)
		return 0;

	pg_log_error("could not open file \"%s\": %m", fname);
	return -1;
}

#endif							/* PG_FLUSH_DATA_WORKS */
251 
252 /*
253  * fsync_fname -- Try to fsync a file or directory
254  *
255  * Ignores errors trying to open unreadable files, or trying to fsync
256  * directories on systems where that isn't allowed/required. All other errors
257  * are fatal.
258  */
259 int
260 fsync_fname(const char *fname, bool isdir)
261 {
262  int fd;
263  int flags;
264  int returncode;
265 
266  /*
267  * Some OSs require directories to be opened read-only whereas other
268  * systems don't allow us to fsync files opened read-only; so we need both
269  * cases here. Using O_RDWR will cause us to fail to fsync files that are
270  * not writable by our userid, but we assume that's OK.
271  */
272  flags = PG_BINARY;
273  if (!isdir)
274  flags |= O_RDWR;
275  else
276  flags |= O_RDONLY;
277 
278  /*
279  * Open the file, silently ignoring errors about unreadable files (or
280  * unsupported operations, e.g. opening a directory under Windows), and
281  * logging others.
282  */
283  fd = open(fname, flags, 0);
284  if (fd < 0)
285  {
286  if (errno == EACCES || (isdir && errno == EISDIR))
287  return 0;
288  pg_log_error("could not open file \"%s\": %m", fname);
289  return -1;
290  }
291 
292  returncode = fsync(fd);
293 
294  /*
295  * Some OSes don't allow us to fsync directories at all, so we can ignore
296  * those errors. Anything else needs to be reported.
297  */
298  if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
299  {
300  pg_log_fatal("could not fsync file \"%s\": %m", fname);
301  (void) close(fd);
302  exit(EXIT_FAILURE);
303  }
304 
305  (void) close(fd);
306  return 0;
307 }
308 
309 /*
310  * fsync_parent_path -- fsync the parent path of a file or directory
311  *
312  * This is aimed at making file operations persistent on disk in case of
313  * an OS crash or power failure.
314  */
315 int
316 fsync_parent_path(const char *fname)
317 {
318  char parentpath[MAXPGPATH];
319 
320  strlcpy(parentpath, fname, MAXPGPATH);
321  get_parent_directory(parentpath);
322 
323  /*
324  * get_parent_directory() returns an empty string if the input argument is
325  * just a file name (see comments in path.c), so handle that as being the
326  * current directory.
327  */
328  if (strlen(parentpath) == 0)
329  strlcpy(parentpath, ".", MAXPGPATH);
330 
331  if (fsync_fname(parentpath, true) != 0)
332  return -1;
333 
334  return 0;
335 }
336 
337 /*
338  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
339  *
340  * Wrapper around rename, similar to the backend version.
341  */
342 int
343 durable_rename(const char *oldfile, const char *newfile)
344 {
345  int fd;
346 
347  /*
348  * First fsync the old and target path (if it exists), to ensure that they
349  * are properly persistent on disk. Syncing the target file is not
350  * strictly necessary, but it makes it easier to reason about crashes;
351  * because it's then guaranteed that either source or target file exists
352  * after a crash.
353  */
354  if (fsync_fname(oldfile, false) != 0)
355  return -1;
356 
357  fd = open(newfile, PG_BINARY | O_RDWR, 0);
358  if (fd < 0)
359  {
360  if (errno != ENOENT)
361  {
362  pg_log_error("could not open file \"%s\": %m", newfile);
363  return -1;
364  }
365  }
366  else
367  {
368  if (fsync(fd) != 0)
369  {
370  pg_log_fatal("could not fsync file \"%s\": %m", newfile);
371  close(fd);
372  exit(EXIT_FAILURE);
373  }
374  close(fd);
375  }
376 
377  /* Time to do the real deal... */
378  if (rename(oldfile, newfile) != 0)
379  {
380  pg_log_error("could not rename file \"%s\" to \"%s\": %m",
381  oldfile, newfile);
382  return -1;
383  }
384 
385  /*
386  * To guarantee renaming the file is persistent, fsync the file with its
387  * new name, and its containing directory.
388  */
389  if (fsync_fname(newfile, false) != 0)
390  return -1;
391 
392  if (fsync_parent_path(newfile) != 0)
393  return -1;
394 
395  return 0;
396 }
int fsync_fname(const char *fname, bool isdir)
Definition: file_utils.c:260
#define MINIMUM_VERSION_FOR_PG_WAL
Definition: file_utils.c:41
int fsync_parent_path(const char *fname)
Definition: file_utils.c:316
#define pg_log_error(...)
Definition: logging.h:79
int durable_rename(const char *oldfile, const char *newfile)
Definition: file_utils.c:343
int closedir(DIR *)
Definition: dirent.c:113
static void walkdir(const char *path, int(*action)(const char *fname, bool isdir), bool process_symlinks)
Definition: file_utils.c:153
Definition: dirent.h:9
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1240
void fsync_pgdata(const char *pg_data, int serverVersion)
Definition: file_utils.c:61
#define fsync(fd)
Definition: win32_port.h:62
Definition: dirent.c:25
void fsync_dir_recurse(const char *dir)
Definition: file_utils.c:125
#define MAXPGPATH
DIR * opendir(const char *)
Definition: dirent.c:33
void get_parent_directory(char *path)
Definition: path.c:854
static char * pg_data
Definition: initdb.c:123
#define S_ISREG(m)
Definition: win32_port.h:299
#define stat(a, b)
Definition: win32_port.h:255
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
struct dirent * readdir(DIR *)
Definition: dirent.c:77
#define S_ISDIR(m)
Definition: win32_port.h:296
#define lstat(path, sb)
Definition: win32_port.h:244
#define EXIT_FAILURE
Definition: settings.h:154
char d_name[MAX_PATH]
Definition: dirent.h:14
#define close(a)
Definition: win32.h:12
#define snprintf
Definition: port.h:193
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:241
bool pgwin32_is_junction(const char *path)
#define pg_log_fatal(...)
Definition: logging.h:75