PostgreSQL Source Code  git master
file_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * File-processing utility routines.
4  *
5  * Assorted utility functions to work on files.
6  *
7  *
8  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/common/file_utils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres_fe.h"
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 
22 #include "common/file_utils.h"
23 #include "common/logging.h"
24 
25 
/*
 * PG_FLUSH_DATA_WORKS is defined when this build has a way to hint to the
 * OS that a file is about to be fsync'd: either sync_file_range(), or
 * posix_fadvise() with POSIX_FADV_DONTNEED.
 */
#if defined(HAVE_SYNC_FILE_RANGE) || \
	(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
#define PG_FLUSH_DATA_WORKS 1
#endif

/*
 * First server version number in which the WAL directory is named pg_wal
 * rather than pg_xlog (the rename happened in version 10).
 */
#define MINIMUM_VERSION_FOR_PG_WAL	100000

/* Local helpers; pre_sync_fname exists only when a flush hint is available. */
#ifdef PG_FLUSH_DATA_WORKS
static int	pre_sync_fname(const char *fname, bool isdir);
#endif
static void walkdir(const char *path,
					int (*action) (const char *fname, bool isdir),
					bool process_symlinks);
44 
45 /*
46  * Issue fsync recursively on PGDATA and all its contents.
47  *
48  * We fsync regular files and directories wherever they are, but we follow
49  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
50  * Other symlinks are presumed to point at files we're not responsible for
51  * fsyncing, and might not have privileges to write at all.
52  *
53  * serverVersion indicates the version of the server to be fsync'd.
54  */
55 void
56 fsync_pgdata(const char *pg_data,
57  int serverVersion)
58 {
59  bool xlog_is_symlink;
60  char pg_wal[MAXPGPATH];
61  char pg_tblspc[MAXPGPATH];
62 
63  /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
64  snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
65  serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
66  snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
67 
68  /*
69  * If pg_wal is a symlink, we'll need to recurse into it separately,
70  * because the first walkdir below will ignore it.
71  */
72  xlog_is_symlink = false;
73 
74 #ifndef WIN32
75  {
76  struct stat st;
77 
78  if (lstat(pg_wal, &st) < 0)
79  pg_log_error("could not stat file \"%s\": %m", pg_wal);
80  else if (S_ISLNK(st.st_mode))
81  xlog_is_symlink = true;
82  }
83 #else
84  if (pgwin32_is_junction(pg_wal))
85  xlog_is_symlink = true;
86 #endif
87 
88  /*
89  * If possible, hint to the kernel that we're soon going to fsync the data
90  * directory and its contents.
91  */
92 #ifdef PG_FLUSH_DATA_WORKS
93  walkdir(pg_data, pre_sync_fname, false);
94  if (xlog_is_symlink)
95  walkdir(pg_wal, pre_sync_fname, false);
96  walkdir(pg_tblspc, pre_sync_fname, true);
97 #endif
98 
99  /*
100  * Now we do the fsync()s in the same order.
101  *
102  * The main call ignores symlinks, so in addition to specially processing
103  * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
104  * process_symlinks = true. Note that if there are any plain directories
105  * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
106  * so we don't worry about optimizing it.
107  */
108  walkdir(pg_data, fsync_fname, false);
109  if (xlog_is_symlink)
110  walkdir(pg_wal, fsync_fname, false);
111  walkdir(pg_tblspc, fsync_fname, true);
112 }
113 
114 /*
115  * Issue fsync recursively on the given directory and all its contents.
116  *
117  * This is a convenient wrapper on top of walkdir().
118  */
119 void
120 fsync_dir_recurse(const char *dir)
121 {
122  /*
123  * If possible, hint to the kernel that we're soon going to fsync the data
124  * directory and its contents.
125  */
126 #ifdef PG_FLUSH_DATA_WORKS
127  walkdir(dir, pre_sync_fname, false);
128 #endif
129 
130  walkdir(dir, fsync_fname, false);
131 }
132 
133 /*
134  * walkdir: recursively walk a directory, applying the action to each
135  * regular file and directory (including the named directory itself).
136  *
137  * If process_symlinks is true, the action and recursion are also applied
138  * to regular files and directories that are pointed to by symlinks in the
139  * given directory; otherwise symlinks are ignored. Symlinks are always
140  * ignored in subdirectories, ie we intentionally don't pass down the
141  * process_symlinks flag to recursive calls.
142  *
143  * Errors are reported but not considered fatal.
144  *
145  * See also walkdir in fd.c, which is a backend version of this logic.
146  */
147 static void
148 walkdir(const char *path,
149  int (*action) (const char *fname, bool isdir),
150  bool process_symlinks)
151 {
152  DIR *dir;
153  struct dirent *de;
154 
155  dir = opendir(path);
156  if (dir == NULL)
157  {
158  pg_log_error("could not open directory \"%s\": %m", path);
159  return;
160  }
161 
162  while (errno = 0, (de = readdir(dir)) != NULL)
163  {
164  char subpath[MAXPGPATH * 2];
165  struct stat fst;
166  int sret;
167 
168  if (strcmp(de->d_name, ".") == 0 ||
169  strcmp(de->d_name, "..") == 0)
170  continue;
171 
172  snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
173 
174  if (process_symlinks)
175  sret = stat(subpath, &fst);
176  else
177  sret = lstat(subpath, &fst);
178 
179  if (sret < 0)
180  {
181  pg_log_error("could not stat file \"%s\": %m", subpath);
182  continue;
183  }
184 
185  if (S_ISREG(fst.st_mode))
186  (*action) (subpath, false);
187  else if (S_ISDIR(fst.st_mode))
188  walkdir(subpath, action, false);
189  }
190 
191  if (errno)
192  pg_log_error("could not read directory \"%s\": %m", path);
193 
194  (void) closedir(dir);
195 
196  /*
197  * It's important to fsync the destination directory itself as individual
198  * file fsyncs don't guarantee that the directory entry for the file is
199  * synced. Recent versions of ext4 have made the window much wider but
200  * it's been an issue for ext3 and other filesystems in the past.
201  */
202  (*action) (path, true);
203 }
204 
/*
 * pre_sync_fname -- hint to the OS that this file is about to be fsync'd
 *
 * fname is the file or directory to hint about; isdir says which it is.
 *
 * Returns 0 on success, and also when the file could not be opened for a
 * reason we deliberately ignore (EACCES, or EISDIR for a directory).  Any
 * other open failure is logged and -1 is returned.  Failure of the hint
 * call itself is ignored.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir)
{
	int			fd = open(fname, O_RDONLY | PG_BINARY, 0);

	if (fd < 0)
	{
		bool		ignorable;

		/* unreadable files, and directories that can't be opened, are fine */
		ignorable = (errno == EACCES) || (isdir && errno == EISDIR);
		if (!ignorable)
		{
			pg_log_error("could not open file \"%s\": %m", fname);
			return -1;
		}
		return 0;
	}

	/*
	 * Mirror what pg_flush_data() does in the backend: sync_file_range() is
	 * preferred, with posix_fadvise() as the fallback.  This is only a
	 * hint, so the result of the call is discarded.
	 */
#if defined(HAVE_SYNC_FILE_RANGE)
	(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

	(void) close(fd);
	return 0;
}

#endif							/* PG_FLUSH_DATA_WORKS */
246 
247 /*
248  * fsync_fname -- Try to fsync a file or directory
249  *
250  * Ignores errors trying to open unreadable files, or trying to fsync
251  * directories on systems where that isn't allowed/required. All other errors
252  * are fatal.
253  */
254 int
255 fsync_fname(const char *fname, bool isdir)
256 {
257  int fd;
258  int flags;
259  int returncode;
260 
261  /*
262  * Some OSs require directories to be opened read-only whereas other
263  * systems don't allow us to fsync files opened read-only; so we need both
264  * cases here. Using O_RDWR will cause us to fail to fsync files that are
265  * not writable by our userid, but we assume that's OK.
266  */
267  flags = PG_BINARY;
268  if (!isdir)
269  flags |= O_RDWR;
270  else
271  flags |= O_RDONLY;
272 
273  /*
274  * Open the file, silently ignoring errors about unreadable files (or
275  * unsupported operations, e.g. opening a directory under Windows), and
276  * logging others.
277  */
278  fd = open(fname, flags, 0);
279  if (fd < 0)
280  {
281  if (errno == EACCES || (isdir && errno == EISDIR))
282  return 0;
283  pg_log_error("could not open file \"%s\": %m", fname);
284  return -1;
285  }
286 
287  returncode = fsync(fd);
288 
289  /*
290  * Some OSes don't allow us to fsync directories at all, so we can ignore
291  * those errors. Anything else needs to be reported.
292  */
293  if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
294  {
295  pg_log_fatal("could not fsync file \"%s\": %m", fname);
296  (void) close(fd);
297  exit(EXIT_FAILURE);
298  }
299 
300  (void) close(fd);
301  return 0;
302 }
303 
304 /*
305  * fsync_parent_path -- fsync the parent path of a file or directory
306  *
307  * This is aimed at making file operations persistent on disk in case of
308  * an OS crash or power failure.
309  */
310 int
311 fsync_parent_path(const char *fname)
312 {
313  char parentpath[MAXPGPATH];
314 
315  strlcpy(parentpath, fname, MAXPGPATH);
316  get_parent_directory(parentpath);
317 
318  /*
319  * get_parent_directory() returns an empty string if the input argument is
320  * just a file name (see comments in path.c), so handle that as being the
321  * current directory.
322  */
323  if (strlen(parentpath) == 0)
324  strlcpy(parentpath, ".", MAXPGPATH);
325 
326  if (fsync_fname(parentpath, true) != 0)
327  return -1;
328 
329  return 0;
330 }
331 
332 /*
333  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
334  *
335  * Wrapper around rename, similar to the backend version.
336  */
337 int
338 durable_rename(const char *oldfile, const char *newfile)
339 {
340  int fd;
341 
342  /*
343  * First fsync the old and target path (if it exists), to ensure that they
344  * are properly persistent on disk. Syncing the target file is not
345  * strictly necessary, but it makes it easier to reason about crashes;
346  * because it's then guaranteed that either source or target file exists
347  * after a crash.
348  */
349  if (fsync_fname(oldfile, false) != 0)
350  return -1;
351 
352  fd = open(newfile, PG_BINARY | O_RDWR, 0);
353  if (fd < 0)
354  {
355  if (errno != ENOENT)
356  {
357  pg_log_error("could not open file \"%s\": %m", newfile);
358  return -1;
359  }
360  }
361  else
362  {
363  if (fsync(fd) != 0)
364  {
365  pg_log_fatal("could not fsync file \"%s\": %m", newfile);
366  close(fd);
367  exit(EXIT_FAILURE);
368  }
369  close(fd);
370  }
371 
372  /* Time to do the real deal... */
373  if (rename(oldfile, newfile) != 0)
374  {
375  pg_log_error("could not rename file \"%s\" to \"%s\": %m",
376  oldfile, newfile);
377  return -1;
378  }
379 
380  /*
381  * To guarantee renaming the file is persistent, fsync the file with its
382  * new name, and its containing directory.
383  */
384  if (fsync_fname(newfile, false) != 0)
385  return -1;
386 
387  if (fsync_parent_path(newfile) != 0)
388  return -1;
389 
390  return 0;
391 }
int fsync_fname(const char *fname, bool isdir)
Definition: file_utils.c:255
#define MINIMUM_VERSION_FOR_PG_WAL
Definition: file_utils.c:36
int fsync_parent_path(const char *fname)
Definition: file_utils.c:311
#define pg_log_error(...)
Definition: logging.h:79
int durable_rename(const char *oldfile, const char *newfile)
Definition: file_utils.c:338
int closedir(DIR *)
Definition: dirent.c:113
static void walkdir(const char *path, int(*action)(const char *fname, bool isdir), bool process_symlinks)
Definition: file_utils.c:148
Definition: dirent.h:9
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1234
void fsync_pgdata(const char *pg_data, int serverVersion)
Definition: file_utils.c:56
#define fsync(fd)
Definition: win32_port.h:62
Definition: dirent.c:25
void fsync_dir_recurse(const char *dir)
Definition: file_utils.c:120
#define MAXPGPATH
DIR * opendir(const char *)
Definition: dirent.c:33
void get_parent_directory(char *path)
Definition: path.c:854
static char * pg_data
Definition: initdb.c:123
#define S_ISREG(m)
Definition: win32_port.h:299
#define stat(a, b)
Definition: win32_port.h:255
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
struct dirent * readdir(DIR *)
Definition: dirent.c:77
#define S_ISDIR(m)
Definition: win32_port.h:296
#define lstat(path, sb)
Definition: win32_port.h:244
#define EXIT_FAILURE
Definition: settings.h:154
char d_name[MAX_PATH]
Definition: dirent.h:14
#define close(a)
Definition: win32.h:12
#define snprintf
Definition: port.h:193
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:241
bool pgwin32_is_junction(const char *path)
#define pg_log_fatal(...)
Definition: logging.h:75