PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
file_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * File-processing utility routines.
4  *
5  * Assorted utility functions to work on files.
6  *
7  *
8  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/common/file_utils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres_fe.h"
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 
22 #include "common/file_utils.h"
23 
24 
25 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
26 #if defined(HAVE_SYNC_FILE_RANGE)
27 #define PG_FLUSH_DATA_WORKS 1
28 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
29 #define PG_FLUSH_DATA_WORKS 1
30 #endif
31 
32 /*
33  * pg_xlog has been renamed to pg_wal in version 10.
34  */
35 #define MINIMUM_VERSION_FOR_PG_WAL 100000
36 
37 #ifdef PG_FLUSH_DATA_WORKS
38 static int pre_sync_fname(const char *fname, bool isdir,
39  const char *progname);
40 #endif
41 static void walkdir(const char *path,
42  int (*action) (const char *fname, bool isdir, const char *progname),
43  bool process_symlinks, const char *progname);
44 
45 /*
46  * Issue fsync recursively on PGDATA and all its contents.
47  *
48  * We fsync regular files and directories wherever they are, but we follow
49  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
50  * Other symlinks are presumed to point at files we're not responsible for
51  * fsyncing, and might not have privileges to write at all.
52  *
53  * serverVersion indicates the version of the server to be fsync'd.
54  *
55  * Errors are reported but not considered fatal.
56  */
57 void
58 fsync_pgdata(const char *pg_data,
59  const char *progname,
60  int serverVersion)
61 {
62  bool xlog_is_symlink;
63  char pg_wal[MAXPGPATH];
64  char pg_tblspc[MAXPGPATH];
65 
66  /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
67  snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
68  serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
69  snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
70 
71  /*
72  * If pg_wal is a symlink, we'll need to recurse into it separately,
73  * because the first walkdir below will ignore it.
74  */
75  xlog_is_symlink = false;
76 
77 #ifndef WIN32
78  {
79  struct stat st;
80 
81  if (lstat(pg_wal, &st) < 0)
82  fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
83  progname, pg_wal, strerror(errno));
84  else if (S_ISLNK(st.st_mode))
85  xlog_is_symlink = true;
86  }
87 #else
88  if (pgwin32_is_junction(pg_wal))
89  xlog_is_symlink = true;
90 #endif
91 
92  /*
93  * If possible, hint to the kernel that we're soon going to fsync the data
94  * directory and its contents.
95  */
96 #ifdef PG_FLUSH_DATA_WORKS
97  walkdir(pg_data, pre_sync_fname, false, progname);
98  if (xlog_is_symlink)
99  walkdir(pg_wal, pre_sync_fname, false, progname);
100  walkdir(pg_tblspc, pre_sync_fname, true, progname);
101 #endif
102 
103  /*
104  * Now we do the fsync()s in the same order.
105  *
106  * The main call ignores symlinks, so in addition to specially processing
107  * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
108  * process_symlinks = true. Note that if there are any plain directories
109  * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
110  * so we don't worry about optimizing it.
111  */
112  walkdir(pg_data, fsync_fname, false, progname);
113  if (xlog_is_symlink)
114  walkdir(pg_wal, fsync_fname, false, progname);
115  walkdir(pg_tblspc, fsync_fname, true, progname);
116 }
117 
118 /*
119  * Issue fsync recursively on the given directory and all its contents.
120  *
121  * This is a convenient wrapper on top of walkdir().
122  */
123 void
124 fsync_dir_recurse(const char *dir, const char *progname)
125 {
126  /*
127  * If possible, hint to the kernel that we're soon going to fsync the data
128  * directory and its contents.
129  */
130 #ifdef PG_FLUSH_DATA_WORKS
131  walkdir(dir, pre_sync_fname, false, progname);
132 #endif
133 
134  walkdir(dir, fsync_fname, false, progname);
135 }
136 
137 /*
138  * walkdir: recursively walk a directory, applying the action to each
139  * regular file and directory (including the named directory itself).
140  *
141  * If process_symlinks is true, the action and recursion are also applied
142  * to regular files and directories that are pointed to by symlinks in the
143  * given directory; otherwise symlinks are ignored. Symlinks are always
144  * ignored in subdirectories, ie we intentionally don't pass down the
145  * process_symlinks flag to recursive calls.
146  *
147  * Errors are reported but not considered fatal.
148  *
149  * See also walkdir in fd.c, which is a backend version of this logic.
150  */
151 static void
152 walkdir(const char *path,
153  int (*action) (const char *fname, bool isdir, const char *progname),
154  bool process_symlinks, const char *progname)
155 {
156  DIR *dir;
157  struct dirent *de;
158 
159  dir = opendir(path);
160  if (dir == NULL)
161  {
162  fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
163  progname, path, strerror(errno));
164  return;
165  }
166 
167  while (errno = 0, (de = readdir(dir)) != NULL)
168  {
169  char subpath[MAXPGPATH * 2];
170  struct stat fst;
171  int sret;
172 
173  if (strcmp(de->d_name, ".") == 0 ||
174  strcmp(de->d_name, "..") == 0)
175  continue;
176 
177  snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
178 
179  if (process_symlinks)
180  sret = stat(subpath, &fst);
181  else
182  sret = lstat(subpath, &fst);
183 
184  if (sret < 0)
185  {
186  fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
187  progname, subpath, strerror(errno));
188  continue;
189  }
190 
191  if (S_ISREG(fst.st_mode))
192  (*action) (subpath, false, progname);
193  else if (S_ISDIR(fst.st_mode))
194  walkdir(subpath, action, false, progname);
195  }
196 
197  if (errno)
198  fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
199  progname, path, strerror(errno));
200 
201  (void) closedir(dir);
202 
203  /*
204  * It's important to fsync the destination directory itself as individual
205  * file fsyncs don't guarantee that the directory entry for the file is
206  * synced. Recent versions of ext4 have made the window much wider but
207  * it's been an issue for ext3 and other filesystems in the past.
208  */
209  (*action) (path, true, progname);
210 }
211 
/*
 * pre_sync_fname -- tell the OS that an fsync() of this file is coming.
 *
 * Only compiled when a flush-hint primitive is available.
 *
 * Returns 0 when the hint was issued, or when the file could not be opened
 * for a reason that is deliberately ignored (no permission, or a directory
 * that cannot be opened this way).  Any other open failure is reported on
 * stderr and yields -1.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir, const char *progname)
{
	int			fd = open(fname, O_RDONLY | PG_BINARY);

	if (fd < 0)
	{
		/* Unreadable files and unopenable directories are not errors */
		if (errno != EACCES && !(isdir && errno == EISDIR))
		{
			fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
					progname, fname, strerror(errno));
			return -1;
		}
		return 0;
	}

	/*
	 * Mirror what pg_flush_data() does in the backend: sync_file_range is
	 * the preferred mechanism, posix_fadvise the fallback.  The result is
	 * discarded since this is nothing more than a hint.
	 */
#if defined(HAVE_SYNC_FILE_RANGE)
	(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

	(void) close(fd);

	return 0;
}

#endif							/* PG_FLUSH_DATA_WORKS */
254 
255 /*
256  * fsync_fname -- Try to fsync a file or directory
257  *
258  * Ignores errors trying to open unreadable files, or trying to fsync
259  * directories on systems where that isn't allowed/required. Reports
260  * other errors non-fatally.
261  */
262 int
263 fsync_fname(const char *fname, bool isdir, const char *progname)
264 {
265  int fd;
266  int flags;
267  int returncode;
268 
269  /*
270  * Some OSs require directories to be opened read-only whereas other
271  * systems don't allow us to fsync files opened read-only; so we need both
272  * cases here. Using O_RDWR will cause us to fail to fsync files that are
273  * not writable by our userid, but we assume that's OK.
274  */
275  flags = PG_BINARY;
276  if (!isdir)
277  flags |= O_RDWR;
278  else
279  flags |= O_RDONLY;
280 
281  /*
282  * Open the file, silently ignoring errors about unreadable files (or
283  * unsupported operations, e.g. opening a directory under Windows), and
284  * logging others.
285  */
286  fd = open(fname, flags);
287  if (fd < 0)
288  {
289  if (errno == EACCES || (isdir && errno == EISDIR))
290  return 0;
291  fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
292  progname, fname, strerror(errno));
293  return -1;
294  }
295 
296  returncode = fsync(fd);
297 
298  /*
299  * Some OSes don't allow us to fsync directories at all, so we can ignore
300  * those errors. Anything else needs to be reported.
301  */
302  if (returncode != 0 && !(isdir && errno == EBADF))
303  {
304  fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
305  progname, fname, strerror(errno));
306  (void) close(fd);
307  return -1;
308  }
309 
310  (void) close(fd);
311  return 0;
312 }
313 
314 /*
315  * fsync_parent_path -- fsync the parent path of a file or directory
316  *
317  * This is aimed at making file operations persistent on disk in case of
318  * an OS crash or power failure.
319  */
320 int
321 fsync_parent_path(const char *fname, const char *progname)
322 {
323  char parentpath[MAXPGPATH];
324 
325  strlcpy(parentpath, fname, MAXPGPATH);
326  get_parent_directory(parentpath);
327 
328  /*
329  * get_parent_directory() returns an empty string if the input argument is
330  * just a file name (see comments in path.c), so handle that as being the
331  * current directory.
332  */
333  if (strlen(parentpath) == 0)
334  strlcpy(parentpath, ".", MAXPGPATH);
335 
336  if (fsync_fname(parentpath, true, progname) != 0)
337  return -1;
338 
339  return 0;
340 }
341 
342 /*
343  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
344  *
345  * Wrapper around rename, similar to the backend version.
346  */
347 int
348 durable_rename(const char *oldfile, const char *newfile, const char *progname)
349 {
350  int fd;
351 
352  /*
353  * First fsync the old and target path (if it exists), to ensure that they
354  * are properly persistent on disk. Syncing the target file is not
355  * strictly necessary, but it makes it easier to reason about crashes;
356  * because it's then guaranteed that either source or target file exists
357  * after a crash.
358  */
359  if (fsync_fname(oldfile, false, progname) != 0)
360  return -1;
361 
362  fd = open(newfile, PG_BINARY | O_RDWR, 0);
363  if (fd < 0)
364  {
365  if (errno != ENOENT)
366  {
367  fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
368  progname, newfile, strerror(errno));
369  return -1;
370  }
371  }
372  else
373  {
374  if (fsync(fd) != 0)
375  {
376  fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
377  progname, newfile, strerror(errno));
378  close(fd);
379  return -1;
380  }
381  close(fd);
382  }
383 
384  /* Time to do the real deal... */
385  if (rename(oldfile, newfile) != 0)
386  {
387  fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"),
388  progname, oldfile, newfile, strerror(errno));
389  return -1;
390  }
391 
392  /*
393  * To guarantee renaming the file is persistent, fsync the file with its
394  * new name, and its containing directory.
395  */
396  if (fsync_fname(newfile, false, progname) != 0)
397  return -1;
398 
399  if (fsync_parent_path(newfile, progname) != 0)
400  return -1;
401 
402  return 0;
403 }
void fsync_pgdata(const char *pg_data, const char *progname, int serverVersion)
Definition: file_utils.c:58
#define MINIMUM_VERSION_FOR_PG_WAL
Definition: file_utils.c:35
static void walkdir(const char *path, int(*action)(const char *fname, bool isdir, const char *progname), bool process_symlinks, const char *progname)
Definition: file_utils.c:152
int closedir(DIR *)
Definition: dirent.c:111
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
const char * progname
Definition: pg_standby.c:37
Definition: dirent.h:9
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1044
void fsync_dir_recurse(const char *dir, const char *progname)
Definition: file_utils.c:124
Definition: dirent.c:25
#define MAXPGPATH
DIR * opendir(const char *)
Definition: dirent.c:33
void get_parent_directory(char *path)
Definition: path.c:854
static char * pg_data
Definition: initdb.c:122
#define fsync(fd)
Definition: win32.h:62
int durable_rename(const char *oldfile, const char *newfile, const char *progname)
Definition: file_utils.c:348
int fsync_fname(const char *fname, bool isdir, const char *progname)
Definition: file_utils.c:263
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
int fsync_parent_path(const char *fname, const char *progname)
Definition: file_utils.c:321
struct dirent * readdir(DIR *)
Definition: dirent.c:77
const char * strerror(int errnum)
Definition: strerror.c:19
char d_name[MAX_PATH]
Definition: dirent.h:14
#define close(a)
Definition: win32.h:12
#define lstat(path, sb)
Definition: win32.h:262
#define _(x)
Definition: elog.c:84
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:234