PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
file_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * File-processing utility routines.
4  *
5  * Assorted utility functions to work on files.
6  *
7  *
8  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/common/file_utils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres_fe.h"
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 
22 #include "common/file_utils.h"
23 
24 
/*
 * PG_FLUSH_DATA_WORKS is defined whenever one of the mechanisms used by
 * pre_sync_fname() is available: sync_file_range(), or posix_fadvise()
 * together with POSIX_FADV_DONTNEED.
 */
#if defined(HAVE_SYNC_FILE_RANGE) || \
	(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
#define PG_FLUSH_DATA_WORKS 1
#endif

/*
 * Server version number starting from which the WAL directory is called
 * pg_wal instead of pg_xlog (the rename happened in version 10).
 */
#define MINIMUM_VERSION_FOR_PG_WAL	100000

/* Forward declarations of the file-local helpers */
static void walkdir(const char *path,
		int (*action) (const char *fname, bool isdir, const char *progname),
		bool process_symlinks, const char *progname);

#ifdef PG_FLUSH_DATA_WORKS
static int	pre_sync_fname(const char *fname, bool isdir,
			   const char *progname);
#endif
44 
45 /*
46  * Issue fsync recursively on PGDATA and all its contents.
47  *
48  * We fsync regular files and directories wherever they are, but we follow
49  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
50  * Other symlinks are presumed to point at files we're not responsible for
51  * fsyncing, and might not have privileges to write at all.
52  *
53  * serverVersion indicates the version of the server to be fsync'd.
54  *
55  * Errors are reported but not considered fatal.
56  */
57 void
58 fsync_pgdata(const char *pg_data,
59  const char *progname,
60  int serverVersion)
61 {
62  bool xlog_is_symlink;
63  char pg_wal[MAXPGPATH];
64  char pg_tblspc[MAXPGPATH];
65 
66  /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
67  snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
68  serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
69  snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
70 
71  /*
72  * If pg_wal is a symlink, we'll need to recurse into it separately,
73  * because the first walkdir below will ignore it.
74  */
75  xlog_is_symlink = false;
76 
77 #ifndef WIN32
78  {
79  struct stat st;
80 
81  if (lstat(pg_wal, &st) < 0)
82  fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
83  progname, pg_wal, strerror(errno));
84  else if (S_ISLNK(st.st_mode))
85  xlog_is_symlink = true;
86  }
87 #else
88  if (pgwin32_is_junction(pg_wal))
89  xlog_is_symlink = true;
90 #endif
91 
92  /*
93  * If possible, hint to the kernel that we're soon going to fsync the data
94  * directory and its contents.
95  */
96 #ifdef PG_FLUSH_DATA_WORKS
97  walkdir(pg_data, pre_sync_fname, false, progname);
98  if (xlog_is_symlink)
99  walkdir(pg_wal, pre_sync_fname, false, progname);
100  walkdir(pg_tblspc, pre_sync_fname, true, progname);
101 #endif
102 
103  /*
104  * Now we do the fsync()s in the same order.
105  *
106  * The main call ignores symlinks, so in addition to specially processing
107  * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
108  * process_symlinks = true. Note that if there are any plain directories
109  * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
110  * so we don't worry about optimizing it.
111  */
112  walkdir(pg_data, fsync_fname, false, progname);
113  if (xlog_is_symlink)
114  walkdir(pg_wal, fsync_fname, false, progname);
115  walkdir(pg_tblspc, fsync_fname, true, progname);
116 }
117 
118 /*
119  * walkdir: recursively walk a directory, applying the action to each
120  * regular file and directory (including the named directory itself).
121  *
122  * If process_symlinks is true, the action and recursion are also applied
123  * to regular files and directories that are pointed to by symlinks in the
124  * given directory; otherwise symlinks are ignored. Symlinks are always
125  * ignored in subdirectories, ie we intentionally don't pass down the
126  * process_symlinks flag to recursive calls.
127  *
128  * Errors are reported but not considered fatal.
129  *
130  * See also walkdir in fd.c, which is a backend version of this logic.
131  */
132 static void
133 walkdir(const char *path,
134  int (*action) (const char *fname, bool isdir, const char *progname),
135  bool process_symlinks, const char *progname)
136 {
137  DIR *dir;
138  struct dirent *de;
139 
140  dir = opendir(path);
141  if (dir == NULL)
142  {
143  fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
144  progname, path, strerror(errno));
145  return;
146  }
147 
148  while (errno = 0, (de = readdir(dir)) != NULL)
149  {
150  char subpath[MAXPGPATH];
151  struct stat fst;
152  int sret;
153 
154  if (strcmp(de->d_name, ".") == 0 ||
155  strcmp(de->d_name, "..") == 0)
156  continue;
157 
158  snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name);
159 
160  if (process_symlinks)
161  sret = stat(subpath, &fst);
162  else
163  sret = lstat(subpath, &fst);
164 
165  if (sret < 0)
166  {
167  fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
168  progname, subpath, strerror(errno));
169  continue;
170  }
171 
172  if (S_ISREG(fst.st_mode))
173  (*action) (subpath, false, progname);
174  else if (S_ISDIR(fst.st_mode))
175  walkdir(subpath, action, false, progname);
176  }
177 
178  if (errno)
179  fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
180  progname, path, strerror(errno));
181 
182  (void) closedir(dir);
183 
184  /*
185  * It's important to fsync the destination directory itself as individual
186  * file fsyncs don't guarantee that the directory entry for the file is
187  * synced. Recent versions of ext4 have made the window much wider but
188  * it's been an issue for ext3 and other filesystems in the past.
189  */
190  (*action) (path, true, progname);
191 }
192 
/*
 * pre_sync_fname -- hint to the OS that a file is about to be fsync()'d.
 *
 * fname is the file or directory to hint about, isdir tells whether it is a
 * directory, and progname is the prefix used in error messages.
 *
 * Returns 0 on success, or when the open failure is one that is silently
 * ignored (EACCES, or EISDIR for a directory).  Any other failure to open
 * the file is reported on stderr and yields -1.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir, const char *progname)
{
	int			hint_fd;

	hint_fd = open(fname, O_RDONLY | PG_BINARY);

	if (hint_fd < 0)
	{
		/* unreadable files, and directories that cannot be opened, are fine */
		bool		ignorable = (errno == EACCES) || (isdir && errno == EISDIR);

		if (!ignorable)
		{
			fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
					progname, fname, strerror(errno));
			return -1;
		}
		return 0;
	}

	/*
	 * Same strategy as pg_flush_data() in the backend: sync_file_range is
	 * preferred, posix_fadvise is the fallback.  Since this is only a hint,
	 * the result of the call is deliberately discarded.
	 */
#if defined(HAVE_SYNC_FILE_RANGE)
	(void) sync_file_range(hint_fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
	(void) posix_fadvise(hint_fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

	(void) close(hint_fd);
	return 0;
}

#endif							/* PG_FLUSH_DATA_WORKS */
235 
236 /*
237  * fsync_fname -- Try to fsync a file or directory
238  *
239  * Ignores errors trying to open unreadable files, or trying to fsync
240  * directories on systems where that isn't allowed/required. Reports
241  * other errors non-fatally.
242  */
243 int
244 fsync_fname(const char *fname, bool isdir, const char *progname)
245 {
246  int fd;
247  int flags;
248  int returncode;
249 
250  /*
251  * Some OSs require directories to be opened read-only whereas other
252  * systems don't allow us to fsync files opened read-only; so we need both
253  * cases here. Using O_RDWR will cause us to fail to fsync files that are
254  * not writable by our userid, but we assume that's OK.
255  */
256  flags = PG_BINARY;
257  if (!isdir)
258  flags |= O_RDWR;
259  else
260  flags |= O_RDONLY;
261 
262  /*
263  * Open the file, silently ignoring errors about unreadable files (or
264  * unsupported operations, e.g. opening a directory under Windows), and
265  * logging others.
266  */
267  fd = open(fname, flags);
268  if (fd < 0)
269  {
270  if (errno == EACCES || (isdir && errno == EISDIR))
271  return 0;
272  fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
273  progname, fname, strerror(errno));
274  return -1;
275  }
276 
277  returncode = fsync(fd);
278 
279  /*
280  * Some OSes don't allow us to fsync directories at all, so we can ignore
281  * those errors. Anything else needs to be reported.
282  */
283  if (returncode != 0 && !(isdir && errno == EBADF))
284  {
285  fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
286  progname, fname, strerror(errno));
287  (void) close(fd);
288  return -1;
289  }
290 
291  (void) close(fd);
292  return 0;
293 }
294 
295 /*
296  * fsync_parent_path -- fsync the parent path of a file or directory
297  *
298  * This is aimed at making file operations persistent on disk in case of
299  * an OS crash or power failure.
300  */
301 int
302 fsync_parent_path(const char *fname, const char *progname)
303 {
304  char parentpath[MAXPGPATH];
305 
306  strlcpy(parentpath, fname, MAXPGPATH);
307  get_parent_directory(parentpath);
308 
309  /*
310  * get_parent_directory() returns an empty string if the input argument is
311  * just a file name (see comments in path.c), so handle that as being the
312  * current directory.
313  */
314  if (strlen(parentpath) == 0)
315  strlcpy(parentpath, ".", MAXPGPATH);
316 
317  if (fsync_fname(parentpath, true, progname) != 0)
318  return -1;
319 
320  return 0;
321 }
322 
323 /*
324  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
325  *
326  * Wrapper around rename, similar to the backend version.
327  */
328 int
329 durable_rename(const char *oldfile, const char *newfile, const char *progname)
330 {
331  int fd;
332 
333  /*
334  * First fsync the old and target path (if it exists), to ensure that they
335  * are properly persistent on disk. Syncing the target file is not
336  * strictly necessary, but it makes it easier to reason about crashes;
337  * because it's then guaranteed that either source or target file exists
338  * after a crash.
339  */
340  if (fsync_fname(oldfile, false, progname) != 0)
341  return -1;
342 
343  fd = open(newfile, PG_BINARY | O_RDWR, 0);
344  if (fd < 0)
345  {
346  if (errno != ENOENT)
347  {
348  fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
349  progname, newfile, strerror(errno));
350  return -1;
351  }
352  }
353  else
354  {
355  if (fsync(fd) != 0)
356  {
357  fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
358  progname, newfile, strerror(errno));
359  close(fd);
360  return -1;
361  }
362  close(fd);
363  }
364 
365  /* Time to do the real deal... */
366  if (rename(oldfile, newfile) != 0)
367  {
368  fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"),
369  progname, oldfile, newfile, strerror(errno));
370  return -1;
371  }
372 
373  /*
374  * To guarantee renaming the file is persistent, fsync the file with its
375  * new name, and its containing directory.
376  */
377  if (fsync_fname(newfile, false, progname) != 0)
378  return -1;
379 
380  if (fsync_parent_path(newfile, progname) != 0)
381  return -1;
382 
383  return 0;
384 }
void fsync_pgdata(const char *pg_data, const char *progname, int serverVersion)
Definition: file_utils.c:58
#define MINIMUM_VERSION_FOR_PG_WAL
Definition: file_utils.c:35
static void walkdir(const char *path, int(*action)(const char *fname, bool isdir, const char *progname), bool process_symlinks, const char *progname)
Definition: file_utils.c:133
int closedir(DIR *)
Definition: dirent.c:113
int snprintf(char *str, size_t count, const char *fmt,...) pg_attribute_printf(3
const char * progname
Definition: pg_standby.c:37
Definition: dirent.h:9
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1038
Definition: dirent.c:25
#define MAXPGPATH
DIR * opendir(const char *)
Definition: dirent.c:33
void get_parent_directory(char *path)
Definition: path.c:854
static char * pg_data
Definition: initdb.c:120
#define fsync(fd)
Definition: win32.h:70
int durable_rename(const char *oldfile, const char *newfile, const char *progname)
Definition: file_utils.c:329
int fsync_fname(const char *fname, bool isdir, const char *progname)
Definition: file_utils.c:244
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
#define NULL
Definition: c.h:226
int fsync_parent_path(const char *fname, const char *progname)
Definition: file_utils.c:302
struct dirent * readdir(DIR *)
Definition: dirent.c:78
const char * strerror(int errnum)
Definition: strerror.c:19
char d_name[MAX_PATH]
Definition: dirent.h:14
#define close(a)
Definition: win32.h:17
#define lstat(path, sb)
Definition: win32.h:272
#define _(x)
Definition: elog.c:84
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:234