PostgreSQL Source Code  git master
file_utils.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * File-processing utility routines.
4  *
5  * Assorted utility functions to work on files.
6  *
7  *
8  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/common/file_utils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #ifndef FRONTEND
17 #include "postgres.h"
18 #else
19 #include "postgres_fe.h"
20 #endif
21 
22 #include <dirent.h>
23 #include <fcntl.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 
27 #include "common/file_utils.h"
28 #ifdef FRONTEND
29 #include "common/logging.h"
30 #endif
31 
32 #ifdef FRONTEND
33 
34 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
35 #if defined(HAVE_SYNC_FILE_RANGE)
36 #define PG_FLUSH_DATA_WORKS 1
37 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
38 #define PG_FLUSH_DATA_WORKS 1
39 #endif
40 
41 /*
42  * pg_xlog has been renamed to pg_wal in version 10.
43  */
44 #define MINIMUM_VERSION_FOR_PG_WAL 100000
45 
46 #ifdef PG_FLUSH_DATA_WORKS
47 static int pre_sync_fname(const char *fname, bool isdir);
48 #endif
49 static void walkdir(const char *path,
50  int (*action) (const char *fname, bool isdir),
51  bool process_symlinks);
52 
53 /*
54  * Issue fsync recursively on PGDATA and all its contents.
55  *
56  * We fsync regular files and directories wherever they are, but we follow
57  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
58  * Other symlinks are presumed to point at files we're not responsible for
59  * fsyncing, and might not have privileges to write at all.
60  *
61  * serverVersion indicates the version of the server to be fsync'd.
62  */
63 void
64 fsync_pgdata(const char *pg_data,
65  int serverVersion)
66 {
67  bool xlog_is_symlink;
68  char pg_wal[MAXPGPATH];
69  char pg_tblspc[MAXPGPATH];
70 
71  /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
72  snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
73  serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
74  snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
75 
76  /*
77  * If pg_wal is a symlink, we'll need to recurse into it separately,
78  * because the first walkdir below will ignore it.
79  */
80  xlog_is_symlink = false;
81 
82 #ifndef WIN32
83  {
84  struct stat st;
85 
86  if (lstat(pg_wal, &st) < 0)
87  pg_log_error("could not stat file \"%s\": %m", pg_wal);
88  else if (S_ISLNK(st.st_mode))
89  xlog_is_symlink = true;
90  }
91 #else
92  if (pgwin32_is_junction(pg_wal))
93  xlog_is_symlink = true;
94 #endif
95 
96  /*
97  * If possible, hint to the kernel that we're soon going to fsync the data
98  * directory and its contents.
99  */
100 #ifdef PG_FLUSH_DATA_WORKS
101  walkdir(pg_data, pre_sync_fname, false);
102  if (xlog_is_symlink)
103  walkdir(pg_wal, pre_sync_fname, false);
104  walkdir(pg_tblspc, pre_sync_fname, true);
105 #endif
106 
107  /*
108  * Now we do the fsync()s in the same order.
109  *
110  * The main call ignores symlinks, so in addition to specially processing
111  * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
112  * process_symlinks = true. Note that if there are any plain directories
113  * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
114  * so we don't worry about optimizing it.
115  */
116  walkdir(pg_data, fsync_fname, false);
117  if (xlog_is_symlink)
118  walkdir(pg_wal, fsync_fname, false);
119  walkdir(pg_tblspc, fsync_fname, true);
120 }
121 
122 /*
123  * Issue fsync recursively on the given directory and all its contents.
124  *
125  * This is a convenient wrapper on top of walkdir().
126  */
127 void
128 fsync_dir_recurse(const char *dir)
129 {
130  /*
131  * If possible, hint to the kernel that we're soon going to fsync the data
132  * directory and its contents.
133  */
134 #ifdef PG_FLUSH_DATA_WORKS
135  walkdir(dir, pre_sync_fname, false);
136 #endif
137 
138  walkdir(dir, fsync_fname, false);
139 }
140 
141 /*
142  * walkdir: recursively walk a directory, applying the action to each
143  * regular file and directory (including the named directory itself).
144  *
145  * If process_symlinks is true, the action and recursion are also applied
146  * to regular files and directories that are pointed to by symlinks in the
147  * given directory; otherwise symlinks are ignored. Symlinks are always
148  * ignored in subdirectories, ie we intentionally don't pass down the
149  * process_symlinks flag to recursive calls.
150  *
151  * Errors are reported but not considered fatal.
152  *
153  * See also walkdir in fd.c, which is a backend version of this logic.
154  */
155 static void
156 walkdir(const char *path,
157  int (*action) (const char *fname, bool isdir),
158  bool process_symlinks)
159 {
160  DIR *dir;
161  struct dirent *de;
162 
163  dir = opendir(path);
164  if (dir == NULL)
165  {
166  pg_log_error("could not open directory \"%s\": %m", path);
167  return;
168  }
169 
170  while (errno = 0, (de = readdir(dir)) != NULL)
171  {
172  char subpath[MAXPGPATH * 2];
173 
174  if (strcmp(de->d_name, ".") == 0 ||
175  strcmp(de->d_name, "..") == 0)
176  continue;
177 
178  snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
179 
180  switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
181  {
182  case PGFILETYPE_REG:
183  (*action) (subpath, false);
184  break;
185  case PGFILETYPE_DIR:
186  walkdir(subpath, action, false);
187  break;
188  default:
189 
190  /*
191  * Errors are already reported directly by get_dirent_type(),
192  * and any remaining symlinks and unknown file types are
193  * ignored.
194  */
195  break;
196  }
197  }
198 
199  if (errno)
200  pg_log_error("could not read directory \"%s\": %m", path);
201 
202  (void) closedir(dir);
203 
204  /*
205  * It's important to fsync the destination directory itself as individual
206  * file fsyncs don't guarantee that the directory entry for the file is
207  * synced. Recent versions of ext4 have made the window much wider but
208  * it's been an issue for ext3 and other filesystems in the past.
209  */
210  (*action) (path, true);
211 }
212 
/*
 * pre_sync_fname -- hint to the OS that "fname" is about to be fsync()'d.
 *
 * Returns 0 when the hint was issued, or when the file could not be opened
 * for a reason we tolerate (EACCES, or EISDIR for a directory).  Any other
 * open failure is logged non-fatally and -1 is returned.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir)
{
	int			fd = open(fname, O_RDONLY | PG_BINARY, 0);

	if (fd >= 0)
	{
		/*
		 * Mirror what pg_flush_data() does in the backend: use
		 * sync_file_range when available, otherwise posix_fadvise.  The
		 * result is discarded because this is purely advisory.
		 */
#if defined(HAVE_SYNC_FILE_RANGE)
		(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
		(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

		(void) close(fd);
		return 0;
	}

	/* Unreadable files (and unopenable directories) are silently skipped. */
	if (errno == EACCES || (isdir && errno == EISDIR))
		return 0;

	pg_log_error("could not open file \"%s\": %m", fname);
	return -1;
}

#endif							/* PG_FLUSH_DATA_WORKS */
254 
255 /*
256  * fsync_fname -- Try to fsync a file or directory
257  *
258  * Ignores errors trying to open unreadable files, or trying to fsync
259  * directories on systems where that isn't allowed/required. All other errors
260  * are fatal.
261  */
262 int
263 fsync_fname(const char *fname, bool isdir)
264 {
265  int fd;
266  int flags;
267  int returncode;
268 
269  /*
270  * Some OSs require directories to be opened read-only whereas other
271  * systems don't allow us to fsync files opened read-only; so we need both
272  * cases here. Using O_RDWR will cause us to fail to fsync files that are
273  * not writable by our userid, but we assume that's OK.
274  */
275  flags = PG_BINARY;
276  if (!isdir)
277  flags |= O_RDWR;
278  else
279  flags |= O_RDONLY;
280 
281  /*
282  * Open the file, silently ignoring errors about unreadable files (or
283  * unsupported operations, e.g. opening a directory under Windows), and
284  * logging others.
285  */
286  fd = open(fname, flags, 0);
287  if (fd < 0)
288  {
289  if (errno == EACCES || (isdir && errno == EISDIR))
290  return 0;
291  pg_log_error("could not open file \"%s\": %m", fname);
292  return -1;
293  }
294 
295  returncode = fsync(fd);
296 
297  /*
298  * Some OSes don't allow us to fsync directories at all, so we can ignore
299  * those errors. Anything else needs to be reported.
300  */
301  if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
302  {
303  pg_log_fatal("could not fsync file \"%s\": %m", fname);
304  (void) close(fd);
305  exit(EXIT_FAILURE);
306  }
307 
308  (void) close(fd);
309  return 0;
310 }
311 
312 /*
313  * fsync_parent_path -- fsync the parent path of a file or directory
314  *
315  * This is aimed at making file operations persistent on disk in case of
316  * an OS crash or power failure.
317  */
318 int
319 fsync_parent_path(const char *fname)
320 {
321  char parentpath[MAXPGPATH];
322 
323  strlcpy(parentpath, fname, MAXPGPATH);
324  get_parent_directory(parentpath);
325 
326  /*
327  * get_parent_directory() returns an empty string if the input argument is
328  * just a file name (see comments in path.c), so handle that as being the
329  * current directory.
330  */
331  if (strlen(parentpath) == 0)
332  strlcpy(parentpath, ".", MAXPGPATH);
333 
334  if (fsync_fname(parentpath, true) != 0)
335  return -1;
336 
337  return 0;
338 }
339 
340 /*
341  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
342  *
343  * Wrapper around rename, similar to the backend version.
344  */
345 int
346 durable_rename(const char *oldfile, const char *newfile)
347 {
348  int fd;
349 
350  /*
351  * First fsync the old and target path (if it exists), to ensure that they
352  * are properly persistent on disk. Syncing the target file is not
353  * strictly necessary, but it makes it easier to reason about crashes;
354  * because it's then guaranteed that either source or target file exists
355  * after a crash.
356  */
357  if (fsync_fname(oldfile, false) != 0)
358  return -1;
359 
360  fd = open(newfile, PG_BINARY | O_RDWR, 0);
361  if (fd < 0)
362  {
363  if (errno != ENOENT)
364  {
365  pg_log_error("could not open file \"%s\": %m", newfile);
366  return -1;
367  }
368  }
369  else
370  {
371  if (fsync(fd) != 0)
372  {
373  pg_log_fatal("could not fsync file \"%s\": %m", newfile);
374  close(fd);
375  exit(EXIT_FAILURE);
376  }
377  close(fd);
378  }
379 
380  /* Time to do the real deal... */
381  if (rename(oldfile, newfile) != 0)
382  {
383  pg_log_error("could not rename file \"%s\" to \"%s\": %m",
384  oldfile, newfile);
385  return -1;
386  }
387 
388  /*
389  * To guarantee renaming the file is persistent, fsync the file with its
390  * new name, and its containing directory.
391  */
392  if (fsync_fname(newfile, false) != 0)
393  return -1;
394 
395  if (fsync_parent_path(newfile) != 0)
396  return -1;
397 
398  return 0;
399 }
400 
401 #endif /* FRONTEND */
402 
403 /*
404  * Return the type of a directory entry.
405  *
406  * In frontend code, elevel should be a level from logging.h; in backend code
407  * it should be a level from elog.h.
408  */
410 get_dirent_type(const char *path,
411  const struct dirent *de,
412  bool look_through_symlinks,
413  int elevel)
414 {
415  PGFileType result;
416 
417  /*
418  * Some systems tell us the type directly in the dirent struct, but that's
419  * a BSD and Linux extension not required by POSIX. Even when the
420  * interface is present, sometimes the type is unknown, depending on the
421  * filesystem.
422  */
423 #if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
424  if (de->d_type == DT_REG)
425  result = PGFILETYPE_REG;
426  else if (de->d_type == DT_DIR)
427  result = PGFILETYPE_DIR;
428  else if (de->d_type == DT_LNK && !look_through_symlinks)
429  result = PGFILETYPE_LNK;
430  else
431  result = PGFILETYPE_UNKNOWN;
432 #else
433  result = PGFILETYPE_UNKNOWN;
434 #endif
435 
436  if (result == PGFILETYPE_UNKNOWN)
437  {
438  struct stat fst;
439  int sret;
440 
441 
442  if (look_through_symlinks)
443  sret = stat(path, &fst);
444  else
445  sret = lstat(path, &fst);
446 
447  if (sret < 0)
448  {
449  result = PGFILETYPE_ERROR;
450 #ifdef FRONTEND
451  pg_log_generic(elevel, "could not stat file \"%s\": %m", path);
452 #else
453  ereport(elevel,
455  errmsg("could not stat file \"%s\": %m", path)));
456 #endif
457  }
458  else if (S_ISREG(fst.st_mode))
459  result = PGFILETYPE_REG;
460  else if (S_ISDIR(fst.st_mode))
461  result = PGFILETYPE_DIR;
462 #ifdef S_ISLNK
463  else if (S_ISLNK(fst.st_mode))
464  result = PGFILETYPE_LNK;
465 #endif
466  }
467 
468  return result;
469 }
#define pg_log_error(...)
Definition: logging.h:80
static void walkdir(const char *path, void(*action)(const char *fname, bool isdir, int elevel), bool process_symlinks, int elevel)
Definition: fd.c:3331
#define DT_DIR
Definition: dirent.h:28
void fsync_fname(const char *fname, bool isdir)
Definition: fd.c:633
PGFileType
Definition: file_utils.h:18
int closedir(DIR *)
Definition: dirent.c:123
Definition: dirent.h:9
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1213
#define fsync(fd)
Definition: win32_port.h:68
Definition: dirent.c:25
#define MAXPGPATH
#define MINIMUM_VERSION_FOR_PG_WAL
DIR * opendir(const char *)
Definition: dirent.c:33
#define DT_LNK
Definition: dirent.h:31
int errcode_for_file_access(void)
Definition: elog.c:633
void get_parent_directory(char *path)
Definition: path.c:854
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:410
static char * pg_data
Definition: initdb.c:124
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:659
#define S_ISREG(m)
Definition: win32_port.h:319
static int elevel
Definition: vacuumlazy.c:333
unsigned char d_type
Definition: dirent.h:13
unsigned short st_mode
Definition: win32_port.h:260
#define ereport(elevel,...)
Definition: elog.h:144
void pg_log_generic(enum pg_log_level level, const char *pg_restrict fmt,...)
Definition: logging.c:197
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
struct dirent * readdir(DIR *)
Definition: dirent.c:78
#define S_ISDIR(m)
Definition: win32_port.h:316
#define lstat(path, sb)
Definition: win32_port.h:276
int errmsg(const char *fmt,...)
Definition: elog.c:821
#define EXIT_FAILURE
Definition: settings.h:161
#define DT_REG
Definition: dirent.h:30
char d_name[MAX_PATH]
Definition: dirent.h:15
#define close(a)
Definition: win32.h:12
#define snprintf
Definition: port.h:215
static int fsync_parent_path(const char *fname, int elevel)
Definition: fd.c:3540
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:241
#define stat
Definition: win32_port.h:275
bool pgwin32_is_junction(const char *path)
#define pg_log_fatal(...)
Definition: logging.h:76