PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_test_fsync --- tests all supported fsync() methods
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  *
7  * src/bin/pg_test_fsync/pg_test_fsync.c
8  *
9  *-------------------------------------------------------------------------
10  */
11 
12 #include "postgres_fe.h"
13 
14 #include <limits.h>
15 #include <sys/stat.h>
16 #include <sys/time.h>
17 #include <fcntl.h>
18 #include <time.h>
19 #include <unistd.h>
20 #include <signal.h>
21 
22 #include "common/logging.h"
23 #include "common/pg_prng.h"
24 #include "getopt_long.h"
25 
26 /*
27  * put the temp files in the local directory
28  * unless the user specifies otherwise
29  */
30 #define FSYNC_FILENAME "./pg_test_fsync.out"
31 
32 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
33 
34 #define LABEL_FORMAT " %-30s"
35 #define NA_FORMAT "%21s\n"
36 /* translator: maintain alignment with NA_FORMAT */
37 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
38 #define USECS_SEC 1000000
39 
/*
 * Timing helpers.
 *
 * These are macros rather than functions so that function call overhead is
 * not included in the timed region.  Both expect the file-level variables
 * alarm_triggered, secs_per_test, start_t and stop_t, and STOP_TIMER also
 * expects a local variable named "ops" in the calling function.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The returned handle is not needed afterwards, so
 * it is closed right away; closing the handle does not stop the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

/* STOP_TIMER records the stop time in stop_t and prints the result line. */
#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
65 
66 
67 static const char *progname;
68 
69 static unsigned int secs_per_test = 5;
70 static int needs_unlink = 0;
72  *buf,
74 static struct timeval start_t,
75  stop_t;
76 static sig_atomic_t alarm_triggered = false;
77 
78 
79 static void handle_args(int argc, char *argv[]);
80 static void prepare_buf(void);
81 static void test_open(void);
82 static void test_non_sync(void);
83 static void test_sync(int writes_per_op);
84 static void test_open_syncs(void);
85 static void test_open_sync(const char *msg, int writes_size);
86 static void test_file_descriptor_sync(void);
87 
88 #ifndef WIN32
89 static void process_alarm(SIGNAL_ARGS);
90 #else
91 static DWORD WINAPI process_alarm(LPVOID param);
92 #endif
93 static void signal_cleanup(SIGNAL_ARGS);
94 
95 #ifdef HAVE_FSYNC_WRITETHROUGH
96 static int pg_fsync_writethrough(int fd);
97 #endif
98 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
99 
100 #define die(msg) pg_fatal("%s: %m", _(msg))
101 
102 
103 int
104 main(int argc, char *argv[])
105 {
106  pg_logging_init(argv[0]);
107  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
108  progname = get_progname(argv[0]);
109 
110  handle_args(argc, argv);
111 
112  /* Prevent leaving behind the test file */
113  pqsignal(SIGINT, signal_cleanup);
114  pqsignal(SIGTERM, signal_cleanup);
115 #ifndef WIN32
117 #endif
118 #ifdef SIGHUP
119  /* Not defined on win32 */
121 #endif
122 
123  pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
124 
125  prepare_buf();
126 
127  test_open();
128 
129  /* Test using 1 XLOG_BLCKSZ write */
130  test_sync(1);
131 
132  /* Test using 2 XLOG_BLCKSZ writes */
133  test_sync(2);
134 
135  test_open_syncs();
136 
138 
139  test_non_sync();
140 
141  unlink(filename);
142 
143  return 0;
144 }
145 
146 static void
147 handle_args(int argc, char *argv[])
148 {
149  static struct option long_options[] = {
150  {"filename", required_argument, NULL, 'f'},
151  {"secs-per-test", required_argument, NULL, 's'},
152  {NULL, 0, NULL, 0}
153  };
154 
155  int option; /* Command line option */
156  int optindex = 0; /* used by getopt_long */
157  unsigned long optval; /* used for option parsing */
158  char *endptr;
159 
160  if (argc > 1)
161  {
162  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
163  {
164  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
165  exit(0);
166  }
167  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
168  {
169  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
170  exit(0);
171  }
172  }
173 
174  while ((option = getopt_long(argc, argv, "f:s:",
175  long_options, &optindex)) != -1)
176  {
177  switch (option)
178  {
179  case 'f':
181  break;
182 
183  case 's':
184  errno = 0;
185  optval = strtoul(optarg, &endptr, 10);
186 
187  if (endptr == optarg || *endptr != '\0' ||
188  errno != 0 || optval != (unsigned int) optval)
189  {
190  pg_log_error("invalid argument for option %s", "--secs-per-test");
191  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
192  exit(1);
193  }
194 
195  secs_per_test = (unsigned int) optval;
196  if (secs_per_test == 0)
197  pg_fatal("%s must be in range %u..%u",
198  "--secs-per-test", 1, UINT_MAX);
199  break;
200 
201  default:
202  /* getopt_long already emitted a complaint */
203  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
204  exit(1);
205  }
206  }
207 
208  if (argc > optind)
209  {
210  pg_log_error("too many command-line arguments (first is \"%s\")",
211  argv[optind]);
212  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
213  exit(1);
214  }
215 
216  printf(ngettext("%u second per test\n",
217  "%u seconds per test\n",
218  secs_per_test),
219  secs_per_test);
220 #if defined(O_DIRECT)
221  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
222 #elif defined(F_NOCACHE)
223  printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
224 #else
225  printf(_("Direct I/O is not supported on this platform.\n"));
226 #endif
227 }
228 
229 static void
231 {
232  int ops;
233 
234  /* write random data into buffer */
235  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
237 
238  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
239 }
240 
241 static void
243 {
244  int tmpfile;
245 
246  /*
247  * test if we can open the target file
248  */
249  if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
250  die("could not open output file");
251  needs_unlink = 1;
252  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
254  die("write failed");
255 
256  /* fsync now so that dirty buffers don't skew later tests */
257  if (fsync(tmpfile) != 0)
258  die("fsync failed");
259 
260  close(tmpfile);
261 }
262 
/*
 * open_direct
 *
 * Open "path" requesting unbuffered I/O where the platform offers a way to
 * ask for it at compile time: O_DIRECT is OR'ed into the open flags when it
 * is defined; otherwise, if F_NOCACHE is defined, it is applied with fcntl()
 * after a successful open.
 *
 * Returns the descriptor from open(), or -1 with errno preserved if the
 * F_NOCACHE fcntl() fails (the descriptor is closed in that case).
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			result;

#ifdef O_DIRECT
	result = open(path, flags | O_DIRECT, mode);
#else
	result = open(path, flags, mode);
#endif

#if defined(F_NOCACHE) && !defined(O_DIRECT)
	if (result >= 0)
	{
		if (fcntl(result, F_NOCACHE, 1) < 0)
		{
			/* close() may clobber errno, so keep the fcntl() error */
			int			fcntl_errno = errno;

			close(result);
			errno = fcntl_errno;
			result = -1;
		}
	}
#endif

	return result;
}
287 
288 static void
289 test_sync(int writes_per_op)
290 {
291  int tmpfile,
292  ops,
293  writes;
294  bool fs_warning = false;
295 
296  if (writes_per_op == 1)
297  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
298  else
299  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
300  printf(_("(in \"wal_sync_method\" preference order, except fdatasync is Linux's default)\n"));
301 
302  /*
303  * Test open_datasync if available
304  */
305  printf(LABEL_FORMAT, "open_datasync");
306  fflush(stdout);
307 
308 #ifdef O_DSYNC
309  if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
310  {
311  printf(NA_FORMAT, _("n/a*"));
312  fs_warning = true;
313  }
314  else
315  {
316  START_TIMER;
317  for (ops = 0; alarm_triggered == false; ops++)
318  {
319  for (writes = 0; writes < writes_per_op; writes++)
320  if (pg_pwrite(tmpfile,
321  buf,
322  XLOG_BLCKSZ,
323  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
324  die("write failed");
325  }
326  STOP_TIMER;
327  close(tmpfile);
328  }
329 #else
330  printf(NA_FORMAT, _("n/a"));
331 #endif
332 
333 /*
334  * Test fdatasync if available
335  */
336  printf(LABEL_FORMAT, "fdatasync");
337  fflush(stdout);
338 
339  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
340  die("could not open output file");
341  START_TIMER;
342  for (ops = 0; alarm_triggered == false; ops++)
343  {
344  for (writes = 0; writes < writes_per_op; writes++)
345  if (pg_pwrite(tmpfile,
346  buf,
347  XLOG_BLCKSZ,
348  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
349  die("write failed");
350  fdatasync(tmpfile);
351  }
352  STOP_TIMER;
353  close(tmpfile);
354 
355 /*
356  * Test fsync
357  */
358  printf(LABEL_FORMAT, "fsync");
359  fflush(stdout);
360 
361  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
362  die("could not open output file");
363  START_TIMER;
364  for (ops = 0; alarm_triggered == false; ops++)
365  {
366  for (writes = 0; writes < writes_per_op; writes++)
367  if (pg_pwrite(tmpfile,
368  buf,
369  XLOG_BLCKSZ,
370  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
371  die("write failed");
372  if (fsync(tmpfile) != 0)
373  die("fsync failed");
374  }
375  STOP_TIMER;
376  close(tmpfile);
377 
378 /*
379  * If fsync_writethrough is available, test as well
380  */
381  printf(LABEL_FORMAT, "fsync_writethrough");
382  fflush(stdout);
383 
384 #ifdef HAVE_FSYNC_WRITETHROUGH
385  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
386  die("could not open output file");
387  START_TIMER;
388  for (ops = 0; alarm_triggered == false; ops++)
389  {
390  for (writes = 0; writes < writes_per_op; writes++)
391  if (pg_pwrite(tmpfile,
392  buf,
393  XLOG_BLCKSZ,
394  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
395  die("write failed");
396  if (pg_fsync_writethrough(tmpfile) != 0)
397  die("fsync failed");
398  }
399  STOP_TIMER;
400  close(tmpfile);
401 #else
402  printf(NA_FORMAT, _("n/a"));
403 #endif
404 
405 /*
406  * Test open_sync if available
407  */
408  printf(LABEL_FORMAT, "open_sync");
409  fflush(stdout);
410 
411 #ifdef O_SYNC
412  if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
413  {
414  printf(NA_FORMAT, _("n/a*"));
415  fs_warning = true;
416  }
417  else
418  {
419  START_TIMER;
420  for (ops = 0; alarm_triggered == false; ops++)
421  {
422  for (writes = 0; writes < writes_per_op; writes++)
423  if (pg_pwrite(tmpfile,
424  buf,
425  XLOG_BLCKSZ,
426  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
427 
428  /*
429  * This can generate write failures if the filesystem has
430  * a large block size, e.g. 4k, and there is no support
431  * for O_DIRECT writes smaller than the file system block
432  * size, e.g. XFS.
433  */
434  die("write failed");
435  }
436  STOP_TIMER;
437  close(tmpfile);
438  }
439 #else
440  printf(NA_FORMAT, _("n/a"));
441 #endif
442 
443  if (fs_warning)
444  {
445  printf(_("* This file system and its mount options do not support direct\n"
446  " I/O, e.g. ext4 in journaled mode.\n"));
447  }
448 }
449 
/*
 * test_open_syncs
 *
 * Print the section heading and run test_open_sync() once for each way of
 * splitting a 16kB total into equal-sized writes (16, 8, 4, 2 and 1 kB).
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
	test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
	test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
	test_open_sync(_("16 * 1kB open_sync writes"), 1);
}
463 
464 /*
465  * Test open_sync with different size files
466  */
467 static void
468 test_open_sync(const char *msg, int writes_size)
469 {
470 #ifdef O_SYNC
471  int tmpfile,
472  ops,
473  writes;
474 #endif
475 
476  printf(LABEL_FORMAT, msg);
477  fflush(stdout);
478 
479 #ifdef O_SYNC
480  if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
481  printf(NA_FORMAT, _("n/a*"));
482  else
483  {
484  START_TIMER;
485  for (ops = 0; alarm_triggered == false; ops++)
486  {
487  for (writes = 0; writes < 16 / writes_size; writes++)
488  if (pg_pwrite(tmpfile,
489  buf,
490  writes_size * 1024,
491  writes * writes_size * 1024) !=
492  writes_size * 1024)
493  die("write failed");
494  }
495  STOP_TIMER;
496  close(tmpfile);
497  }
498 #else
499  printf(NA_FORMAT, _("n/a"));
500 #endif
501 }
502 
503 static void
505 {
506  int tmpfile,
507  ops;
508 
509  /*
510  * Test whether fsync can sync data written on a different descriptor for
511  * the same file. This checks the efficiency of multi-process fsyncs
512  * against the same file. Possibly this should be done with writethrough
513  * on platforms which support it.
514  */
515  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
516  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
517  "descriptor.)\n"));
518 
519  /*
520  * first write, fsync and close, which is the normal behavior without
521  * multiple descriptors
522  */
523  printf(LABEL_FORMAT, "write, fsync, close");
524  fflush(stdout);
525 
526  START_TIMER;
527  for (ops = 0; alarm_triggered == false; ops++)
528  {
529  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
530  die("could not open output file");
531  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
532  die("write failed");
533  if (fsync(tmpfile) != 0)
534  die("fsync failed");
535  close(tmpfile);
536 
537  /*
538  * open and close the file again to be consistent with the following
539  * test
540  */
541  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
542  die("could not open output file");
543  close(tmpfile);
544  }
545  STOP_TIMER;
546 
547  /*
548  * Now open, write, close, open again and fsync This simulates processes
549  * fsyncing each other's writes.
550  */
551  printf(LABEL_FORMAT, "write, close, fsync");
552  fflush(stdout);
553 
554  START_TIMER;
555  for (ops = 0; alarm_triggered == false; ops++)
556  {
557  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
558  die("could not open output file");
559  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
560  die("write failed");
561  close(tmpfile);
562  /* reopen file */
563  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
564  die("could not open output file");
565  if (fsync(tmpfile) != 0)
566  die("fsync failed");
567  close(tmpfile);
568  }
569  STOP_TIMER;
570 }
571 
572 static void
574 {
575  int tmpfile,
576  ops;
577 
578  /*
579  * Test a simple write without fsync
580  */
581  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
582  printf(LABEL_FORMAT, "write");
583  fflush(stdout);
584 
585  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
586  die("could not open output file");
587  START_TIMER;
588  for (ops = 0; alarm_triggered == false; ops++)
589  {
590  if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
591  die("write failed");
592  }
593  STOP_TIMER;
594  close(tmpfile);
595 }
596 
597 static void
599 {
600  int rc;
601 
602  /* Delete the file if it exists. Ignore errors */
603  if (needs_unlink)
604  unlink(filename);
605  /* Finish incomplete line on stdout */
606  rc = write(STDOUT_FILENO, "\n", 1);
607  (void) rc; /* silence compiler warnings */
608  _exit(1);
609 }
610 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * pg_fsync_writethrough
 *
 * Issue fcntl(fd, F_FULLFSYNC) where F_FULLFSYNC is defined.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
624 
625 /*
626  * print out the writes per second for tests
627  */
628 static void
629 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
630 {
631  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
632  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
633  double per_second = ops / total_time;
634  double avg_op_time_us = (total_time / ops) * USECS_SEC;
635 
636  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
637 }
638 
639 #ifndef WIN32
640 static void
642 {
643  alarm_triggered = true;
644 }
645 #else
646 static DWORD WINAPI
647 process_alarm(LPVOID param)
648 {
649  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
650  Sleep(secs_per_test * 1000);
651  alarm_triggered = true;
652  ExitThread(0);
653 }
654 #endif
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:783
#define ngettext(s, p, n)
Definition: c.h:1160
#define SIGNAL_ARGS
Definition: c.h:1324
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1193
#define PG_BINARY
Definition: c.h:1252
int fdatasync(int fildes)
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:429
#define _(x)
Definition: elog.c:90
int pg_fsync_writethrough(int fd)
Definition: fd.c:460
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define required_argument
Definition: getopt_long.h:25
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
static void const char fflush(stdout)
exit(1)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define DEFAULT_XLOG_SEG_SIZE
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:243
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:89
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
#define NA_FORMAT
Definition: pg_test_fsync.c:35
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:30
static char * filename
Definition: pg_test_fsync.c:73
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:42
#define STOP_TIMER
Definition: pg_test_fsync.c:60
#define LABEL_FORMAT
Definition: pg_test_fsync.c:34
static int needs_unlink
Definition: pg_test_fsync.c:70
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
static char * buf
Definition: pg_test_fsync.c:72
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:74
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:67
static void process_alarm(SIGNAL_ARGS)
#define USECS_SEC
Definition: pg_test_fsync.c:38
static sig_atomic_t alarm_triggered
Definition: pg_test_fsync.c:76
#define OPS_FORMAT
Definition: pg_test_fsync.c:37
static unsigned int secs_per_test
Definition: pg_test_fsync.c:69
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:32
static void signal_cleanup(SIGNAL_ARGS)
#define pg_pwrite
Definition: port.h:226
const char * get_progname(const char *argv0)
Definition: path.c:575
pqsigfunc pqsignal(int signo, pqsigfunc func)
#define printf(...)
Definition: port.h:244
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define STDOUT_FILENO
Definition: unistd.h:8
#define fsync(fd)
Definition: win32_port.h:85
#define SIGHUP
Definition: win32_port.h:168
#define S_IRUSR
Definition: win32_port.h:289
#define SIGALRM
Definition: win32_port.h:174
#define S_IWUSR
Definition: win32_port.h:292
#define O_DSYNC
Definition: win32_port.h:352