PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_test_fsync --- tests all supported fsync() methods
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  *
7  * src/bin/pg_test_fsync/pg_test_fsync.c
8  *
9  *-------------------------------------------------------------------------
10  */
11 
12 #include "postgres_fe.h"
13 
14 #include <limits.h>
15 #include <sys/stat.h>
16 #include <sys/time.h>
17 #include <fcntl.h>
18 #include <time.h>
19 #include <unistd.h>
20 #include <signal.h>
21 
22 #include "access/xlogdefs.h"
23 #include "common/logging.h"
24 #include "common/pg_prng.h"
25 #include "getopt_long.h"
26 
27 /*
28  * put the temp files in the local directory
29  * unless the user specifies otherwise
30  */
31 #define FSYNC_FILENAME "./pg_test_fsync.out"
32 
33 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
34 
35 #define LABEL_FORMAT " %-30s"
36 #define NA_FORMAT "%21s\n"
37 /* translator: maintain alignment with NA_FORMAT */
38 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
39 #define USECS_SEC 1000000
40 
/*
 * START_TIMER arms a secs_per_test-second timeout that sets alarm_triggered
 * when it expires, then records the start timestamp in start_t.  STOP_TIMER
 * records the stop timestamp and prints the result for the local "ops"
 * counter of the enclosing function.
 *
 * These are macros to avoid timing the function call overhead.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The handle is closed immediately so that it is
 * not leaked once per test; closing it does not stop the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread_; \
	alarm_triggered = false; \
	alarm_thread_ = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread_ == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread_); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
66 
67 
68 static const char *progname;
69 
70 static unsigned int secs_per_test = 5;
71 static int needs_unlink = 0;
73  *buf,
75 static struct timeval start_t,
76  stop_t;
77 static sig_atomic_t alarm_triggered = false;
78 
79 
80 static void handle_args(int argc, char *argv[]);
81 static void prepare_buf(void);
82 static void test_open(void);
83 static void test_non_sync(void);
84 static void test_sync(int writes_per_op);
85 static void test_open_syncs(void);
86 static void test_open_sync(const char *msg, int writes_size);
87 static void test_file_descriptor_sync(void);
88 
89 #ifndef WIN32
90 static void process_alarm(SIGNAL_ARGS);
91 #else
92 static DWORD WINAPI process_alarm(LPVOID param);
93 #endif
94 static void signal_cleanup(SIGNAL_ARGS);
95 
96 #ifdef HAVE_FSYNC_WRITETHROUGH
97 static int pg_fsync_writethrough(int fd);
98 #endif
99 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
100 
101 #define die(msg) pg_fatal("%s: %m", _(msg))
102 
103 
104 int
105 main(int argc, char *argv[])
106 {
107  pg_logging_init(argv[0]);
108  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
109  progname = get_progname(argv[0]);
110 
111  handle_args(argc, argv);
112 
113  /* Prevent leaving behind the test file */
114  pqsignal(SIGINT, signal_cleanup);
115  pqsignal(SIGTERM, signal_cleanup);
116 #ifndef WIN32
118 #endif
119 #ifdef SIGHUP
120  /* Not defined on win32 */
122 #endif
123 
124  pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
125 
126  prepare_buf();
127 
128  test_open();
129 
130  /* Test using 1 XLOG_BLCKSZ write */
131  test_sync(1);
132 
133  /* Test using 2 XLOG_BLCKSZ writes */
134  test_sync(2);
135 
136  test_open_syncs();
137 
139 
140  test_non_sync();
141 
142  unlink(filename);
143 
144  return 0;
145 }
146 
147 static void
148 handle_args(int argc, char *argv[])
149 {
150  static struct option long_options[] = {
151  {"filename", required_argument, NULL, 'f'},
152  {"secs-per-test", required_argument, NULL, 's'},
153  {NULL, 0, NULL, 0}
154  };
155 
156  int option; /* Command line option */
157  int optindex = 0; /* used by getopt_long */
158  unsigned long optval; /* used for option parsing */
159  char *endptr;
160 
161  if (argc > 1)
162  {
163  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
164  {
165  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
166  exit(0);
167  }
168  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
169  {
170  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
171  exit(0);
172  }
173  }
174 
175  while ((option = getopt_long(argc, argv, "f:s:",
176  long_options, &optindex)) != -1)
177  {
178  switch (option)
179  {
180  case 'f':
182  break;
183 
184  case 's':
185  errno = 0;
186  optval = strtoul(optarg, &endptr, 10);
187 
188  if (endptr == optarg || *endptr != '\0' ||
189  errno != 0 || optval != (unsigned int) optval)
190  {
191  pg_log_error("invalid argument for option %s", "--secs-per-test");
192  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
193  exit(1);
194  }
195 
196  secs_per_test = (unsigned int) optval;
197  if (secs_per_test == 0)
198  pg_fatal("%s must be in range %u..%u",
199  "--secs-per-test", 1, UINT_MAX);
200  break;
201 
202  default:
203  /* getopt_long already emitted a complaint */
204  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
205  exit(1);
206  }
207  }
208 
209  if (argc > optind)
210  {
211  pg_log_error("too many command-line arguments (first is \"%s\")",
212  argv[optind]);
213  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
214  exit(1);
215  }
216 
217  printf(ngettext("%u second per test\n",
218  "%u seconds per test\n",
219  secs_per_test),
220  secs_per_test);
221 #if defined(O_DIRECT)
222  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
223 #elif defined(F_NOCACHE)
224  printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
225 #else
226  printf(_("Direct I/O is not supported on this platform.\n"));
227 #endif
228 }
229 
230 static void
232 {
233  int ops;
234 
235  /* write random data into buffer */
236  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
238 
239  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
240 }
241 
242 static void
244 {
245  int tmpfile;
246 
247  /*
248  * test if we can open the target file
249  */
250  if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
251  die("could not open output file");
252  needs_unlink = 1;
253  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
255  die("write failed");
256 
257  /* fsync now so that dirty buffers don't skew later tests */
258  if (fsync(tmpfile) != 0)
259  die("fsync failed");
260 
261  close(tmpfile);
262 }
263 
/*
 * Open "path" asking for direct (uncached) I/O where the platform offers it.
 *
 * With O_DIRECT the flag is simply added to "flags".  Otherwise, if
 * F_NOCACHE exists, the file is opened normally and caching is switched off
 * with fcntl(); should that fail, the descriptor is closed and the fcntl()
 * errno is preserved.  With neither facility this is a plain open().
 *
 * Returns the file descriptor, or -1 with errno set.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
#ifdef O_DIRECT
	return open(path, flags | O_DIRECT, mode);
#else
	int			handle = open(path, flags, mode);

#ifdef F_NOCACHE
	if (handle >= 0 && fcntl(handle, F_NOCACHE, 1) < 0)
	{
		/* close() may clobber errno, so remember the real failure */
		int			fcntl_errno = errno;

		close(handle);
		errno = fcntl_errno;
		handle = -1;
	}
#endif

	return handle;
#endif
}
288 
289 static void
290 test_sync(int writes_per_op)
291 {
292  int tmpfile,
293  ops,
294  writes;
295  bool fs_warning = false;
296 
297  if (writes_per_op == 1)
298  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
299  else
300  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
301  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
302 
303  /*
304  * Test open_datasync if available
305  */
306  printf(LABEL_FORMAT, "open_datasync");
307  fflush(stdout);
308 
309 #ifdef O_DSYNC
310  if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
311  {
312  printf(NA_FORMAT, _("n/a*"));
313  fs_warning = true;
314  }
315  else
316  {
317  START_TIMER;
318  for (ops = 0; alarm_triggered == false; ops++)
319  {
320  for (writes = 0; writes < writes_per_op; writes++)
321  if (pg_pwrite(tmpfile,
322  buf,
323  XLOG_BLCKSZ,
324  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
325  die("write failed");
326  }
327  STOP_TIMER;
328  close(tmpfile);
329  }
330 #else
331  printf(NA_FORMAT, _("n/a"));
332 #endif
333 
334 /*
335  * Test fdatasync if available
336  */
337  printf(LABEL_FORMAT, "fdatasync");
338  fflush(stdout);
339 
340  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
341  die("could not open output file");
342  START_TIMER;
343  for (ops = 0; alarm_triggered == false; ops++)
344  {
345  for (writes = 0; writes < writes_per_op; writes++)
346  if (pg_pwrite(tmpfile,
347  buf,
348  XLOG_BLCKSZ,
349  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
350  die("write failed");
351  fdatasync(tmpfile);
352  }
353  STOP_TIMER;
354  close(tmpfile);
355 
356 /*
357  * Test fsync
358  */
359  printf(LABEL_FORMAT, "fsync");
360  fflush(stdout);
361 
362  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
363  die("could not open output file");
364  START_TIMER;
365  for (ops = 0; alarm_triggered == false; ops++)
366  {
367  for (writes = 0; writes < writes_per_op; writes++)
368  if (pg_pwrite(tmpfile,
369  buf,
370  XLOG_BLCKSZ,
371  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
372  die("write failed");
373  if (fsync(tmpfile) != 0)
374  die("fsync failed");
375  }
376  STOP_TIMER;
377  close(tmpfile);
378 
379 /*
380  * If fsync_writethrough is available, test as well
381  */
382  printf(LABEL_FORMAT, "fsync_writethrough");
383  fflush(stdout);
384 
385 #ifdef HAVE_FSYNC_WRITETHROUGH
386  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
387  die("could not open output file");
388  START_TIMER;
389  for (ops = 0; alarm_triggered == false; ops++)
390  {
391  for (writes = 0; writes < writes_per_op; writes++)
392  if (pg_pwrite(tmpfile,
393  buf,
394  XLOG_BLCKSZ,
395  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
396  die("write failed");
397  if (pg_fsync_writethrough(tmpfile) != 0)
398  die("fsync failed");
399  }
400  STOP_TIMER;
401  close(tmpfile);
402 #else
403  printf(NA_FORMAT, _("n/a"));
404 #endif
405 
406 /*
407  * Test open_sync if available
408  */
409  printf(LABEL_FORMAT, "open_sync");
410  fflush(stdout);
411 
412 #ifdef O_SYNC
413  if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
414  {
415  printf(NA_FORMAT, _("n/a*"));
416  fs_warning = true;
417  }
418  else
419  {
420  START_TIMER;
421  for (ops = 0; alarm_triggered == false; ops++)
422  {
423  for (writes = 0; writes < writes_per_op; writes++)
424  if (pg_pwrite(tmpfile,
425  buf,
426  XLOG_BLCKSZ,
427  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
428 
429  /*
430  * This can generate write failures if the filesystem has
431  * a large block size, e.g. 4k, and there is no support
432  * for O_DIRECT writes smaller than the file system block
433  * size, e.g. XFS.
434  */
435  die("write failed");
436  }
437  STOP_TIMER;
438  close(tmpfile);
439  }
440 #else
441  printf(NA_FORMAT, _("n/a"));
442 #endif
443 
444  if (fs_warning)
445  {
446  printf(_("* This file system and its mount options do not support direct\n"
447  " I/O, e.g. ext4 in journaled mode.\n"));
448  }
449 }
450 
/*
 * Run test_open_sync() for each write size from 16kB down to 1kB, so that
 * the cost of writing 16kB in differently sized pieces can be compared.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
464 
465 /*
466  * Test open_sync with different size files
467  */
468 static void
469 test_open_sync(const char *msg, int writes_size)
470 {
471 #ifdef O_SYNC
472  int tmpfile,
473  ops,
474  writes;
475 #endif
476 
477  printf(LABEL_FORMAT, msg);
478  fflush(stdout);
479 
480 #ifdef O_SYNC
481  if ((tmpfile = open_direct(filename, O_RDWR | O_SYNC | PG_BINARY, 0)) == -1)
482  printf(NA_FORMAT, _("n/a*"));
483  else
484  {
485  START_TIMER;
486  for (ops = 0; alarm_triggered == false; ops++)
487  {
488  for (writes = 0; writes < 16 / writes_size; writes++)
489  if (pg_pwrite(tmpfile,
490  buf,
491  writes_size * 1024,
492  writes * writes_size * 1024) !=
493  writes_size * 1024)
494  die("write failed");
495  }
496  STOP_TIMER;
497  close(tmpfile);
498  }
499 #else
500  printf(NA_FORMAT, _("n/a"));
501 #endif
502 }
503 
504 static void
506 {
507  int tmpfile,
508  ops;
509 
510  /*
511  * Test whether fsync can sync data written on a different descriptor for
512  * the same file. This checks the efficiency of multi-process fsyncs
513  * against the same file. Possibly this should be done with writethrough
514  * on platforms which support it.
515  */
516  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
517  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
518  "descriptor.)\n"));
519 
520  /*
521  * first write, fsync and close, which is the normal behavior without
522  * multiple descriptors
523  */
524  printf(LABEL_FORMAT, "write, fsync, close");
525  fflush(stdout);
526 
527  START_TIMER;
528  for (ops = 0; alarm_triggered == false; ops++)
529  {
530  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
531  die("could not open output file");
532  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
533  die("write failed");
534  if (fsync(tmpfile) != 0)
535  die("fsync failed");
536  close(tmpfile);
537 
538  /*
539  * open and close the file again to be consistent with the following
540  * test
541  */
542  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
543  die("could not open output file");
544  close(tmpfile);
545  }
546  STOP_TIMER;
547 
548  /*
549  * Now open, write, close, open again and fsync This simulates processes
550  * fsyncing each other's writes.
551  */
552  printf(LABEL_FORMAT, "write, close, fsync");
553  fflush(stdout);
554 
555  START_TIMER;
556  for (ops = 0; alarm_triggered == false; ops++)
557  {
558  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
559  die("could not open output file");
560  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
561  die("write failed");
562  close(tmpfile);
563  /* reopen file */
564  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
565  die("could not open output file");
566  if (fsync(tmpfile) != 0)
567  die("fsync failed");
568  close(tmpfile);
569  }
570  STOP_TIMER;
571 }
572 
573 static void
575 {
576  int tmpfile,
577  ops;
578 
579  /*
580  * Test a simple write without fsync
581  */
582  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
583  printf(LABEL_FORMAT, "write");
584  fflush(stdout);
585 
586  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
587  die("could not open output file");
588  START_TIMER;
589  for (ops = 0; alarm_triggered == false; ops++)
590  {
591  if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
592  die("write failed");
593  }
594  STOP_TIMER;
595  close(tmpfile);
596 }
597 
598 static void
600 {
601  int rc;
602 
603  /* Delete the file if it exists. Ignore errors */
604  if (needs_unlink)
605  unlink(filename);
606  /* Finish incomplete line on stdout */
607  rc = write(STDOUT_FILENO, "\n", 1);
608  (void) rc; /* silence compiler warnings */
609  _exit(1);
610 }
611 
612 #ifdef HAVE_FSYNC_WRITETHROUGH
613 
/*
 * Flush fd using fcntl(F_FULLFSYNC) where that command exists.
 *
 * Returns 0 on success and -1 on failure.  Where F_FULLFSYNC is not
 * defined, always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
624 #endif
625 
626 /*
627  * print out the writes per second for tests
628  */
629 static void
630 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
631 {
632  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
633  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
634  double per_second = ops / total_time;
635  double avg_op_time_us = (total_time / ops) * USECS_SEC;
636 
637  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
638 }
639 
640 #ifndef WIN32
641 static void
643 {
644  alarm_triggered = true;
645 }
646 #else
647 static DWORD WINAPI
648 process_alarm(LPVOID param)
649 {
650  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
651  Sleep(secs_per_test * 1000);
652  alarm_triggered = true;
653  ExitThread(0);
654 }
655 #endif
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:791
#define ngettext(s, p, n)
Definition: c.h:1168
#define SIGNAL_ARGS
Definition: c.h:1332
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1201
#define PG_BINARY
Definition: c.h:1260
int fdatasync(int fildes)
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:448
#define _(x)
Definition: elog.c:90
int pg_fsync_writethrough(int fd)
Definition: fd.c:461
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define required_argument
Definition: getopt_long.h:25
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
static void const char fflush(stdout)
exit(1)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define DEFAULT_XLOG_SEG_SIZE
PGDLLIMPORT int optind
Definition: getopt.c:50
PGDLLIMPORT char * optarg
Definition: getopt.c:52
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:207
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:89
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:72
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
#define NA_FORMAT
Definition: pg_test_fsync.c:36
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:31
static char * filename
Definition: pg_test_fsync.c:74
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:43
#define STOP_TIMER
Definition: pg_test_fsync.c:61
#define LABEL_FORMAT
Definition: pg_test_fsync.c:35
static int needs_unlink
Definition: pg_test_fsync.c:71
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
static char * buf
Definition: pg_test_fsync.c:73
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:75
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:68
static void process_alarm(SIGNAL_ARGS)
#define USECS_SEC
Definition: pg_test_fsync.c:39
static sig_atomic_t alarm_triggered
Definition: pg_test_fsync.c:77
#define OPS_FORMAT
Definition: pg_test_fsync.c:38
static unsigned int secs_per_test
Definition: pg_test_fsync.c:70
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:33
static void signal_cleanup(SIGNAL_ARGS)
#define pg_pwrite
Definition: port.h:226
const char * get_progname(const char *argv0)
Definition: path.c:574
pqsigfunc pqsignal(int signo, pqsigfunc func)
#define printf(...)
Definition: port.h:244
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define STDOUT_FILENO
Definition: unistd.h:8
#define fsync(fd)
Definition: win32_port.h:85
#define SIGHUP
Definition: win32_port.h:168
#define S_IRUSR
Definition: win32_port.h:289
#define SIGALRM
Definition: win32_port.h:174
#define S_IWUSR
Definition: win32_port.h:292
#define O_DSYNC
Definition: win32_port.h:352