PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
6 #include "postgres_fe.h"
7 
8 #include <limits.h>
9 #include <sys/stat.h>
10 #include <sys/time.h>
11 #include <fcntl.h>
12 #include <time.h>
13 #include <unistd.h>
14 #include <signal.h>
15 
16 #include "access/xlogdefs.h"
17 #include "common/logging.h"
18 #include "common/pg_prng.h"
19 #include "getopt_long.h"
20 
21 /*
22  * put the temp files in the local directory
23  * unless the user specifies otherwise
24  */
25 #define FSYNC_FILENAME "./pg_test_fsync.out"
26 
27 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
28 
29 #define LABEL_FORMAT " %-30s"
30 #define NA_FORMAT "%21s\n"
31 /* translator: maintain alignment with NA_FORMAT */
32 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
33 #define USECS_SEC 1000000
34 
/*
 * These are macros to avoid timing the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for process_alarm() to run
 * after secs_per_test seconds, and records the start time in start_t.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE.  The handle is closed immediately so that it is not
 * leaked on every test; closing the handle does not terminate the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
		pg_fatal("could not create thread for alarm"); \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif
54 
55 #define STOP_TIMER \
56 do { \
57  gettimeofday(&stop_t, NULL); \
58  print_elapse(start_t, stop_t, ops); \
59 } while (0)
60 
61 
62 static const char *progname;
63 
64 static unsigned int secs_per_test = 5;
65 static int needs_unlink = 0;
67  *buf,
69 static struct timeval start_t,
70  stop_t;
71 static bool alarm_triggered = false;
72 
73 
74 static void handle_args(int argc, char *argv[]);
75 static void prepare_buf(void);
76 static void test_open(void);
77 static void test_non_sync(void);
78 static void test_sync(int writes_per_op);
79 static void test_open_syncs(void);
80 static void test_open_sync(const char *msg, int writes_size);
81 static void test_file_descriptor_sync(void);
82 
83 #ifndef WIN32
84 static void process_alarm(int sig);
85 #else
86 static DWORD WINAPI process_alarm(LPVOID param);
87 #endif
88 static void signal_cleanup(int sig);
89 
90 #ifdef HAVE_FSYNC_WRITETHROUGH
91 static int pg_fsync_writethrough(int fd);
92 #endif
93 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
94 
95 #define die(msg) pg_fatal("%s: %m", _(msg))
96 
97 
98 int
99 main(int argc, char *argv[])
100 {
101  pg_logging_init(argv[0]);
102  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
103  progname = get_progname(argv[0]);
104 
105  handle_args(argc, argv);
106 
107  /* Prevent leaving behind the test file */
108  pqsignal(SIGINT, signal_cleanup);
109  pqsignal(SIGTERM, signal_cleanup);
110 #ifndef WIN32
112 #endif
113 #ifdef SIGHUP
114  /* Not defined on win32 */
116 #endif
117 
118  pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
119 
120  prepare_buf();
121 
122  test_open();
123 
124  /* Test using 1 XLOG_BLCKSZ write */
125  test_sync(1);
126 
127  /* Test using 2 XLOG_BLCKSZ writes */
128  test_sync(2);
129 
130  test_open_syncs();
131 
133 
134  test_non_sync();
135 
136  unlink(filename);
137 
138  return 0;
139 }
140 
141 static void
142 handle_args(int argc, char *argv[])
143 {
144  static struct option long_options[] = {
145  {"filename", required_argument, NULL, 'f'},
146  {"secs-per-test", required_argument, NULL, 's'},
147  {NULL, 0, NULL, 0}
148  };
149 
150  int option; /* Command line option */
151  int optindex = 0; /* used by getopt_long */
152  unsigned long optval; /* used for option parsing */
153  char *endptr;
154 
155  if (argc > 1)
156  {
157  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
158  {
159  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
160  exit(0);
161  }
162  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
163  {
164  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
165  exit(0);
166  }
167  }
168 
169  while ((option = getopt_long(argc, argv, "f:s:",
170  long_options, &optindex)) != -1)
171  {
172  switch (option)
173  {
174  case 'f':
176  break;
177 
178  case 's':
179  errno = 0;
180  optval = strtoul(optarg, &endptr, 10);
181 
182  if (endptr == optarg || *endptr != '\0' ||
183  errno != 0 || optval != (unsigned int) optval)
184  {
185  pg_log_error("invalid argument for option %s", "--secs-per-test");
186  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
187  exit(1);
188  }
189 
190  secs_per_test = (unsigned int) optval;
191  if (secs_per_test == 0)
192  pg_fatal("%s must be in range %u..%u",
193  "--secs-per-test", 1, UINT_MAX);
194  break;
195 
196  default:
197  /* getopt_long already emitted a complaint */
198  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
199  exit(1);
200  }
201  }
202 
203  if (argc > optind)
204  {
205  pg_log_error("too many command-line arguments (first is \"%s\")",
206  argv[optind]);
207  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
208  exit(1);
209  }
210 
211  printf(ngettext("%u second per test\n",
212  "%u seconds per test\n",
213  secs_per_test),
214  secs_per_test);
215 #if defined(O_DIRECT)
216  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
217 #elif defined(F_NOCACHE)
218  printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
219 #else
220  printf(_("Direct I/O is not supported on this platform.\n"));
221 #endif
222 }
223 
224 static void
226 {
227  int ops;
228 
229  /* write random data into buffer */
230  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
232 
233  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
234 }
235 
236 static void
238 {
239  int tmpfile;
240 
241  /*
242  * test if we can open the target file
243  */
244  if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
245  die("could not open output file");
246  needs_unlink = 1;
247  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
249  die("write failed");
250 
251  /* fsync now so that dirty buffers don't skew later tests */
252  if (fsync(tmpfile) != 0)
253  die("fsync failed");
254 
255  close(tmpfile);
256 }
257 
/*
 * Open "path" requesting direct I/O where the platform offers it.
 *
 * With O_DIRECT the flag is simply added to the open() call.  Without
 * O_DIRECT but with F_NOCACHE, the file is opened normally and caching is
 * then switched off via fcntl(); if that fails the descriptor is closed and
 * -1 is returned with errno from the failed fcntl() call.  With neither,
 * this is a plain open().
 *
 * Returns the file descriptor, or a negative value on failure.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			result;

#ifdef O_DIRECT
	result = open(path, flags | O_DIRECT, mode);
#else
	result = open(path, flags, mode);
#endif

#if !defined(O_DIRECT) && defined(F_NOCACHE)
	if (result < 0)
		return result;

	if (fcntl(result, F_NOCACHE, 1) < 0)
	{
		/* close() may clobber errno, so preserve the fcntl() error */
		int			fcntl_errno = errno;

		close(result);
		errno = fcntl_errno;
		result = -1;
	}
#endif

	return result;
}
282 
283 static void
284 test_sync(int writes_per_op)
285 {
286  int tmpfile,
287  ops,
288  writes;
289  bool fs_warning = false;
290 
291  if (writes_per_op == 1)
292  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
293  else
294  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
295  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
296 
297  /*
298  * Test open_datasync if available
299  */
300  printf(LABEL_FORMAT, "open_datasync");
301  fflush(stdout);
302 
303 #ifdef OPEN_DATASYNC_FLAG
304  if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
305  {
306  printf(NA_FORMAT, _("n/a*"));
307  fs_warning = true;
308  }
309  else
310  {
311  START_TIMER;
312  for (ops = 0; alarm_triggered == false; ops++)
313  {
314  for (writes = 0; writes < writes_per_op; writes++)
315  if (pg_pwrite(tmpfile,
316  buf,
317  XLOG_BLCKSZ,
318  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
319  die("write failed");
320  }
321  STOP_TIMER;
322  close(tmpfile);
323  }
324 #else
325  printf(NA_FORMAT, _("n/a"));
326 #endif
327 
328 /*
329  * Test fdatasync if available
330  */
331  printf(LABEL_FORMAT, "fdatasync");
332  fflush(stdout);
333 
334 #ifdef HAVE_FDATASYNC
335  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
336  die("could not open output file");
337  START_TIMER;
338  for (ops = 0; alarm_triggered == false; ops++)
339  {
340  for (writes = 0; writes < writes_per_op; writes++)
341  if (pg_pwrite(tmpfile,
342  buf,
343  XLOG_BLCKSZ,
344  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
345  die("write failed");
346  fdatasync(tmpfile);
347  }
348  STOP_TIMER;
349  close(tmpfile);
350 #else
351  printf(NA_FORMAT, _("n/a"));
352 #endif
353 
354 /*
355  * Test fsync
356  */
357  printf(LABEL_FORMAT, "fsync");
358  fflush(stdout);
359 
360  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
361  die("could not open output file");
362  START_TIMER;
363  for (ops = 0; alarm_triggered == false; ops++)
364  {
365  for (writes = 0; writes < writes_per_op; writes++)
366  if (pg_pwrite(tmpfile,
367  buf,
368  XLOG_BLCKSZ,
369  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
370  die("write failed");
371  if (fsync(tmpfile) != 0)
372  die("fsync failed");
373  }
374  STOP_TIMER;
375  close(tmpfile);
376 
377 /*
378  * If fsync_writethrough is available, test as well
379  */
380  printf(LABEL_FORMAT, "fsync_writethrough");
381  fflush(stdout);
382 
383 #ifdef HAVE_FSYNC_WRITETHROUGH
384  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
385  die("could not open output file");
386  START_TIMER;
387  for (ops = 0; alarm_triggered == false; ops++)
388  {
389  for (writes = 0; writes < writes_per_op; writes++)
390  if (pg_pwrite(tmpfile,
391  buf,
392  XLOG_BLCKSZ,
393  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
394  die("write failed");
395  if (pg_fsync_writethrough(tmpfile) != 0)
396  die("fsync failed");
397  }
398  STOP_TIMER;
399  close(tmpfile);
400 #else
401  printf(NA_FORMAT, _("n/a"));
402 #endif
403 
404 /*
405  * Test open_sync if available
406  */
407  printf(LABEL_FORMAT, "open_sync");
408  fflush(stdout);
409 
410 #ifdef OPEN_SYNC_FLAG
411  if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
412  {
413  printf(NA_FORMAT, _("n/a*"));
414  fs_warning = true;
415  }
416  else
417  {
418  START_TIMER;
419  for (ops = 0; alarm_triggered == false; ops++)
420  {
421  for (writes = 0; writes < writes_per_op; writes++)
422  if (pg_pwrite(tmpfile,
423  buf,
424  XLOG_BLCKSZ,
425  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
426 
427  /*
428  * This can generate write failures if the filesystem has
429  * a large block size, e.g. 4k, and there is no support
430  * for O_DIRECT writes smaller than the file system block
431  * size, e.g. XFS.
432  */
433  die("write failed");
434  }
435  STOP_TIMER;
436  close(tmpfile);
437  }
438 #else
439  printf(NA_FORMAT, _("n/a"));
440 #endif
441 
442  if (fs_warning)
443  {
444  printf(_("* This file system and its mount options do not support direct\n"
445  " I/O, e.g. ext4 in journaled mode.\n"));
446  }
447 }
448 
/*
 * Run test_open_sync() for each way of splitting a 16kB write into
 * equal-sized pieces (16, 8, 4, 2 and 1 kB).
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
	test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
	test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
	test_open_sync(_("16 * 1kB open_sync writes"), 1);
}
462 
463 /*
464  * Test open_sync with different size files
465  */
466 static void
467 test_open_sync(const char *msg, int writes_size)
468 {
469 #ifdef OPEN_SYNC_FLAG
470  int tmpfile,
471  ops,
472  writes;
473 #endif
474 
475  printf(LABEL_FORMAT, msg);
476  fflush(stdout);
477 
478 #ifdef OPEN_SYNC_FLAG
479  if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
480  printf(NA_FORMAT, _("n/a*"));
481  else
482  {
483  START_TIMER;
484  for (ops = 0; alarm_triggered == false; ops++)
485  {
486  for (writes = 0; writes < 16 / writes_size; writes++)
487  if (pg_pwrite(tmpfile,
488  buf,
489  writes_size * 1024,
490  writes * writes_size * 1024) !=
491  writes_size * 1024)
492  die("write failed");
493  }
494  STOP_TIMER;
495  close(tmpfile);
496  }
497 #else
498  printf(NA_FORMAT, _("n/a"));
499 #endif
500 }
501 
502 static void
504 {
505  int tmpfile,
506  ops;
507 
508  /*
509  * Test whether fsync can sync data written on a different descriptor for
510  * the same file. This checks the efficiency of multi-process fsyncs
511  * against the same file. Possibly this should be done with writethrough
512  * on platforms which support it.
513  */
514  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
515  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
516  "descriptor.)\n"));
517 
518  /*
519  * first write, fsync and close, which is the normal behavior without
520  * multiple descriptors
521  */
522  printf(LABEL_FORMAT, "write, fsync, close");
523  fflush(stdout);
524 
525  START_TIMER;
526  for (ops = 0; alarm_triggered == false; ops++)
527  {
528  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
529  die("could not open output file");
530  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
531  die("write failed");
532  if (fsync(tmpfile) != 0)
533  die("fsync failed");
534  close(tmpfile);
535 
536  /*
537  * open and close the file again to be consistent with the following
538  * test
539  */
540  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
541  die("could not open output file");
542  close(tmpfile);
543  }
544  STOP_TIMER;
545 
546  /*
547  * Now open, write, close, open again and fsync This simulates processes
548  * fsyncing each other's writes.
549  */
550  printf(LABEL_FORMAT, "write, close, fsync");
551  fflush(stdout);
552 
553  START_TIMER;
554  for (ops = 0; alarm_triggered == false; ops++)
555  {
556  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
557  die("could not open output file");
558  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
559  die("write failed");
560  close(tmpfile);
561  /* reopen file */
562  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
563  die("could not open output file");
564  if (fsync(tmpfile) != 0)
565  die("fsync failed");
566  close(tmpfile);
567  }
568  STOP_TIMER;
569 }
570 
571 static void
573 {
574  int tmpfile,
575  ops;
576 
577  /*
578  * Test a simple write without fsync
579  */
580  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
581  printf(LABEL_FORMAT, "write");
582  fflush(stdout);
583 
584  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
585  die("could not open output file");
586  START_TIMER;
587  for (ops = 0; alarm_triggered == false; ops++)
588  {
589  if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
590  die("write failed");
591  }
592  STOP_TIMER;
593  close(tmpfile);
594 }
595 
596 static void
597 signal_cleanup(int signum)
598 {
599  /* Delete the file if it exists. Ignore errors */
600  if (needs_unlink)
601  unlink(filename);
602  /* Finish incomplete line on stdout */
603  puts("");
604  exit(signum);
605 }
606 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using the platform's write-through primitive: _commit() on
 * WIN32, fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure; if neither primitive is
 * available, fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
622 
623 /*
624  * print out the writes per second for tests
625  */
626 static void
627 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
628 {
629  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
630  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
631  double per_second = ops / total_time;
632  double avg_op_time_us = (total_time / ops) * USECS_SEC;
633 
634  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
635 }
636 
637 #ifndef WIN32
638 static void
640 {
641  alarm_triggered = true;
642 }
643 #else
644 static DWORD WINAPI
645 process_alarm(LPVOID param)
646 {
647  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
648  Sleep(secs_per_test * 1000);
649  alarm_triggered = true;
650  ExitThread(0);
651 }
652 #endif
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:750
#define ngettext(s, p, n)
Definition: c.h:1179
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1212
#define PG_BINARY
Definition: c.h:1268
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:446
#define _(x)
Definition: elog.c:89
int pg_fsync_writethrough(int fd)
Definition: fd.c:426
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
#define required_argument
Definition: getopt_long.h:25
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
static void const char fflush(stdout)
exit(1)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_fatal(...)
static PgChecksumMode mode
Definition: pg_checksums.c:65
#define DEFAULT_XLOG_SEG_SIZE
static int sig
Definition: pg_ctl.c:84
PGDLLIMPORT int optind
Definition: getopt.c:50
PGDLLIMPORT char * optarg
Definition: getopt.c:52
int32 pg_prng_int32(pg_prng_state *state)
Definition: pg_prng.c:201
void pg_prng_seed(pg_prng_state *state, uint64 seed)
Definition: pg_prng.c:83
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:28
static void test_file_descriptor_sync(void)
int main(int argc, char *argv[])
Definition: pg_test_fsync.c:99
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:66
static void prepare_buf(void)
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void process_alarm(int sig)
#define NA_FORMAT
Definition: pg_test_fsync.c:30
static void test_open_syncs(void)
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:25
static char * filename
Definition: pg_test_fsync.c:68
static void handle_args(int argc, char *argv[])
static int open_direct(const char *path, int flags, mode_t mode)
#define START_TIMER
Definition: pg_test_fsync.c:37
#define STOP_TIMER
Definition: pg_test_fsync.c:55
#define LABEL_FORMAT
Definition: pg_test_fsync.c:29
static void signal_cleanup(int sig)
static int needs_unlink
Definition: pg_test_fsync.c:65
static void test_sync(int writes_per_op)
static void test_open(void)
static void test_non_sync(void)
#define die(msg)
Definition: pg_test_fsync.c:95
static char * buf
Definition: pg_test_fsync.c:67
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:69
static void test_open_sync(const char *msg, int writes_size)
static const char * progname
Definition: pg_test_fsync.c:62
static bool alarm_triggered
Definition: pg_test_fsync.c:71
#define USECS_SEC
Definition: pg_test_fsync.c:33
#define OPS_FORMAT
Definition: pg_test_fsync.c:32
static unsigned int secs_per_test
Definition: pg_test_fsync.c:64
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:27
const char * get_progname(const char *argv0)
Definition: path.c:574
ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset)
Definition: pwrite.c:27
#define printf(...)
Definition: port.h:231
static int fd(const char *x, int i)
Definition: preproc-init.c:105
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:180
#define fsync(fd)
Definition: win32_port.h:76
#define SIGHUP
Definition: win32_port.h:167
#define S_IRUSR
Definition: win32_port.h:288
#define SIGALRM
Definition: win32_port.h:173
#define S_IWUSR
Definition: win32_port.h:291
#define O_DSYNC
Definition: win32_port.h:336