PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
6 #include "postgres_fe.h"
7 
8 #include <limits.h>
9 #include <sys/stat.h>
10 #include <sys/time.h>
11 #include <fcntl.h>
12 #include <time.h>
13 #include <unistd.h>
14 #include <signal.h>
15 
16 #include "access/xlogdefs.h"
17 #include "common/logging.h"
18 #include "getopt_long.h"
19 
/*
 * Default location of the scratch file: the current directory, unless
 * the user asks for something else on the command line.
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* WAL block size expressed in kilobytes, for use in messages */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/* Microseconds per second */
#define USECS_SEC		1000000

/* Output layout: left-justified test label, then result or "n/a" column */
#define LABEL_FORMAT	" %-30s"
#define NA_FORMAT		"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT		gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
33 
/*
 * Timer helpers.  These are macros, not functions, to avoid timing the
 * function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again
 * after secs_per_test seconds, and records the start time in start_t.
 * STOP_TIMER records the stop time in stop_t and prints the result for
 * the local variable "ops", which must be in scope at the point of use.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The handle is closed right away since nothing
 * waits on it; closing a thread handle does not terminate the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		pg_log_error("could not create thread for alarm"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
62 
63 
64 static const char *progname;
65 
66 static unsigned int secs_per_test = 5;
67 static int needs_unlink = 0;
69  *buf,
71 static struct timeval start_t,
72  stop_t;
73 static bool alarm_triggered = false;
74 
75 
/* Command-line handling and setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* The individual tests */
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* Timer expiry: a signal handler on Unix, a thread entry point on WIN32 */
#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
static void signal_cleanup(int sig);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);

/* Report msg (translated) followed by the errno text, then exit(1) */
#define die(msg) \
do { \
	pg_log_error("%s: %m", _(msg)); \
	exit(1); \
} while(0)
98 
99 
100 int
101 main(int argc, char *argv[])
102 {
103  pg_logging_init(argv[0]);
104  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
105  progname = get_progname(argv[0]);
106 
107  handle_args(argc, argv);
108 
109  /* Prevent leaving behind the test file */
110  pqsignal(SIGINT, signal_cleanup);
111  pqsignal(SIGTERM, signal_cleanup);
112 #ifndef WIN32
114 #endif
115 #ifdef SIGHUP
116  /* Not defined on win32 */
118 #endif
119 
120  prepare_buf();
121 
122  test_open();
123 
124  /* Test using 1 XLOG_BLCKSZ write */
125  test_sync(1);
126 
127  /* Test using 2 XLOG_BLCKSZ writes */
128  test_sync(2);
129 
130  test_open_syncs();
131 
133 
134  test_non_sync();
135 
136  unlink(filename);
137 
138  return 0;
139 }
140 
141 static void
142 handle_args(int argc, char *argv[])
143 {
144  static struct option long_options[] = {
145  {"filename", required_argument, NULL, 'f'},
146  {"secs-per-test", required_argument, NULL, 's'},
147  {NULL, 0, NULL, 0}
148  };
149 
150  int option; /* Command line option */
151  int optindex = 0; /* used by getopt_long */
152  unsigned long optval; /* used for option parsing */
153  char *endptr;
154 
155  if (argc > 1)
156  {
157  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
158  {
159  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
160  exit(0);
161  }
162  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
163  {
164  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
165  exit(0);
166  }
167  }
168 
169  while ((option = getopt_long(argc, argv, "f:s:",
170  long_options, &optindex)) != -1)
171  {
172  switch (option)
173  {
174  case 'f':
176  break;
177 
178  case 's':
179  errno = 0;
180  optval = strtoul(optarg, &endptr, 10);
181 
182  if (endptr == optarg || *endptr != '\0' ||
183  errno != 0 || optval != (unsigned int) optval)
184  {
185  pg_log_error("invalid argument for option %s", "--secs-per-test");
186  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
187  exit(1);
188  }
189 
190  secs_per_test = (unsigned int) optval;
191  if (secs_per_test == 0)
192  {
193  pg_log_error("%s must be in range %u..%u",
194  "--secs-per-test", 1, UINT_MAX);
195  exit(1);
196  }
197  break;
198 
199  default:
200  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
201  progname);
202  exit(1);
203  break;
204  }
205  }
206 
207  if (argc > optind)
208  {
209  pg_log_error("too many command-line arguments (first is \"%s\")",
210  argv[optind]);
211  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
212  progname);
213  exit(1);
214  }
215 
216  printf(ngettext("%u second per test\n",
217  "%u seconds per test\n",
218  secs_per_test),
219  secs_per_test);
220 #if defined(O_DIRECT)
221  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
222 #elif defined(F_NOCACHE)
223  printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
224 #else
225  printf(_("Direct I/O is not supported on this platform.\n"));
226 #endif
227 }
228 
229 static void
231 {
232  int ops;
233 
234  /* write random data into buffer */
235  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
236  full_buf[ops] = random();
237 
238  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
239 }
240 
241 static void
243 {
244  int tmpfile;
245 
246  /*
247  * test if we can open the target file
248  */
249  if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
250  die("could not open output file");
251  needs_unlink = 1;
252  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
254  die("write failed");
255 
256  /* fsync now so that dirty buffers don't skew later tests */
257  if (fsync(tmpfile) != 0)
258  die("fsync failed");
259 
260  close(tmpfile);
261 }
262 
/*
 * Open "path" like open(2), additionally requesting direct I/O by
 * whichever mechanism was available at compile time: the O_DIRECT open
 * flag, or else the F_NOCACHE fcntl applied after a successful open.
 *
 * Returns the new descriptor, or -1 with errno set.  If the F_NOCACHE
 * request fails, the descriptor is closed and fcntl's errno is reported.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
	int			handle;

#ifdef O_DIRECT
	handle = open(path, flags | O_DIRECT, mode);
#else
	handle = open(path, flags, mode);
#endif

#if !defined(O_DIRECT) && defined(F_NOCACHE)
	if (handle >= 0)
	{
		if (fcntl(handle, F_NOCACHE, 1) < 0)
		{
			/* close() may clobber errno, so keep fcntl's value */
			const int	fcntl_errno = errno;

			close(handle);
			errno = fcntl_errno;
			handle = -1;
		}
	}
#endif

	return handle;
}
287 
288 static void
289 test_sync(int writes_per_op)
290 {
291  int tmpfile,
292  ops,
293  writes;
294  bool fs_warning = false;
295 
296  if (writes_per_op == 1)
297  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
298  else
299  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
300  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
301 
302  /*
303  * Test open_datasync if available
304  */
305  printf(LABEL_FORMAT, "open_datasync");
306  fflush(stdout);
307 
308 #ifdef OPEN_DATASYNC_FLAG
309  if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
310  {
311  printf(NA_FORMAT, _("n/a*"));
312  fs_warning = true;
313  }
314  else
315  {
316  START_TIMER;
317  for (ops = 0; alarm_triggered == false; ops++)
318  {
319  for (writes = 0; writes < writes_per_op; writes++)
320  if (pg_pwrite(tmpfile,
321  buf,
322  XLOG_BLCKSZ,
323  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
324  die("write failed");
325  }
326  STOP_TIMER;
327  close(tmpfile);
328  }
329 #else
330  printf(NA_FORMAT, _("n/a"));
331 #endif
332 
333 /*
334  * Test fdatasync if available
335  */
336  printf(LABEL_FORMAT, "fdatasync");
337  fflush(stdout);
338 
339 #ifdef HAVE_FDATASYNC
340  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
341  die("could not open output file");
342  START_TIMER;
343  for (ops = 0; alarm_triggered == false; ops++)
344  {
345  for (writes = 0; writes < writes_per_op; writes++)
346  if (pg_pwrite(tmpfile,
347  buf,
348  XLOG_BLCKSZ,
349  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
350  die("write failed");
351  fdatasync(tmpfile);
352  }
353  STOP_TIMER;
354  close(tmpfile);
355 #else
356  printf(NA_FORMAT, _("n/a"));
357 #endif
358 
359 /*
360  * Test fsync
361  */
362  printf(LABEL_FORMAT, "fsync");
363  fflush(stdout);
364 
365  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
366  die("could not open output file");
367  START_TIMER;
368  for (ops = 0; alarm_triggered == false; ops++)
369  {
370  for (writes = 0; writes < writes_per_op; writes++)
371  if (pg_pwrite(tmpfile,
372  buf,
373  XLOG_BLCKSZ,
374  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
375  die("write failed");
376  if (fsync(tmpfile) != 0)
377  die("fsync failed");
378  }
379  STOP_TIMER;
380  close(tmpfile);
381 
382 /*
383  * If fsync_writethrough is available, test as well
384  */
385  printf(LABEL_FORMAT, "fsync_writethrough");
386  fflush(stdout);
387 
388 #ifdef HAVE_FSYNC_WRITETHROUGH
389  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
390  die("could not open output file");
391  START_TIMER;
392  for (ops = 0; alarm_triggered == false; ops++)
393  {
394  for (writes = 0; writes < writes_per_op; writes++)
395  if (pg_pwrite(tmpfile,
396  buf,
397  XLOG_BLCKSZ,
398  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
399  die("write failed");
400  if (pg_fsync_writethrough(tmpfile) != 0)
401  die("fsync failed");
402  }
403  STOP_TIMER;
404  close(tmpfile);
405 #else
406  printf(NA_FORMAT, _("n/a"));
407 #endif
408 
409 /*
410  * Test open_sync if available
411  */
412  printf(LABEL_FORMAT, "open_sync");
413  fflush(stdout);
414 
415 #ifdef OPEN_SYNC_FLAG
416  if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
417  {
418  printf(NA_FORMAT, _("n/a*"));
419  fs_warning = true;
420  }
421  else
422  {
423  START_TIMER;
424  for (ops = 0; alarm_triggered == false; ops++)
425  {
426  for (writes = 0; writes < writes_per_op; writes++)
427  if (pg_pwrite(tmpfile,
428  buf,
429  XLOG_BLCKSZ,
430  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
431 
432  /*
433  * This can generate write failures if the filesystem has
434  * a large block size, e.g. 4k, and there is no support
435  * for O_DIRECT writes smaller than the file system block
436  * size, e.g. XFS.
437  */
438  die("write failed");
439  }
440  STOP_TIMER;
441  close(tmpfile);
442  }
443 #else
444  printf(NA_FORMAT, _("n/a"));
445 #endif
446 
447  if (fs_warning)
448  {
449  printf(_("* This file system and its mount options do not support direct\n"
450  " I/O, e.g. ext4 in journaled mode.\n"));
451  }
452 }
453 
/*
 * Run test_open_sync() once for each way of splitting a 16kB write into
 * equal chunks, from a single 16kB write down to sixteen 1kB writes.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
467 
468 /*
469  * Test open_sync with different size files
470  */
471 static void
472 test_open_sync(const char *msg, int writes_size)
473 {
474 #ifdef OPEN_SYNC_FLAG
475  int tmpfile,
476  ops,
477  writes;
478 #endif
479 
480  printf(LABEL_FORMAT, msg);
481  fflush(stdout);
482 
483 #ifdef OPEN_SYNC_FLAG
484  if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
485  printf(NA_FORMAT, _("n/a*"));
486  else
487  {
488  START_TIMER;
489  for (ops = 0; alarm_triggered == false; ops++)
490  {
491  for (writes = 0; writes < 16 / writes_size; writes++)
492  if (pg_pwrite(tmpfile,
493  buf,
494  writes_size * 1024,
495  writes * writes_size * 1024) !=
496  writes_size * 1024)
497  die("write failed");
498  }
499  STOP_TIMER;
500  close(tmpfile);
501  }
502 #else
503  printf(NA_FORMAT, _("n/a"));
504 #endif
505 }
506 
507 static void
509 {
510  int tmpfile,
511  ops;
512 
513  /*
514  * Test whether fsync can sync data written on a different descriptor for
515  * the same file. This checks the efficiency of multi-process fsyncs
516  * against the same file. Possibly this should be done with writethrough
517  * on platforms which support it.
518  */
519  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
520  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
521  "descriptor.)\n"));
522 
523  /*
524  * first write, fsync and close, which is the normal behavior without
525  * multiple descriptors
526  */
527  printf(LABEL_FORMAT, "write, fsync, close");
528  fflush(stdout);
529 
530  START_TIMER;
531  for (ops = 0; alarm_triggered == false; ops++)
532  {
533  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
534  die("could not open output file");
535  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
536  die("write failed");
537  if (fsync(tmpfile) != 0)
538  die("fsync failed");
539  close(tmpfile);
540 
541  /*
542  * open and close the file again to be consistent with the following
543  * test
544  */
545  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
546  die("could not open output file");
547  close(tmpfile);
548  }
549  STOP_TIMER;
550 
551  /*
552  * Now open, write, close, open again and fsync This simulates processes
553  * fsyncing each other's writes.
554  */
555  printf(LABEL_FORMAT, "write, close, fsync");
556  fflush(stdout);
557 
558  START_TIMER;
559  for (ops = 0; alarm_triggered == false; ops++)
560  {
561  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
562  die("could not open output file");
563  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
564  die("write failed");
565  close(tmpfile);
566  /* reopen file */
567  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
568  die("could not open output file");
569  if (fsync(tmpfile) != 0)
570  die("fsync failed");
571  close(tmpfile);
572  }
573  STOP_TIMER;
574 }
575 
576 static void
578 {
579  int tmpfile,
580  ops;
581 
582  /*
583  * Test a simple write without fsync
584  */
585  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
586  printf(LABEL_FORMAT, "write");
587  fflush(stdout);
588 
589  if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
590  die("could not open output file");
591  START_TIMER;
592  for (ops = 0; alarm_triggered == false; ops++)
593  {
594  if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ)
595  die("write failed");
596  }
597  STOP_TIMER;
598  close(tmpfile);
599 }
600 
601 static void
602 signal_cleanup(int signum)
603 {
604  /* Delete the file if it exists. Ignore errors */
605  if (needs_unlink)
606  unlink(filename);
607  /* Finish incomplete line on stdout */
608  puts("");
609  exit(signum);
610 }
611 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Sync fd using the platform's write-through primitive: _commit() on
 * WIN32, or fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure.  When neither primitive is
 * available, fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
627 
628 /*
629  * print out the writes per second for tests
630  */
631 static void
632 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
633 {
634  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
635  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
636  double per_second = ops / total_time;
637  double avg_op_time_us = (total_time / ops) * USECS_SEC;
638 
639  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
640 }
641 
642 #ifndef WIN32
643 static void
645 {
646  alarm_triggered = true;
647 }
648 #else
649 static DWORD WINAPI
650 process_alarm(LPVOID param)
651 {
652  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
653  Sleep(secs_per_test * 1000);
654  alarm_triggered = true;
655  ExitThread(0);
656 }
657 #endif
static void test_sync(int writes_per_op)
static PgChecksumMode mode
Definition: pg_checksums.c:65
static char * filename
Definition: pg_test_fsync.c:70
#define write(a, b, c)
Definition: win32.h:14
const char * get_progname(const char *argv0)
Definition: path.c:453
#define pg_log_error(...)
Definition: logging.h:80
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
long random(void)
Definition: random.c:22
int pg_fsync_writethrough(int fd)
Definition: fd.c:425
void pg_logging_init(const char *argv0)
Definition: logging.c:81
static int open_direct(const char *path, int flags, mode_t mode)
#define USECS_SEC
Definition: pg_test_fsync.c:32
#define DEFAULT_XLOG_SEG_SIZE
#define printf(...)
Definition: port.h:223
#define fprintf
Definition: port.h:221
static int fd(const char *x, int i)
Definition: preproc-init.c:105
#define PG_BINARY
Definition: c.h:1271
ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset)
Definition: pwrite.c:27
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:68
static void test_open(void)
static bool alarm_triggered
Definition: pg_test_fsync.c:73
static void prepare_buf(void)
#define required_argument
Definition: getopt_long.h:25
#define fsync(fd)
Definition: win32_port.h:76
int optind
Definition: getopt.c:50
#define S_IWUSR
Definition: win32_port.h:291
static const char * progname
Definition: pg_test_fsync.c:64
static char * buf
Definition: pg_test_fsync.c:68
#define SIGHUP
Definition: win32_port.h:167
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
#define NA_FORMAT
Definition: pg_test_fsync.c:29
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:26
#define STOP_TIMER
Definition: pg_test_fsync.c:57
#define ngettext(s, p, n)
Definition: c.h:1182
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1215
#define O_DSYNC
Definition: win32_port.h:336
#define LABEL_FORMAT
Definition: pg_test_fsync.c:28
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void test_open_syncs(void)
static void test_non_sync(void)
static unsigned int secs_per_test
Definition: pg_test_fsync.c:66
static int sig
Definition: pg_ctl.c:84
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:71
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:750
static void process_alarm(int sig)
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
static void signal_cleanup(int sig)
int main(int argc, char *argv[])
#define START_TIMER
Definition: pg_test_fsync.c:36
#define OPS_FORMAT
Definition: pg_test_fsync.c:31
#define SIGALRM
Definition: win32_port.h:173
#define S_IRUSR
Definition: win32_port.h:288
static int needs_unlink
Definition: pg_test_fsync.c:67
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:433
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:24
char * optarg
Definition: getopt.c:52
static void test_file_descriptor_sync(void)
#define close(a)
Definition: win32.h:12
#define _(x)
Definition: elog.c:89
#define die(msg)
Definition: pg_test_fsync.c:97
static void test_open_sync(const char *msg, int writes_size)
static void handle_args(int argc, char *argv[])