PostgreSQL Source Code  git master
pg_test_fsync.c
Go to the documentation of this file.
1 /*
2  * pg_test_fsync.c
3  * tests all supported fsync() methods
4  */
5 
6 #include "postgres_fe.h"
7 
8 #include <sys/stat.h>
9 #include <sys/time.h>
10 #include <fcntl.h>
11 #include <time.h>
12 #include <unistd.h>
13 #include <signal.h>
14 
15 #include "getopt_long.h"
16 #include "access/xlogdefs.h"
17 #include "common/logging.h"
18 
19 
20 /*
21  * put the temp files in the local directory
22  * unless the user specifies otherwise
23  */
24 #define FSYNC_FILENAME "./pg_test_fsync.out"
25 
26 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
27 
28 #define LABEL_FORMAT " %-30s"
29 #define NA_FORMAT "%21s\n"
30 /* translator: maintain alignment with NA_FORMAT */
31 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
32 #define USECS_SEC 1000000
33 
/*
 * Timing helpers.  These are macros to avoid timing the function call
 * overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and stores the current time in start_t.
 * STOP_TIMER stores the current time in stop_t and prints the result; it
 * expects the caller to have a local variable named "ops" holding the number
 * of completed operations.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE.  The handle is not needed once the thread is
 * running, so it is closed immediately; closing it does not stop the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		pg_log_error("could not create thread for alarm"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
62 
63 
64 static const char *progname;
65 
66 static int secs_per_test = 5;
67 static int needs_unlink = 0;
69  *buf,
71 static struct timeval start_t,
72  stop_t;
73 static bool alarm_triggered = false;
74 
75 
76 static void handle_args(int argc, char *argv[]);
77 static void prepare_buf(void);
78 static void test_open(void);
79 static void test_non_sync(void);
80 static void test_sync(int writes_per_op);
81 static void test_open_syncs(void);
82 static void test_open_sync(const char *msg, int writes_size);
83 static void test_file_descriptor_sync(void);
84 
85 #ifndef WIN32
86 static void process_alarm(int sig);
87 #else
88 static DWORD WINAPI process_alarm(LPVOID param);
89 #endif
90 static void signal_cleanup(int sig);
91 
92 #ifdef HAVE_FSYNC_WRITETHROUGH
93 static int pg_fsync_writethrough(int fd);
94 #endif
95 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
96 
97 #define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0)
98 
99 
100 int
101 main(int argc, char *argv[])
102 {
103  pg_logging_init(argv[0]);
104  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
105  progname = get_progname(argv[0]);
106 
107  handle_args(argc, argv);
108 
109  /* Prevent leaving behind the test file */
110  pqsignal(SIGINT, signal_cleanup);
111  pqsignal(SIGTERM, signal_cleanup);
112 #ifndef WIN32
114 #endif
115 #ifdef SIGHUP
116  /* Not defined on win32 */
118 #endif
119 
120  prepare_buf();
121 
122  test_open();
123 
124  /* Test using 1 XLOG_BLCKSZ write */
125  test_sync(1);
126 
127  /* Test using 2 XLOG_BLCKSZ writes */
128  test_sync(2);
129 
130  test_open_syncs();
131 
133 
134  test_non_sync();
135 
136  unlink(filename);
137 
138  return 0;
139 }
140 
141 static void
142 handle_args(int argc, char *argv[])
143 {
144  static struct option long_options[] = {
145  {"filename", required_argument, NULL, 'f'},
146  {"secs-per-test", required_argument, NULL, 's'},
147  {NULL, 0, NULL, 0}
148  };
149 
150  int option; /* Command line option */
151  int optindex = 0; /* used by getopt_long */
152 
153  if (argc > 1)
154  {
155  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
156  {
157  printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
158  exit(0);
159  }
160  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
161  {
162  puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
163  exit(0);
164  }
165  }
166 
167  while ((option = getopt_long(argc, argv, "f:s:",
168  long_options, &optindex)) != -1)
169  {
170  switch (option)
171  {
172  case 'f':
174  break;
175 
176  case 's':
177  secs_per_test = atoi(optarg);
178  break;
179 
180  default:
181  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
182  progname);
183  exit(1);
184  break;
185  }
186  }
187 
188  if (argc > optind)
189  {
190  pg_log_error("too many command-line arguments (first is \"%s\")",
191  argv[optind]);
192  fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
193  progname);
194  exit(1);
195  }
196 
197  printf(ngettext("%d second per test\n",
198  "%d seconds per test\n",
199  secs_per_test),
200  secs_per_test);
201 #if PG_O_DIRECT != 0
202  printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
203 #else
204  printf(_("Direct I/O is not supported on this platform.\n"));
205 #endif
206 }
207 
208 static void
210 {
211  int ops;
212 
213  /* write random data into buffer */
214  for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
215  full_buf[ops] = random();
216 
217  buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
218 }
219 
220 static void
222 {
223  int tmpfile;
224 
225  /*
226  * test if we can open the target file
227  */
228  if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
229  die("could not open output file");
230  needs_unlink = 1;
231  if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
233  die("write failed");
234 
235  /* fsync now so that dirty buffers don't skew later tests */
236  if (fsync(tmpfile) != 0)
237  die("fsync failed");
238 
239  close(tmpfile);
240 }
241 
242 static void
243 test_sync(int writes_per_op)
244 {
245  int tmpfile,
246  ops,
247  writes;
248  bool fs_warning = false;
249 
250  if (writes_per_op == 1)
251  printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
252  else
253  printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
254  printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
255 
256  /*
257  * Test open_datasync if available
258  */
259  printf(LABEL_FORMAT, "open_datasync");
260  fflush(stdout);
261 
262 #ifdef OPEN_DATASYNC_FLAG
263  if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
264  {
265  printf(NA_FORMAT, _("n/a*"));
266  fs_warning = true;
267  }
268  else
269  {
270  START_TIMER;
271  for (ops = 0; alarm_triggered == false; ops++)
272  {
273  for (writes = 0; writes < writes_per_op; writes++)
274  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
275  die("write failed");
276  if (lseek(tmpfile, 0, SEEK_SET) == -1)
277  die("seek failed");
278  }
279  STOP_TIMER;
280  close(tmpfile);
281  }
282 #else
283  printf(NA_FORMAT, _("n/a"));
284 #endif
285 
286 /*
287  * Test fdatasync if available
288  */
289  printf(LABEL_FORMAT, "fdatasync");
290  fflush(stdout);
291 
292 #ifdef HAVE_FDATASYNC
293  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
294  die("could not open output file");
295  START_TIMER;
296  for (ops = 0; alarm_triggered == false; ops++)
297  {
298  for (writes = 0; writes < writes_per_op; writes++)
299  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
300  die("write failed");
301  fdatasync(tmpfile);
302  if (lseek(tmpfile, 0, SEEK_SET) == -1)
303  die("seek failed");
304  }
305  STOP_TIMER;
306  close(tmpfile);
307 #else
308  printf(NA_FORMAT, _("n/a"));
309 #endif
310 
311 /*
312  * Test fsync
313  */
314  printf(LABEL_FORMAT, "fsync");
315  fflush(stdout);
316 
317  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
318  die("could not open output file");
319  START_TIMER;
320  for (ops = 0; alarm_triggered == false; ops++)
321  {
322  for (writes = 0; writes < writes_per_op; writes++)
323  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
324  die("write failed");
325  if (fsync(tmpfile) != 0)
326  die("fsync failed");
327  if (lseek(tmpfile, 0, SEEK_SET) == -1)
328  die("seek failed");
329  }
330  STOP_TIMER;
331  close(tmpfile);
332 
333 /*
334  * If fsync_writethrough is available, test as well
335  */
336  printf(LABEL_FORMAT, "fsync_writethrough");
337  fflush(stdout);
338 
339 #ifdef HAVE_FSYNC_WRITETHROUGH
340  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
341  die("could not open output file");
342  START_TIMER;
343  for (ops = 0; alarm_triggered == false; ops++)
344  {
345  for (writes = 0; writes < writes_per_op; writes++)
346  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
347  die("write failed");
348  if (pg_fsync_writethrough(tmpfile) != 0)
349  die("fsync failed");
350  if (lseek(tmpfile, 0, SEEK_SET) == -1)
351  die("seek failed");
352  }
353  STOP_TIMER;
354  close(tmpfile);
355 #else
356  printf(NA_FORMAT, _("n/a"));
357 #endif
358 
359 /*
360  * Test open_sync if available
361  */
362  printf(LABEL_FORMAT, "open_sync");
363  fflush(stdout);
364 
365 #ifdef OPEN_SYNC_FLAG
366  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
367  {
368  printf(NA_FORMAT, _("n/a*"));
369  fs_warning = true;
370  }
371  else
372  {
373  START_TIMER;
374  for (ops = 0; alarm_triggered == false; ops++)
375  {
376  for (writes = 0; writes < writes_per_op; writes++)
377  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
378 
379  /*
380  * This can generate write failures if the filesystem has
381  * a large block size, e.g. 4k, and there is no support
382  * for O_DIRECT writes smaller than the file system block
383  * size, e.g. XFS.
384  */
385  die("write failed");
386  if (lseek(tmpfile, 0, SEEK_SET) == -1)
387  die("seek failed");
388  }
389  STOP_TIMER;
390  close(tmpfile);
391  }
392 #else
393  printf(NA_FORMAT, _("n/a"));
394 #endif
395 
396  if (fs_warning)
397  {
398  printf(_("* This file system and its mount options do not support direct\n"
399  " I/O, e.g. ext4 in journaled mode.\n"));
400  }
401 }
402 
/*
 * Run test_open_sync() once for each way of splitting a 16kB total into
 * equal-sized writes: 1 x 16kB, 2 x 8kB, 4 x 4kB, 8 x 2kB and 16 x 1kB.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
416 
417 /*
418  * Test open_sync with different size files
419  */
420 static void
421 test_open_sync(const char *msg, int writes_size)
422 {
423 #ifdef OPEN_SYNC_FLAG
424  int tmpfile,
425  ops,
426  writes;
427 #endif
428 
429  printf(LABEL_FORMAT, msg);
430  fflush(stdout);
431 
432 #ifdef OPEN_SYNC_FLAG
433  if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
434  printf(NA_FORMAT, _("n/a*"));
435  else
436  {
437  START_TIMER;
438  for (ops = 0; alarm_triggered == false; ops++)
439  {
440  for (writes = 0; writes < 16 / writes_size; writes++)
441  if (write(tmpfile, buf, writes_size * 1024) !=
442  writes_size * 1024)
443  die("write failed");
444  if (lseek(tmpfile, 0, SEEK_SET) == -1)
445  die("seek failed");
446  }
447  STOP_TIMER;
448  close(tmpfile);
449  }
450 #else
451  printf(NA_FORMAT, _("n/a"));
452 #endif
453 }
454 
455 static void
457 {
458  int tmpfile,
459  ops;
460 
461  /*
462  * Test whether fsync can sync data written on a different descriptor for
463  * the same file. This checks the efficiency of multi-process fsyncs
464  * against the same file. Possibly this should be done with writethrough
465  * on platforms which support it.
466  */
467  printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
468  printf(_("(If the times are similar, fsync() can sync data written on a different\n"
469  "descriptor.)\n"));
470 
471  /*
472  * first write, fsync and close, which is the normal behavior without
473  * multiple descriptors
474  */
475  printf(LABEL_FORMAT, "write, fsync, close");
476  fflush(stdout);
477 
478  START_TIMER;
479  for (ops = 0; alarm_triggered == false; ops++)
480  {
481  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
482  die("could not open output file");
483  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
484  die("write failed");
485  if (fsync(tmpfile) != 0)
486  die("fsync failed");
487  close(tmpfile);
488 
489  /*
490  * open and close the file again to be consistent with the following
491  * test
492  */
493  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
494  die("could not open output file");
495  close(tmpfile);
496  }
497  STOP_TIMER;
498 
499  /*
500  * Now open, write, close, open again and fsync This simulates processes
501  * fsyncing each other's writes.
502  */
503  printf(LABEL_FORMAT, "write, close, fsync");
504  fflush(stdout);
505 
506  START_TIMER;
507  for (ops = 0; alarm_triggered == false; ops++)
508  {
509  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
510  die("could not open output file");
511  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
512  die("write failed");
513  close(tmpfile);
514  /* reopen file */
515  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
516  die("could not open output file");
517  if (fsync(tmpfile) != 0)
518  die("fsync failed");
519  close(tmpfile);
520  }
521  STOP_TIMER;
522 }
523 
524 static void
526 {
527  int tmpfile,
528  ops;
529 
530  /*
531  * Test a simple write without fsync
532  */
533  printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
534  printf(LABEL_FORMAT, "write");
535  fflush(stdout);
536 
537  START_TIMER;
538  for (ops = 0; alarm_triggered == false; ops++)
539  {
540  if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
541  die("could not open output file");
542  if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
543  die("write failed");
544  close(tmpfile);
545  }
546  STOP_TIMER;
547 }
548 
549 static void
550 signal_cleanup(int signum)
551 {
552  /* Delete the file if it exists. Ignore errors */
553  if (needs_unlink)
554  unlink(filename);
555  /* Finish incomplete line on stdout */
556  puts("");
557  exit(signum);
558 }
559 
560 #ifdef HAVE_FSYNC_WRITETHROUGH
561 
/*
 * Flush fd using the platform's write-through primitive: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that command is defined.
 *
 * Returns 0 on success and -1 on failure.  On platforms with neither
 * primitive it always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
574 #endif
575 
576 /*
577  * print out the writes per second for tests
578  */
579 static void
580 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
581 {
582  double total_time = (stop_t.tv_sec - start_t.tv_sec) +
583  (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
584  double per_second = ops / total_time;
585  double avg_op_time_us = (total_time / ops) * USECS_SEC;
586 
587  printf(_(OPS_FORMAT), per_second, avg_op_time_us);
588 }
589 
590 #ifndef WIN32
591 static void
593 {
594  alarm_triggered = true;
595 }
596 #else
597 static DWORD WINAPI
598 process_alarm(LPVOID param)
599 {
600  /* WIN32 doesn't support alarm, so we create a thread and sleep here */
601  Sleep(secs_per_test * 1000);
602  alarm_triggered = true;
603  ExitThread(0);
604 }
605 #endif
#define PG_O_DIRECT
Definition: xlogdefs.h:72
static void test_sync(int writes_per_op)
static int secs_per_test
Definition: pg_test_fsync.c:66
static char * filename
Definition: pg_test_fsync.c:70
#define write(a, b, c)
Definition: win32.h:14
const char * get_progname(const char *argv0)
Definition: path.c:453
#define pg_log_error(...)
Definition: logging.h:79
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
long random(void)
Definition: random.c:22
int pg_fsync_writethrough(int fd)
Definition: fd.c:362
void pg_logging_init(const char *argv0)
Definition: logging.c:39
#define USECS_SEC
Definition: pg_test_fsync.c:32
#define DEFAULT_XLOG_SEG_SIZE
#define printf(...)
Definition: port.h:198
#define fprintf
Definition: port.h:196
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static char full_buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:68
static void test_open(void)
static bool alarm_triggered
Definition: pg_test_fsync.c:73
static void prepare_buf(void)
#define required_argument
Definition: getopt_long.h:25
#define fsync(fd)
Definition: win32_port.h:63
int optind
Definition: getopt.c:50
#define S_IWUSR
Definition: win32_port.h:272
static const char * progname
Definition: pg_test_fsync.c:64
static char * buf
Definition: pg_test_fsync.c:68
#define SIGHUP
Definition: win32_port.h:163
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
#define NA_FORMAT
Definition: pg_test_fsync.c:29
#define XLOG_BLCKSZ_K
Definition: pg_test_fsync.c:26
#define STOP_TIMER
Definition: pg_test_fsync.c:57
#define ngettext(s, p, n)
Definition: c.h:1089
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1121
#define O_DSYNC
Definition: win32_port.h:317
#define LABEL_FORMAT
Definition: pg_test_fsync.c:28
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
static void test_open_syncs(void)
static void test_non_sync(void)
static int sig
Definition: pg_ctl.c:83
static struct timeval start_t stop_t
Definition: pg_test_fsync.c:71
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:678
static void process_alarm(int sig)
pqsigfunc pqsignal(int signum, pqsigfunc handler)
Definition: signal.c:170
static void signal_cleanup(int sig)
int main(int argc, char *argv[])
#define START_TIMER
Definition: pg_test_fsync.c:36
#define OPS_FORMAT
Definition: pg_test_fsync.c:31
#define SIGALRM
Definition: win32_port.h:169
#define S_IRUSR
Definition: win32_port.h:269
static int needs_unlink
Definition: pg_test_fsync.c:67
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:565
#define FSYNC_FILENAME
Definition: pg_test_fsync.c:24
char * optarg
Definition: getopt.c:52
static void test_file_descriptor_sync(void)
#define close(a)
Definition: win32.h:12
#define _(x)
Definition: elog.c:84
#define die(msg)
Definition: pg_test_fsync.c:97
static void test_open_sync(const char *msg, int writes_size)
static void handle_args(int argc, char *argv[])